From: Igor Romanov <igor.roma...@oktetlabs.ru>

For now, a rule may have only one dedicated counter; shared counters
are not supported.

HW delivers (or "streams") counter readings using special packets.
The driver creates a dedicated Rx queue to receive such packets
and requests that HW start "streaming" the readings to it.

The counter queue is polled periodically, and the first available
service core is used for that. Hence, the user has to specify at least
one service core for counters to work. Such a core is shared by all
MAE-capable devices managed by the sfc driver.

Signed-off-by: Igor Romanov <igor.roma...@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <andrew.rybche...@oktetlabs.ru>
Reviewed-by: Andy Moreton <amore...@xilinx.com>
Reviewed-by: Ivan Malov <ivan.ma...@oktetlabs.ru>
---
 drivers/net/sfc/meson.build       |  10 +
 drivers/net/sfc/sfc_flow.c        |   7 +
 drivers/net/sfc/sfc_mae.c         | 231 +++++++++++-
 drivers/net/sfc/sfc_mae.h         |  60 ++++
 drivers/net/sfc/sfc_mae_counter.c | 578 ++++++++++++++++++++++++++++++
 drivers/net/sfc/sfc_mae_counter.h |  11 +
 drivers/net/sfc/sfc_stats.h       |  80 +++++
 drivers/net/sfc/sfc_tweak.h       |   9 +
 8 files changed, 981 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/sfc/sfc_stats.h

diff --git a/drivers/net/sfc/meson.build b/drivers/net/sfc/meson.build
index f8880f740a..32b58e3d76 100644
--- a/drivers/net/sfc/meson.build
+++ b/drivers/net/sfc/meson.build
@@ -39,6 +39,16 @@ foreach flag: extra_flags
     endif
 endforeach
 
+# 32-bit clang builds need libatomic for 64-bit atomic operations
+if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false
+    ext_deps += cc.find_library('atomic')
+endif
+
+# gcc builds need -latomic for 128-bit atomic operations
+if cc.get_id() == 'gcc'
+    ext_deps += cc.find_library('atomic')
+endif
+
 deps += ['common_sfc_efx', 'bus_pci']
 sources = files(
         'sfc_ethdev.c',
diff --git a/drivers/net/sfc/sfc_flow.c b/drivers/net/sfc/sfc_flow.c
index 2db8af1759..1294dbd3a7 100644
--- a/drivers/net/sfc/sfc_flow.c
+++ b/drivers/net/sfc/sfc_flow.c
@@ -24,6 +24,7 @@
 #include "sfc_flow.h"
 #include "sfc_log.h"
 #include "sfc_dp_rx.h"
+#include "sfc_mae_counter.h"
 
 struct sfc_flow_ops_by_spec {
        sfc_flow_parse_cb_t     *parse;
@@ -2854,6 +2855,12 @@ sfc_flow_stop(struct sfc_adapter *sa)
                efx_rx_scale_context_free(sa->nic, rss->dummy_rss_context);
                rss->dummy_rss_context = EFX_RSS_CONTEXT_DEFAULT;
        }
+
+       /*
+        * MAE counter service is not stopped on flow rule remove to avoid
+        * extra work. Make sure that it is stopped here.
+        */
+       sfc_mae_counter_stop(sa);
 }
 
 int
diff --git a/drivers/net/sfc/sfc_mae.c b/drivers/net/sfc/sfc_mae.c
index e603ffbdb4..370a39da1d 100644
--- a/drivers/net/sfc/sfc_mae.c
+++ b/drivers/net/sfc/sfc_mae.c
@@ -19,6 +19,7 @@
 #include "sfc_mae_counter.h"
 #include "sfc_log.h"
 #include "sfc_switch.h"
+#include "sfc_service.h"
 
 static int
 sfc_mae_assign_entity_mport(struct sfc_adapter *sa,
@@ -30,6 +31,19 @@ sfc_mae_assign_entity_mport(struct sfc_adapter *sa,
                                              mportp);
 }
 
+/* Initialise the counters collection embedded into the counter registry */
+static int
+sfc_mae_counter_registry_init(struct sfc_mae_counter_registry *registry,
+                             uint32_t nb_counters_max)
+{
+       struct sfc_mae_counters *counters = &registry->counters;
+
+       return sfc_mae_counters_init(counters, nb_counters_max);
+}
+
+/* Finalise the counters collection embedded into the counter registry */
+static void
+sfc_mae_counter_registry_fini(struct sfc_mae_counter_registry *registry)
+{
+       struct sfc_mae_counters *counters = &registry->counters;
+
+       sfc_mae_counters_fini(counters);
+}
+
 int
 sfc_mae_attach(struct sfc_adapter *sa)
 {
@@ -59,6 +73,15 @@ sfc_mae_attach(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_mae_get_limits;
 
+       sfc_log_init(sa, "init MAE counter registry");
+       rc = sfc_mae_counter_registry_init(&mae->counter_registry,
+                                          limits.eml_max_n_counters);
+       if (rc != 0) {
+               sfc_err(sa, "failed to init MAE counters registry for %u entries: %s",
+                       limits.eml_max_n_counters, rte_strerror(rc));
+               goto fail_counter_registry_init;
+       }
+
        sfc_log_init(sa, "assign entity MPORT");
        rc = sfc_mae_assign_entity_mport(sa, &entity_mport);
        if (rc != 0)
@@ -107,6 +130,9 @@ sfc_mae_attach(struct sfc_adapter *sa)
 fail_mae_assign_switch_port:
 fail_mae_assign_switch_domain:
 fail_mae_assign_entity_mport:
+       sfc_mae_counter_registry_fini(&mae->counter_registry);
+
+fail_counter_registry_init:
 fail_mae_get_limits:
        efx_mae_fini(sa->nic);
 
@@ -131,6 +157,7 @@ sfc_mae_detach(struct sfc_adapter *sa)
                return;
 
        rte_free(mae->bounce_eh.buf);
+       sfc_mae_counter_registry_fini(&mae->counter_registry);
 
        efx_mae_fini(sa->nic);
 
@@ -480,9 +507,72 @@ sfc_mae_encap_header_disable(struct sfc_adapter *sa,
        --(fw_rsrc->refcnt);
 }
 
+/*
+ * Enable the counters of an action set and fill in the resulting MAE
+ * counter ID in the action set specification.
+ *
+ * Nothing is done when n_counters is zero; otherwise exactly one counter
+ * is expected (asserted) and the adapter lock must be held (asserted).
+ * If the counter ID cannot be filled in, the counter which has just been
+ * enabled is disabled again before the error is returned.
+ *
+ * Returns 0 on success or the error code of the failed step.
+ */
+static int
+sfc_mae_counters_enable(struct sfc_adapter *sa,
+                       struct sfc_mae_counter_id *counters,
+                       unsigned int n_counters,
+                       efx_mae_actions_t *action_set_spec)
+{
+       int rc;
+
+       sfc_log_init(sa, "entry");
+
+       if (n_counters == 0) {
+               sfc_log_init(sa, "no counters - skip");
+               return 0;
+       }
+
+       SFC_ASSERT(sfc_adapter_is_locked(sa));
+       SFC_ASSERT(n_counters == 1);
+
+       rc = sfc_mae_counter_enable(sa, &counters[0]);
+       if (rc != 0) {
+               sfc_err(sa, "failed to enable MAE counter %u: %s",
+                       counters[0].mae_id.id, rte_strerror(rc));
+               goto fail_counter_add;
+       }
+
+       rc = efx_mae_action_set_fill_in_counter_id(action_set_spec,
+                                                  &counters[0].mae_id);
+       if (rc != 0) {
+               sfc_err(sa, "failed to fill in MAE counter %u in action set: %s",
+                       counters[0].mae_id.id, rte_strerror(rc));
+               goto fail_fill_in_id;
+       }
+
+       return 0;
+
+fail_fill_in_id:
+       /* Best effort rollback; the original error code is reported */
+       (void)sfc_mae_counter_disable(sa, &counters[0]);
+
+fail_counter_add:
+       sfc_log_init(sa, "failed: %s", rte_strerror(rc));
+       return rc;
+}
+
+/*
+ * Disable the counters of an action set.
+ *
+ * Nothing is done when n_counters is zero; otherwise exactly one counter
+ * is expected (asserted) and the adapter lock must be held (asserted).
+ * A counter whose MAE ID is already invalid is reported as EALREADY.
+ */
+static int
+sfc_mae_counters_disable(struct sfc_adapter *sa,
+                        struct sfc_mae_counter_id *counters,
+                        unsigned int n_counters)
+{
+       struct sfc_mae_counter_id *counter;
+
+       if (n_counters == 0)
+               return 0;
+
+       SFC_ASSERT(sfc_adapter_is_locked(sa));
+       SFC_ASSERT(n_counters == 1);
+
+       counter = &counters[0];
+       if (counter->mae_id.id != EFX_MAE_RSRC_ID_INVALID)
+               return sfc_mae_counter_disable(sa, counter);
+
+       sfc_err(sa, "failed to disable: already disabled");
+       return EALREADY;
+}
+
 static struct sfc_mae_action_set *
 sfc_mae_action_set_attach(struct sfc_adapter *sa,
                          const struct sfc_mae_encap_header *encap_header,
+                         unsigned int n_count,
                          const efx_mae_actions_t *spec)
 {
        struct sfc_mae_action_set *action_set;
@@ -491,7 +581,12 @@ sfc_mae_action_set_attach(struct sfc_adapter *sa,
        SFC_ASSERT(sfc_adapter_is_locked(sa));
 
        TAILQ_FOREACH(action_set, &mae->action_sets, entries) {
+               /*
+                * Shared counters are not supported, hence action sets with
+                * COUNT are not attachable.
+                */
                if (action_set->encap_header == encap_header &&
+                   n_count == 0 &&
                    efx_mae_action_set_specs_equal(action_set->spec, spec)) {
                        sfc_dbg(sa, "attaching to action_set=%p", action_set);
                        ++(action_set->refcnt);
@@ -504,18 +599,52 @@ sfc_mae_action_set_attach(struct sfc_adapter *sa,
 
 static int
 sfc_mae_action_set_add(struct sfc_adapter *sa,
+                      const struct rte_flow_action actions[],
                       efx_mae_actions_t *spec,
                       struct sfc_mae_encap_header *encap_header,
+                      unsigned int n_counters,
                       struct sfc_mae_action_set **action_setp)
 {
        struct sfc_mae_action_set *action_set;
        struct sfc_mae *mae = &sa->mae;
+       unsigned int i;
 
        SFC_ASSERT(sfc_adapter_is_locked(sa));
 
        action_set = rte_zmalloc("sfc_mae_action_set", sizeof(*action_set), 0);
-       if (action_set == NULL)
+       if (action_set == NULL) {
+               sfc_err(sa, "failed to alloc action set");
                return ENOMEM;
+       }
+
+       if (n_counters > 0) {
+               const struct rte_flow_action *action;
+
+               action_set->counters = rte_malloc("sfc_mae_counter_ids",
+                       sizeof(action_set->counters[0]) * n_counters, 0);
+               if (action_set->counters == NULL) {
+                       rte_free(action_set);
+                       sfc_err(sa, "failed to alloc counters");
+                       return ENOMEM;
+               }
+
+               for (action = actions, i = 0;
+                    action->type != RTE_FLOW_ACTION_TYPE_END && i < n_counters;
+                    ++action) {
+                       const struct rte_flow_action_count *conf;
+
+                       if (action->type != RTE_FLOW_ACTION_TYPE_COUNT)
+                               continue;
+
+                       conf = action->conf;
+
+                       action_set->counters[i].mae_id.id =
+                               EFX_MAE_RSRC_ID_INVALID;
+                       action_set->counters[i].rte_id = conf->id;
+                       i++;
+               }
+               action_set->n_counters = n_counters;
+       }
 
        action_set->refcnt = 1;
        action_set->spec = spec;
@@ -555,6 +684,12 @@ sfc_mae_action_set_del(struct sfc_adapter *sa,
 
        efx_mae_action_set_spec_fini(sa->nic, action_set->spec);
        sfc_mae_encap_header_del(sa, action_set->encap_header);
+       if (action_set->n_counters > 0) {
+               SFC_ASSERT(action_set->n_counters == 1);
+               SFC_ASSERT(action_set->counters[0].mae_id.id ==
+                          EFX_MAE_RSRC_ID_INVALID);
+               rte_free(action_set->counters);
+       }
        TAILQ_REMOVE(&mae->action_sets, action_set, entries);
        rte_free(action_set);
 
@@ -566,6 +701,7 @@ sfc_mae_action_set_enable(struct sfc_adapter *sa,
                          struct sfc_mae_action_set *action_set)
 {
        struct sfc_mae_encap_header *encap_header = action_set->encap_header;
+       struct sfc_mae_counter_id *counters = action_set->counters;
        struct sfc_mae_fw_rsrc *fw_rsrc = &action_set->fw_rsrc;
        int rc;
 
@@ -580,14 +716,26 @@ sfc_mae_action_set_enable(struct sfc_adapter *sa,
                if (rc != 0)
                        return rc;
 
-               rc = efx_mae_action_set_alloc(sa->nic, action_set->spec,
-                                             &fw_rsrc->aset_id);
+               rc = sfc_mae_counters_enable(sa, counters,
+                                            action_set->n_counters,
+                                            action_set->spec);
                if (rc != 0) {
+                       sfc_err(sa, "failed to enable %u MAE counters: %s",
+                               action_set->n_counters, rte_strerror(rc));
+
                        sfc_mae_encap_header_disable(sa, encap_header);
+                       return rc;
+               }
 
+               rc = efx_mae_action_set_alloc(sa->nic, action_set->spec,
+                                             &fw_rsrc->aset_id);
+               if (rc != 0) {
                        sfc_err(sa, "failed to enable action_set=%p: %s",
                                action_set, strerror(rc));
 
+                       (void)sfc_mae_counters_disable(sa, counters,
+                                                      action_set->n_counters);
+                       sfc_mae_encap_header_disable(sa, encap_header);
                        return rc;
                }
 
@@ -627,6 +775,13 @@ sfc_mae_action_set_disable(struct sfc_adapter *sa,
                }
                fw_rsrc->aset_id.id = EFX_MAE_RSRC_ID_INVALID;
 
+               rc = sfc_mae_counters_disable(sa, action_set->counters,
+                                             action_set->n_counters);
+               if (rc != 0) {
+                       sfc_err(sa, "failed to disable %u MAE counters: %s",
+                               action_set->n_counters, rte_strerror(rc));
+               }
+
                sfc_mae_encap_header_disable(sa, action_set->encap_header);
        }
 
@@ -2598,6 +2753,48 @@ sfc_mae_rule_parse_action_mark(const struct rte_flow_action_mark *conf,
        return efx_mae_action_set_populate_mark(spec, conf->id);
 }
 
+/*
+ * Parse action COUNT and request a counter in the MAE action set
+ * specification.
+ *
+ * The action is rejected when its configuration is missing, when a shared
+ * counter is requested, when the counter RxQ has not been initialised or
+ * when no service core can be obtained.
+ *
+ * Returns 0 on success or an error code (EINVAL, ENOTSUP or the error
+ * reported by efx_mae_action_set_populate_count()) on failure.
+ */
+static int
+sfc_mae_rule_parse_action_count(struct sfc_adapter *sa,
+                               const struct rte_flow_action_count *conf,
+                               efx_mae_actions_t *spec)
+{
+       int rc;
+
+       /* The configuration is dereferenced below, so it must be present */
+       if (conf == NULL) {
+               sfc_err(sa, "COUNT action configuration is missing");
+               rc = EINVAL;
+               goto fail_no_conf;
+       }
+
+       if (conf->shared) {
+               sfc_err(sa, "shared counters are not supported");
+               rc = ENOTSUP;
+               goto fail_counter_shared;
+       }
+
+       if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_INITIALIZED) == 0) {
+               sfc_err(sa,
+                       "counter queue is not configured for COUNT action");
+               rc = EINVAL;
+               goto fail_counter_queue_uninit;
+       }
+
+       if (sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE) {
+               sfc_err(sa, "no service core is available for COUNT action");
+               rc = EINVAL;
+               goto fail_no_service_core;
+       }
+
+       rc = efx_mae_action_set_populate_count(spec);
+       if (rc != 0) {
+               sfc_err(sa,
+                       "failed to populate counters in MAE action set: %s",
+                       rte_strerror(rc));
+               goto fail_populate_count;
+       }
+
+       return 0;
+
+fail_populate_count:
+fail_no_service_core:
+fail_counter_queue_uninit:
+fail_counter_shared:
+fail_no_conf:
+
+       return rc;
+}
+
 static int
 sfc_mae_rule_parse_action_phy_port(struct sfc_adapter *sa,
                                   const struct rte_flow_action_phy_port *conf,
@@ -2713,6 +2910,11 @@ sfc_mae_rule_parse_action(struct sfc_adapter *sa,
                                                           spec, error);
                custom_error = B_TRUE;
                break;
+       case RTE_FLOW_ACTION_TYPE_COUNT:
+               SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_COUNT,
+                                      bundle->actions_mask);
+               rc = sfc_mae_rule_parse_action_count(sa, action->conf, spec);
+               break;
        case RTE_FLOW_ACTION_TYPE_FLAG:
                SFC_BUILD_SET_OVERFLOW(RTE_FLOW_ACTION_TYPE_FLAG,
                                       bundle->actions_mask);
@@ -2798,6 +3000,7 @@ sfc_mae_rule_parse_actions(struct sfc_adapter *sa,
        const struct rte_flow_action *action;
        struct sfc_mae *mae = &sa->mae;
        efx_mae_actions_t *spec;
+       unsigned int n_count;
        int rc;
 
        rte_errno = 0;
@@ -2835,15 +3038,22 @@ sfc_mae_rule_parse_actions(struct sfc_adapter *sa,
        if (rc != 0)
                goto fail_process_encap_header;
 
+       n_count = efx_mae_action_set_get_nb_count(spec);
+       if (n_count > 1) {
+               rc = ENOTSUP;
+               sfc_err(sa, "too many count actions requested: %u", n_count);
+               goto fail_nb_count;
+       }
+
        spec_mae->action_set = sfc_mae_action_set_attach(sa, encap_header,
-                                                        spec);
+                                                        n_count, spec);
        if (spec_mae->action_set != NULL) {
                sfc_mae_encap_header_del(sa, encap_header);
                efx_mae_action_set_spec_fini(sa->nic, spec);
                return 0;
        }
 
-       rc = sfc_mae_action_set_add(sa, spec, encap_header,
+       rc = sfc_mae_action_set_add(sa, actions, spec, encap_header, n_count,
                                    &spec_mae->action_set);
        if (rc != 0)
                goto fail_action_set_add;
@@ -2851,6 +3061,7 @@ sfc_mae_rule_parse_actions(struct sfc_adapter *sa,
        return 0;
 
 fail_action_set_add:
+fail_nb_count:
        sfc_mae_encap_header_del(sa, encap_header);
 
 fail_process_encap_header:
@@ -3005,6 +3216,15 @@ sfc_mae_flow_insert(struct sfc_adapter *sa,
        if (rc != 0)
                goto fail_action_set_enable;
 
+       if (action_set->n_counters > 0) {
+               rc = sfc_mae_counter_start(sa);
+               if (rc != 0) {
+                       sfc_err(sa, "failed to start MAE counters support: %s",
+                               rte_strerror(rc));
+                       goto fail_mae_counter_start;
+               }
+       }
+
        rc = efx_mae_action_rule_insert(sa->nic, spec_mae->match_spec,
                                        NULL, &fw_rsrc->aset_id,
                                        &spec_mae->rule_id);
@@ -3017,6 +3237,7 @@ sfc_mae_flow_insert(struct sfc_adapter *sa,
        return 0;
 
 fail_action_rule_insert:
+fail_mae_counter_start:
        sfc_mae_action_set_disable(sa, action_set);
 
 fail_action_set_enable:
diff --git a/drivers/net/sfc/sfc_mae.h b/drivers/net/sfc/sfc_mae.h
index 0241fe33c4..15fe5ebca5 100644
--- a/drivers/net/sfc/sfc_mae.h
+++ b/drivers/net/sfc/sfc_mae.h
@@ -16,6 +16,8 @@
 
 #include "efx.h"
 
+#include "sfc_stats.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -54,10 +56,20 @@ struct sfc_mae_encap_header {
 
 TAILQ_HEAD(sfc_mae_encap_headers, sfc_mae_encap_header);
 
+/*
+ * Counter ID: pairs the ID of the counter in MAE with the ID taken from
+ * the COUNT action configuration of the flow rule.
+ */
+struct sfc_mae_counter_id {
+       /*
+        * ID of a counter in MAE; holds EFX_MAE_RSRC_ID_INVALID while
+        * the counter is not enabled
+        */
+       efx_counter_t                   mae_id;
+       /* ID of a counter in RTE (id from the COUNT action configuration) */
+       uint32_t                        rte_id;
+};
+
 /** Action set registry entry */
 struct sfc_mae_action_set {
        TAILQ_ENTRY(sfc_mae_action_set) entries;
        unsigned int                    refcnt;
+       struct sfc_mae_counter_id       *counters;
+       uint32_t                        n_counters;
        efx_mae_actions_t               *spec;
        struct sfc_mae_encap_header     *encap_header;
        struct sfc_mae_fw_rsrc          fw_rsrc;
@@ -83,6 +95,50 @@ struct sfc_mae_bounce_eh {
        efx_tunnel_protocol_t           type;
 };
 
+/**
+ * Counter collection entry
+ *
+ * inuse is set at the very end of counter enable and cleared at the
+ * beginning of counter disable; updates for entries which are not in use
+ * are dropped. generation_count is recorded on counter enable; updates
+ * which carry a smaller generation count are treated as stale. value
+ * accumulates packets and bytes from the counter updates. reset is assigned
+ * a copy of value when the counter is enabled.
+ */
+struct sfc_mae_counter {
+       bool                            inuse;
+       uint32_t                        generation_count;
+       union sfc_pkts_bytes            value;
+       union sfc_pkts_bytes            reset;
+};
+
+/** Counts of counter updates which were dropped instead of being applied */
+struct sfc_mae_counters_xstats {
+       /** Updates received for counters which were not in use */
+       uint64_t                        not_inuse_update;
+       /** Updates with generation count less than that of the counter */
+       uint64_t                        realloc_update;
+};
+
+struct sfc_mae_counters {
+       /** An array of all MAE counters, indexed by the MAE counter ID */
+       struct sfc_mae_counter          *mae_counters;
+       /** Extra statistics for counters */
+       struct sfc_mae_counters_xstats  xstats;
+       /**
+        * Count of all MAE counters; MAE counter IDs are checked against
+        * it before the array above is indexed on counter enable
+        */
+       unsigned int                    n_mae_counters;
+};
+
+struct sfc_mae_counter_registry {
+       /* Common counter information */
+       /** Counters collection */
+       struct sfc_mae_counters         counters;
+
+       /* Information used by counter update service */
+       /** Callback to get packets from RxQ */
+       eth_rx_burst_t                  rx_pkt_burst;
+       /** Data for the callback to get packets */
+       struct sfc_dp_rxq               *rx_dp;
+       /**
+        * Number of buffers pushed to the RxQ; the service routine compares
+        * it with the current pushed count to decide on giving credits
+        */
+       unsigned int                    pushed_n_buffers;
+       /**
+        * Are credits used by counter stream; if not, the service routine
+        * returns right after processing the received packets
+        */
+       bool                            use_credits;
+
+       /* Information used by configuration routines */
+       /** Counter service core ID */
+       uint32_t                        service_core_id;
+       /** Counter service ID */
+       uint32_t                        service_id;
+};
+
 struct sfc_mae {
        /** Assigned switch domain identifier */
        uint16_t                        switch_domain_id;
@@ -104,6 +160,10 @@ struct sfc_mae {
        struct sfc_mae_action_sets      action_sets;
        /** Encap. header bounce buffer */
        struct sfc_mae_bounce_eh        bounce_eh;
+       /** Flag indicating whether counter-only RxQ is running */
+       bool                            counter_rxq_running;
+       /** Counter registry */
+       struct sfc_mae_counter_registry counter_registry;
 };
 
 struct sfc_adapter;
diff --git a/drivers/net/sfc/sfc_mae_counter.c b/drivers/net/sfc/sfc_mae_counter.c
index c7646cf7b1..3aeb37f7ec 100644
--- a/drivers/net/sfc/sfc_mae_counter.c
+++ b/drivers/net/sfc/sfc_mae_counter.c
@@ -4,8 +4,10 @@
  */
 
 #include <rte_common.h>
+#include <rte_service_component.h>
 
 #include "efx.h"
+#include "efx_regs_counters_pkt_format.h"
 
 #include "sfc_ev.h"
 #include "sfc.h"
@@ -49,6 +51,520 @@ sfc_mae_counter_rxq_required(struct sfc_adapter *sa)
        return true;
 }
 
+/*
+ * Allocate one MAE counter and bring its collection entry into use.
+ *
+ * On success the allocated ID is stored in counterp->mae_id and 0 is
+ * returned. On failure an error code is returned; a counter allocated
+ * with an out-of-range ID is freed again.
+ */
+int
+sfc_mae_counter_enable(struct sfc_adapter *sa,
+                      struct sfc_mae_counter_id *counterp)
+{
+       struct sfc_mae_counters *counters = &sa->mae.counter_registry.counters;
+       struct sfc_mae_counter *entry;
+       efx_counter_t hw_counter;
+       uint32_t gen_count;
+       uint32_t n_done;
+       int rc;
+
+       /*
+        * Exactly one counter is requested, so the allocated count reported
+        * back is of no interest: failure is signalled by the return code.
+        */
+       rc = efx_mae_counters_alloc(sa->nic, 1, &n_done, &hw_counter,
+                                   &gen_count);
+       if (rc != 0) {
+               sfc_err(sa, "failed to alloc MAE counter: %s",
+                       rte_strerror(rc));
+               goto fail_alloc;
+       }
+
+       /*
+        * The collection is a pre-allocated array sized by the maximum
+        * count of counters, so every valid ID must be a valid index.
+        */
+       if (hw_counter.id >= counters->n_mae_counters) {
+               sfc_err(sa, "MAE counter ID is out of expected range");
+               rc = EFAULT;
+               goto fail_id_range;
+       }
+
+       entry = &counters->mae_counters[hw_counter.id];
+       counterp->mae_id = hw_counter;
+
+       /*
+        * Relaxed ordering is sufficient: this is the only operation on
+        * the counter value here and it does not depend on other stores
+        * or loads in other threads. Pairs with the relaxed store made
+        * on counter increment.
+        */
+       __atomic_store(&entry->reset.pkts_bytes.int128,
+                      &entry->value.pkts_bytes.int128, __ATOMIC_RELAXED);
+       entry->generation_count = gen_count;
+
+       /*
+        * Mark the entry in use as the very last step (the flag is cleared
+        * first thing on delete). The release store pairs with the acquire
+        * load made on counter increment.
+        */
+       __atomic_store_n(&entry->inuse, true, __ATOMIC_RELEASE);
+
+       sfc_info(sa, "enabled MAE counter #%u with reset pkts=%" PRIu64
+                " bytes=%" PRIu64, hw_counter.id,
+                entry->reset.pkts, entry->reset.bytes);
+
+       return 0;
+
+fail_id_range:
+       (void)efx_mae_counters_free(sa->nic, 1, &n_done, &hw_counter, NULL);
+
+fail_alloc:
+       sfc_log_init(sa, "failed: %s", rte_strerror(rc));
+       return rc;
+}
+
+/*
+ * Take the collection entry of a counter out of use and free the counter
+ * in MAE.
+ *
+ * A counter whose MAE ID is already invalid is left alone and 0 is
+ * returned. Otherwise the return code of efx_mae_counters_free() is
+ * returned, and the MAE ID is invalidated in either case.
+ */
+int
+sfc_mae_counter_disable(struct sfc_adapter *sa,
+                       struct sfc_mae_counter_id *counter)
+{
+       struct sfc_mae_counters *counters = &sa->mae.counter_registry.counters;
+       efx_counter_t *mae_id = &counter->mae_id;
+       struct sfc_mae_counter *entry;
+       uint32_t n_done;
+       int rc;
+
+       if (mae_id->id == EFX_MAE_RSRC_ID_INVALID)
+               return 0;
+
+       SFC_ASSERT(mae_id->id < counters->n_mae_counters);
+       entry = &counters->mae_counters[mae_id->id];
+
+       /*
+        * Clear the flag first thing on delete (it is set as the very last
+        * step on add). The release store pairs with the acquire load made
+        * on counter increment.
+        */
+       __atomic_store_n(&entry->inuse, false, __ATOMIC_RELEASE);
+
+       rc = efx_mae_counters_free(sa->nic, 1, &n_done, mae_id, NULL);
+       if (rc != 0) {
+               sfc_err(sa, "failed to free MAE counter %u: %s",
+                       mae_id->id, rte_strerror(rc));
+       }
+
+       sfc_info(sa, "disabled MAE counter #%u with reset pkts=%" PRIu64
+                " bytes=%" PRIu64, mae_id->id,
+                entry->reset.pkts, entry->reset.bytes);
+
+       /*
+        * Invalidate the ID whatever efx_mae_counters_free() has returned.
+        * A failure there leaks the resource, which is bad, but nothing
+        * sensible can be done about it here.
+        */
+       mae_id->id = EFX_MAE_RSRC_ID_INVALID;
+
+       return rc;
+}
+
+/*
+ * Apply one counter update to the counters collection.
+ *
+ * The update is dropped if the counter ID does not fit into the collection,
+ * if the counter is not in use, or if the generation count of the update
+ * is less than the one recorded for the counter. Otherwise pkts and bytes
+ * are added to the accumulated counter value.
+ */
+static void
+sfc_mae_counter_increment(struct sfc_adapter *sa,
+                         struct sfc_mae_counters *counters,
+                         uint32_t mae_counter_id,
+                         uint32_t generation_count,
+                         uint64_t pkts, uint64_t bytes)
+{
+       struct sfc_mae_counters_xstats *xstats = &counters->xstats;
+       union sfc_pkts_bytes cnt_val;
+       struct sfc_mae_counter *p;
+       bool inuse;
+
+       /*
+        * The counter ID is taken from the packet payload, so make sure
+        * that it fits into the pre-allocated array before it is used
+        * as an index.
+        */
+       if (unlikely(mae_counter_id >= counters->n_mae_counters)) {
+               sfc_err(sa, "MAE counter #%u update is out of expected range",
+                       mae_counter_id);
+               return;
+       }
+
+       p = &counters->mae_counters[mae_counter_id];
+
+       /*
+        * Acquire ordering is paired with release ordering in counter add
+        * and delete operations.
+        */
+       __atomic_load(&p->inuse, &inuse, __ATOMIC_ACQUIRE);
+       if (!inuse) {
+               /*
+                * Two possible cases include:
+                * 1) Counter is just allocated. Too early counter update
+                *    cannot be processed properly.
+                * 2) Stale update of freed and not reallocated counter.
+                *    There is no point in processing that update.
+                */
+               xstats->not_inuse_update++;
+               return;
+       }
+
+       if (unlikely(generation_count < p->generation_count)) {
+               /*
+                * It is a stale update for the reallocated counter
+                * (i.e., freed and the same ID allocated again).
+                */
+               xstats->realloc_update++;
+               return;
+       }
+
+       cnt_val.pkts = p->value.pkts + pkts;
+       cnt_val.bytes = p->value.bytes + bytes;
+
+       /*
+        * Ordering is relaxed since it is the only operation on counter value.
+        * And it does not depend on different stores/loads in other threads.
+        * Paired with relaxed ordering on counter reset.
+        */
+       __atomic_store(&p->value.pkts_bytes,
+                      &cnt_val.pkts_bytes, __ATOMIC_RELAXED);
+
+       sfc_info(sa, "update MAE counter #%u: pkts+%" PRIu64 "=%" PRIu64
+                ", bytes+%" PRIu64 "=%" PRIu64, mae_counter_id,
+                pkts, cnt_val.pkts, bytes, cnt_val.bytes);
+}
+
+/*
+ * Parse one MAE counters packet and apply the counter updates it carries.
+ *
+ * The packet is dropped with an error message if it is scattered, is
+ * shorter than the packetiser header, has an unexpected version, source
+ * identifier or header offset, is shorter than its counter count requires,
+ * or has a payload offset which is not 32-bit aligned. Otherwise every
+ * counter record of the payload is passed to sfc_mae_counter_increment()
+ * together with the generation count taken from the mbuf.
+ */
+static void
+sfc_mae_parse_counter_packet(struct sfc_adapter *sa,
+                            struct sfc_mae_counter_registry *counter_registry,
+                            const struct rte_mbuf *m)
+{
+       uint32_t generation_count;
+       const efx_xword_t *hdr;
+       const efx_oword_t *counters_data;
+       unsigned int version;
+       unsigned int id;
+       unsigned int header_offset;
+       unsigned int payload_offset;
+       unsigned int counter_count;
+       unsigned int required_len;
+       unsigned int i;
+
+       if (unlikely(m->nb_segs != 1)) {
+               sfc_err(sa, "unexpectedly scattered MAE counters packet (%u segments)",
+                       m->nb_segs);
+               return;
+       }
+
+       if (unlikely(m->data_len < ER_RX_SL_PACKETISER_HEADER_WORD_SIZE)) {
+               sfc_err(sa, "too short MAE counters packet (%u bytes)",
+                       m->data_len);
+               return;
+       }
+
+       /*
+        * The generation count is located in the Rx prefix in the USER_MARK
+        * field which is written into hash.fdir.hi field of an mbuf. See
+        * SF-123581-TC SmartNIC Datapath Offloads section 4.7.5 Counters.
+        */
+       generation_count = m->hash.fdir.hi;
+
+       hdr = rte_pktmbuf_mtod(m, const efx_xword_t *);
+
+       version = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_VERSION);
+       if (unlikely(version != ERF_SC_PACKETISER_HEADER_VERSION_2)) {
+               sfc_err(sa, "unexpected MAE counters packet version %u",
+                       version);
+               return;
+       }
+
+       id = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_IDENTIFIER);
+       if (unlikely(id != ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR)) {
+               sfc_err(sa, "unexpected MAE counters source identifier %u", id);
+               return;
+       }
+
+       /* Packet layout definitions assume fixed header offset in fact */
+       header_offset =
+               EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_HEADER_OFFSET);
+       if (unlikely(header_offset !=
+                    ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT)) {
+               sfc_err(sa, "unexpected MAE counters packet header offset %u",
+                       header_offset);
+               return;
+       }
+
+       payload_offset =
+               EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_PAYLOAD_OFFSET);
+
+       counter_count = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_COUNT);
+
+       required_len = payload_offset +
+                       counter_count * sizeof(counters_data[0]);
+       if (unlikely(required_len > m->data_len)) {
+               sfc_err(sa, "truncated MAE counters packet: %u counters, packet length is %u vs %u required",
+                       counter_count, m->data_len, required_len);
+               /*
+                * In theory it is possible to process the available counters
+                * data, but such a condition is really unexpected and it is
+                * better to treat the entire packet as corrupted.
+                */
+               return;
+       }
+
+       /* Ensure that counters data is 32-bit aligned */
+       if (unlikely(payload_offset % sizeof(uint32_t) != 0)) {
+               sfc_err(sa, "unsupported MAE counters payload offset %u, must be 32-bit aligned",
+                       payload_offset);
+               return;
+       }
+       RTE_BUILD_BUG_ON(sizeof(counters_data[0]) !=
+                       ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE);
+
+       counters_data =
+               rte_pktmbuf_mtod_offset(m, const efx_oword_t *, payload_offset);
+
+       sfc_info(sa, "update %u MAE counters with gc=%u",
+                counter_count, generation_count);
+
+       for (i = 0; i < counter_count; ++i) {
+               uint32_t packet_count_lo;
+               uint32_t packet_count_hi;
+               uint32_t byte_count_lo;
+               uint32_t byte_count_hi;
+
+               /*
+                * Use 32-bit field accessors below since counters data
+                * is not 64-bit aligned.
+                * 32-bit alignment is checked above taking into account
+                * that start of packet data is 32-bit aligned
+                * (cache-line size aligned in fact).
+                */
+               packet_count_lo =
+                       EFX_OWORD_FIELD32(counters_data[i],
+                               ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO);
+               packet_count_hi =
+                       EFX_OWORD_FIELD32(counters_data[i],
+                               ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_HI);
+               byte_count_lo =
+                       EFX_OWORD_FIELD32(counters_data[i],
+                               ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO);
+               byte_count_hi =
+                       EFX_OWORD_FIELD32(counters_data[i],
+                               ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_HI);
+               /* Combine the 32-bit halves into 64-bit packet/byte counts */
+               sfc_mae_counter_increment(sa,
+                       &counter_registry->counters,
+                       EFX_OWORD_FIELD32(counters_data[i],
+                               ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX),
+                       generation_count,
+                       (uint64_t)packet_count_lo |
+                       ((uint64_t)packet_count_hi <<
+                        ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO_WIDTH),
+                       (uint64_t)byte_count_lo |
+                       ((uint64_t)byte_count_hi <<
+                        ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO_WIDTH));
+       }
+}
+
+/*
+ * Counter service callback.
+ *
+ * Receives one burst from the counter Rx queue, passes every packet to
+ * the counters parser and frees the mbufs. When the stream uses credits,
+ * credits are given for the buffers pushed since the last successful
+ * grant as soon as their number reaches the refill level.
+ *
+ * Always returns 0; a failure to give credits is only logged.
+ */
+static int32_t
+sfc_mae_counter_routine(void *arg)
+{
+       struct sfc_adapter *sa = arg;
+       struct sfc_mae_counter_registry *registry = &sa->mae.counter_registry;
+       struct rte_mbuf *pkts[SFC_MAE_COUNTER_RX_BURST];
+       unsigned int n_pushed;
+       unsigned int n_credits;
+       unsigned int idx;
+       uint16_t n_rx;
+       int rc;
+
+       n_rx = registry->rx_pkt_burst(registry->rx_dp, pkts,
+                                     SFC_MAE_COUNTER_RX_BURST);
+
+       for (idx = 0; idx < n_rx; idx++)
+               sfc_mae_parse_counter_packet(sa, registry, pkts[idx]);
+
+       rte_pktmbuf_free_bulk(pkts, n_rx);
+
+       /* Nothing else to do if the stream does not use credits */
+       if (!registry->use_credits)
+               return 0;
+
+       n_pushed = sfc_rx_get_pushed(sa, registry->rx_dp);
+       n_credits = n_pushed - registry->pushed_n_buffers;
+
+       /* Too few buffers accumulated so far, wait for the next iteration */
+       if (n_credits < SFC_COUNTER_RXQ_REFILL_LEVEL)
+               return 0;
+
+       rc = efx_mae_counters_stream_give_credits(sa->nic, n_credits);
+       if (rc != 0) {
+               /*
+                * FIXME: counters might be important for the
+                * application. Handle the error in order to recover
+                * from the failure
+                */
+               SFC_GENERIC_LOG(DEBUG, "Give credits failed: %s",
+                               rte_strerror(rc));
+               return 0;
+       }
+
+       /* Remember the position only once the credits have been accepted */
+       registry->pushed_n_buffers = n_pushed;
+
+       return 0;
+}
+
+/*
+ * Stop the counter service, wait (for at most 10 seconds) until it is
+ * no longer active, then unmap it from its service core and unregister
+ * the service component.
+ */
+static void
+sfc_mae_counter_service_unregister(struct sfc_adapter *sa)
+{
+       struct sfc_mae_counter_registry *registry =
+               &sa->mae.counter_registry;
+       const unsigned int timeout_ms = 10000;
+       unsigned int waited_ms = 0;
+
+       rte_service_runstate_set(registry->service_id, 0);
+       rte_service_component_runstate_set(registry->service_id, 0);
+
+       /*
+        * Let the counter routine finish its last iteration,
+        * polling once per millisecond until the timeout expires.
+        */
+       while (waited_ms < timeout_ms &&
+              rte_service_may_be_active(registry->service_id) != 0) {
+               rte_delay_ms(1);
+               waited_ms++;
+       }
+       if (waited_ms == timeout_ms)
+               sfc_warn(sa, "failed to wait for counter service to stop");
+
+       rte_service_map_lcore_set(registry->service_id,
+                                 registry->service_core_id, 0);
+
+       rte_service_component_unregister(registry->service_id);
+}
+
+/*
+ * Return the Rx queue info entry of the counter Rx queue, i.e. the entry
+ * of the shared rxq_info array selected by the counter queue SW index.
+ */
+static struct sfc_rxq_info *
+sfc_counter_rxq_info_get(struct sfc_adapter *sa)
+{
+       return sfc_sa2shared(sa)->rxq_info + sa->counter_rxq.sw_index;
+}
+
+/*
+ * Register the counter polling service and run it on a service core.
+ *
+ * A service lcore is requested for the adapter socket first and, if none
+ * is available there, for any socket. The core is started if needed, the
+ * service component is registered, mapped to the core and switched to
+ * the running state. Everything set up here is undone on failure.
+ *
+ * @param sa                    Adapter the service polls counters for
+ * @param counter_stream_flags  Flags reported on counter stream start;
+ *                              only the "uses credits" flag is examined
+ *
+ * @return 0 on success, positive errno otherwise: ENOTSUP if no service
+ *         lcore is available or the core cannot be started, ENOEXEC if
+ *         the component cannot be registered, or the negated error code
+ *         of the failed rte_service call.
+ */
+static int
+sfc_mae_counter_service_register(struct sfc_adapter *sa,
+                                uint32_t counter_stream_flags)
+{
+       struct rte_service_spec service;
+       struct sfc_mae_counter_registry *counter_registry =
+               &sa->mae.counter_registry;
+       uint32_t cid;
+       uint32_t sid;
+       int rc;
+
+       sfc_log_init(sa, "entry");
+
+       /* Prepare service info */
+       memset(&service, 0, sizeof(service));
+       rte_strscpy(service.name, "counter_service", sizeof(service.name));
+       service.socket_id = sa->socket_id;
+       service.callback = sfc_mae_counter_routine;
+       service.callback_userdata = sa;
+
+       /* The routine reads these fields, so fill them in before it runs */
+       counter_registry->rx_pkt_burst = sa->eth_dev->rx_pkt_burst;
+       counter_registry->rx_dp = sfc_counter_rxq_info_get(sa)->dp;
+       counter_registry->pushed_n_buffers = 0;
+       counter_registry->use_credits = counter_stream_flags &
+               EFX_MAE_COUNTERS_STREAM_OUT_USES_CREDITS;
+
+       cid = sfc_get_service_lcore(sa->socket_id);
+       if (cid == RTE_MAX_LCORE && sa->socket_id != SOCKET_ID_ANY) {
+               /* Warn and try to allocate on any NUMA node */
+               sfc_warn(sa,
+                       "failed to get service lcore for counter service at socket %d",
+                       sa->socket_id);
+
+               cid = sfc_get_service_lcore(SOCKET_ID_ANY);
+       }
+       if (cid == RTE_MAX_LCORE) {
+               rc = ENOTSUP;
+               sfc_err(sa, "failed to get service lcore for counter service");
+               goto fail_get_service_lcore;
+       }
+
+       /* Service core may be in "stopped" state, start it */
+       rc = rte_service_lcore_start(cid);
+       if (rc != 0 && rc != -EALREADY) {
+               sfc_err(sa, "failed to start service core for counter service: %s",
+                       rte_strerror(-rc));
+               rc = ENOTSUP;
+               goto fail_start_core;
+       }
+
+       /* Register counter service */
+       rc = rte_service_component_register(&service, &sid);
+       if (rc != 0) {
+               rc = ENOEXEC;
+               sfc_err(sa, "failed to register counter service component");
+               goto fail_register;
+       }
+
+       /* Map the service with the service core */
+       rc = rte_service_map_lcore_set(sid, cid, 1);
+       if (rc != 0) {
+               rc = -rc;
+               sfc_err(sa, "failed to map lcore for counter service: %s",
+                       rte_strerror(rc));
+               goto fail_map_lcore;
+       }
+
+       /* Run the service */
+       rc = rte_service_component_runstate_set(sid, 1);
+       if (rc < 0) {
+               rc = -rc;
+               sfc_err(sa, "failed to run counter service component: %s",
+                       rte_strerror(rc));
+               goto fail_component_runstate_set;
+       }
+       rc = rte_service_runstate_set(sid, 1);
+       if (rc < 0) {
+               rc = -rc;
+               sfc_err(sa, "failed to run counter service: %s",
+                       rte_strerror(rc));
+               goto fail_runstate_set;
+       }
+
+       /* Remember the IDs needed to stop and unregister the service */
+       counter_registry->service_core_id = cid;
+       counter_registry->service_id = sid;
+
+       sfc_log_init(sa, "done");
+
+       return 0;
+
+fail_runstate_set:
+       rte_service_component_runstate_set(sid, 0);
+
+fail_component_runstate_set:
+       rte_service_map_lcore_set(sid, cid, 0);
+
+fail_map_lcore:
+       rte_service_component_unregister(sid);
+
+fail_register:
+fail_start_core:
+fail_get_service_lcore:
+       sfc_log_init(sa, "failed: %s", rte_strerror(rc));
+
+       return rc;
+}
+
+/*
+ * Allocate a zero-initialised array of nb_counters_max counter entries
+ * and record its size in the counters collection.
+ *
+ * Returns 0 on success or ENOMEM if the allocation fails.
+ */
+int
+sfc_mae_counters_init(struct sfc_mae_counters *counters,
+                     uint32_t nb_counters_max)
+{
+       size_t size = sizeof(*counters->mae_counters) * nb_counters_max;
+
+       SFC_GENERIC_LOG(DEBUG, "%s: entry", __func__);
+
+       counters->mae_counters = rte_zmalloc("sfc_mae_counters", size, 0);
+       if (counters->mae_counters != NULL) {
+               counters->n_mae_counters = nb_counters_max;
+
+               SFC_GENERIC_LOG(DEBUG, "%s: done", __func__);
+
+               return 0;
+       }
+
+       SFC_GENERIC_LOG(ERR, "%s: failed: %s", __func__,
+                       rte_strerror(ENOMEM));
+
+       return ENOMEM;
+}
+
+/*
+ * Release the counters array and reset the pointer to NULL so that
+ * no dangling reference to the freed memory is left behind.
+ */
+void
+sfc_mae_counters_fini(struct sfc_mae_counters *counters)
+{
+       void *mem = counters->mae_counters;
+
+       rte_free(mem);
+       counters->mae_counters = NULL;
+}
+
 int
 sfc_mae_counter_rxq_attach(struct sfc_adapter *sa)
 {
@@ -215,3 +731,65 @@ sfc_mae_counter_rxq_fini(struct sfc_adapter *sa)
 
        sfc_log_init(sa, "done");
 }
+
+/*
+ * Stop counters delivery: unregister the counter service first and then
+ * stop the counters stream on the counter Rx queue.
+ * Does nothing if the counter queue is not running.
+ */
+void
+sfc_mae_counter_stop(struct sfc_adapter *sa)
+{
+       struct sfc_mae *mae = &sa->mae;
+
+       sfc_log_init(sa, "entry");
+
+       if (mae->counter_rxq_running) {
+               sfc_mae_counter_service_unregister(sa);
+               efx_mae_counters_stream_stop(sa->nic,
+                                            sa->counter_rxq.sw_index, NULL);
+
+               mae->counter_rxq_running = false;
+
+               sfc_log_init(sa, "done");
+       } else {
+               sfc_log_init(sa, "counter queue is not running - skip");
+       }
+}
+
+/*
+ * Start counters delivery: start the counters stream on the counter Rx
+ * queue and register the service which polls that queue.
+ * The counter Rx queue must be attached. Returns 0 right away if the
+ * counter queue is already running.
+ *
+ * Returns 0 on success or the error code of the step which failed;
+ * the stream is stopped again if the service cannot be registered.
+ */
+int
+sfc_mae_counter_start(struct sfc_adapter *sa)
+{
+       struct sfc_mae *mae = &sa->mae;
+       uint32_t stream_flags;
+       int rc;
+
+       SFC_ASSERT(sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED);
+
+       if (mae->counter_rxq_running)
+               return 0;
+
+       sfc_log_init(sa, "entry");
+
+       rc = efx_mae_counters_stream_start(sa->nic, sa->counter_rxq.sw_index,
+                                          SFC_MAE_COUNTER_STREAM_PACKET_SIZE,
+                                          0 /* No flags required */,
+                                          &stream_flags);
+       if (rc == 0) {
+               sfc_log_init(sa, "stream start flags: 0x%x", stream_flags);
+
+               rc = sfc_mae_counter_service_register(sa, stream_flags);
+               if (rc == 0) {
+                       mae->counter_rxq_running = true;
+
+                       return 0;
+               }
+
+               /* Roll back the stream started above */
+               efx_mae_counters_stream_stop(sa->nic,
+                                            sa->counter_rxq.sw_index, NULL);
+       } else {
+               sfc_err(sa, "failed to start MAE counters stream: %s",
+                       rte_strerror(rc));
+       }
+
+       sfc_log_init(sa, "failed: %s", rte_strerror(rc));
+
+       return rc;
+}
diff --git a/drivers/net/sfc/sfc_mae_counter.h b/drivers/net/sfc/sfc_mae_counter.h
index f16d64a999..f61a6b59cb 100644
--- a/drivers/net/sfc/sfc_mae_counter.h
+++ b/drivers/net/sfc/sfc_mae_counter.h
@@ -38,6 +38,17 @@ void sfc_mae_counter_rxq_detach(struct sfc_adapter *sa);
 int sfc_mae_counter_rxq_init(struct sfc_adapter *sa);
 void sfc_mae_counter_rxq_fini(struct sfc_adapter *sa);
 
+/**
+ * Allocate zeroed storage for nb_counters_max counters.
+ * Returns 0 on success or a positive errno value (ENOMEM).
+ */
+int sfc_mae_counters_init(struct sfc_mae_counters *counters,
+                         uint32_t nb_counters_max);
+/** Free the storage allocated by sfc_mae_counters_init(). */
+void sfc_mae_counters_fini(struct sfc_mae_counters *counters);
+/*
+ * NOTE(review): the enable/disable implementations are not visible next
+ * to these prototypes; presumably they allocate/free a counter for a
+ * flow rule and return 0 or a positive errno - confirm against
+ * sfc_mae_counter.c.
+ */
+int sfc_mae_counter_enable(struct sfc_adapter *sa,
+                          struct sfc_mae_counter_id *counterp);
+int sfc_mae_counter_disable(struct sfc_adapter *sa,
+                           struct sfc_mae_counter_id *counter);
+
+/**
+ * Start the counters stream and register the counter polling service.
+ * Returns 0 on success (or if already running), an error code otherwise.
+ */
+int sfc_mae_counter_start(struct sfc_adapter *sa);
+/**
+ * Unregister the counter polling service and stop the counters stream.
+ * Does nothing if the counter queue is not running.
+ */
+void sfc_mae_counter_stop(struct sfc_adapter *sa);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/drivers/net/sfc/sfc_stats.h b/drivers/net/sfc/sfc_stats.h
new file mode 100644
index 0000000000..2d7ab71f14
--- /dev/null
+++ b/drivers/net/sfc/sfc_stats.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2019-2021 Xilinx, Inc.
+ * Copyright(c) 2019 Solarflare Communications Inc.
+ *
+ * This software was jointly developed between OKTET Labs (under contract
+ * for Solarflare) and Solarflare Communications, Inc.
+ */
+
+#ifndef _SFC_STATS_H
+#define _SFC_STATS_H
+
+#include <stdint.h>
+
+#include <rte_atomic.h>
+
+#include "sfc_tweak.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * A pair of 64-bit packets and bytes counters overlaid with a single
+ * 128-bit integer, so that both counters can be stored or loaded in
+ * one atomic operation when a consistent snapshot is required.
+ */
+union sfc_pkts_bytes {
+       /* Individual counters; anonymous struct, hence RTE_STD_C11 */
+       RTE_STD_C11
+       struct {
+               uint64_t                pkts;
+               uint64_t                bytes;
+       };
+       /* The same 16 bytes viewed as one value for atomic access */
+       rte_int128_t                    pkts_bytes;
+};
+
+/**
+ * Add the given numbers of packets and bytes to a counters pair.
+ *
+ * The counters must be written on one core only. If SFC_SW_STATS_ATOMIC
+ * is enabled, both new values are published with a single 128-bit
+ * atomic store; otherwise the two counters are updated separately.
+ */
+static inline void
+sfc_pkts_bytes_add(union sfc_pkts_bytes *st, uint64_t pkts, uint64_t bytes)
+{
+#if !SFC_SW_STATS_ATOMIC
+       st->pkts += pkts;
+       st->bytes += bytes;
+#else
+       union sfc_pkts_bytes updated;
+
+       /* Single writer: current values may be read without atomics */
+       updated.pkts = st->pkts + pkts;
+       updated.bytes = st->bytes + bytes;
+
+       /*
+        * One atomic store of the whole pair guarantees that a reader
+        * core sees both counter updates together.
+        */
+       __atomic_store_n(&st->pkts_bytes.int128, updated.pkts_bytes.int128,
+                        __ATOMIC_RELEASE);
+#endif
+}
+
+/**
+ * Copy a packets and bytes counters pair to @p result.
+ *
+ * The copy is a single 128-bit atomic load if SFC_SW_STATS_ATOMIC is
+ * enabled and a plain structure copy otherwise.
+ */
+static inline void
+sfc_pkts_bytes_get(const union sfc_pkts_bytes *st, union sfc_pkts_bytes *result)
+{
+#if !SFC_SW_STATS_ATOMIC
+       *result = *st;
+#else
+       result->pkts_bytes.int128 = __atomic_load_n(&st->pkts_bytes.int128,
+                                                   __ATOMIC_ACQUIRE);
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _SFC_STATS_H */
diff --git a/drivers/net/sfc/sfc_tweak.h b/drivers/net/sfc/sfc_tweak.h
index f2d8701421..d09c7a3125 100644
--- a/drivers/net/sfc/sfc_tweak.h
+++ b/drivers/net/sfc/sfc_tweak.h
@@ -42,4 +42,13 @@
  */
 #define SFC_RXD_WAIT_TIMEOUT_NS_DEF    (200U * 1000)
 
+/**
+ * Ideally, reading packet and byte counters together should return
+ * consistent values, i.e. the number of bytes should correspond to the
+ * number of packets. Since the counters are updated in one thread and
+ * queried in another, that requires either locking or atomics, both of
+ * which are very expensive from the performance point of view.
+ * So, it is disabled by default; set to 1 to enable atomic updates.
+ */
+#define SFC_SW_STATS_ATOMIC            0
+
 #endif /* _SFC_TWEAK_H_ */
-- 
2.30.2

Reply via email to