[dpdk-dev] [PATCH v6 07/23] event/dlb: add flexible interface
This commit introduces the flexible interface. This interface allows the core code to operate in PF mode (direct hardware access) or bifurcated mode (hardware configured via the kernel driver). The driver currently supports only PF mode; bifurcated mode will be added in a future patch set. Note that the flexible interface is not used for data path operations, and thus there are no performance concerns related to the use of function pointers. Signed-off-by: Timothy McDaniel --- drivers/event/dlb/dlb.c | 1 + drivers/event/dlb/dlb_iface.c | 27 +++ drivers/event/dlb/dlb_iface.h | 27 +++ drivers/event/dlb/meson.build | 1 + drivers/event/dlb/pf/dlb_pf.c | 1 + 5 files changed, 57 insertions(+) create mode 100644 drivers/event/dlb/dlb_iface.c create mode 100644 drivers/event/dlb/dlb_iface.h diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 1659f93..8008a50 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -33,6 +33,7 @@ #include #include "dlb_priv.h" +#include "dlb_iface.h" #include "dlb_inline_fns.h" /* diff --git a/drivers/event/dlb/dlb_iface.c b/drivers/event/dlb/dlb_iface.c new file mode 100644 index 000..dd72120 --- /dev/null +++ b/drivers/event/dlb/dlb_iface.c @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2020 Intel Corporation + */ + +#include + +#include "dlb_priv.h" + +/* DLB PMD Internal interface function pointers. + * If VDEV (bifurcated PMD), these will resolve to functions that issue ioctls + * serviced by DLB kernel module. + * If PCI (PF PMD), these will be implemented locally in user mode. + */ + +void (*dlb_iface_low_level_io_init)(struct dlb_eventdev *dlb); + +int (*dlb_iface_open)(struct dlb_hw_dev *handle, const char *name); + +int (*dlb_iface_get_device_version)(struct dlb_hw_dev *handle, + uint8_t *revision); + +int (*dlb_iface_get_num_resources)(struct dlb_hw_dev *handle, + struct dlb_get_num_resources_args *rsrcs); + +int (*dlb_iface_get_cq_poll_mode)(struct dlb_hw_dev *handle, + enum dlb_cq_poll_modes *mode); + diff --git a/drivers/event/dlb/dlb_iface.h b/drivers/event/dlb/dlb_iface.h new file mode 100644 index 000..416d1b3 --- /dev/null +++ b/drivers/event/dlb/dlb_iface.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2020 Intel Corporation + */ + +#ifndef _DLB_IFACE_H +#define _DLB_IFACE_H + +/* DLB PMD Internal interface function pointers. + * If VDEV (bifurcated PMD), these will resolve to functions that issue ioctls + * serviced by DLB kernel module. + * If PCI (PF PMD), these will be implemented locally in user mode. 
+ */ + +extern void (*dlb_iface_low_level_io_init)(struct dlb_eventdev *dlb); + +extern int (*dlb_iface_open)(struct dlb_hw_dev *handle, const char *name); + +extern int (*dlb_iface_get_device_version)(struct dlb_hw_dev *handle, + uint8_t *revision); + +extern int (*dlb_iface_get_num_resources)(struct dlb_hw_dev *handle, + struct dlb_get_num_resources_args *rsrcs); + +extern int (*dlb_iface_get_cq_poll_mode)(struct dlb_hw_dev *handle, +enum dlb_cq_poll_modes *mode); + +#endif /* _DLB_IFACE_H */ diff --git a/drivers/event/dlb/meson.build b/drivers/event/dlb/meson.build index b4bdc8b..8707d3d 100644 --- a/drivers/event/dlb/meson.build +++ b/drivers/event/dlb/meson.build @@ -8,6 +8,7 @@ if not is_linux or not dpdk_conf.has('RTE_ARCH_X86_64') endif sources = files('dlb.c', + 'dlb_iface.c', 'pf/dlb_main.c', 'pf/dlb_pf.c' ) diff --git a/drivers/event/dlb/pf/dlb_pf.c b/drivers/event/dlb/pf/dlb_pf.c index 3f836f3..05fd76c 100644 --- a/drivers/event/dlb/pf/dlb_pf.c +++ b/drivers/event/dlb/pf/dlb_pf.c @@ -27,6 +27,7 @@ #include #include "../dlb_priv.h" +#include "../dlb_iface.h" #include "../dlb_inline_fns.h" #include "dlb_main.h" #include "base/dlb_hw_types.h" -- 2.6.4
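To illustrate how the interface above is meant to be consumed, here is a minimal sketch of a PF backend binding the dlb_iface_* pointers to its local implementations at probe time. Only the dlb_iface_* symbols and their signatures come from the patch; the dlb_pf_* function names and the init helper are hypothetical.

#include <rte_common.h>

#include "dlb_priv.h"
#include "dlb_iface.h"

/* Hypothetical PF-mode implementations: direct hardware access, no ioctls. */
static void
dlb_pf_low_level_io_init(struct dlb_eventdev *dlb)
{
        RTE_SET_USED(dlb); /* would map producer port and CQ windows here */
}

static int
dlb_pf_open(struct dlb_hw_dev *handle, const char *name)
{
        RTE_SET_USED(handle);
        RTE_SET_USED(name);
        return 0; /* nothing to open; the PF PMD owns the device directly */
}

/* Called once during probe to select the PF backend. */
void
dlb_pf_iface_fn_ptrs_init(void)
{
        dlb_iface_low_level_io_init = dlb_pf_low_level_io_init;
        dlb_iface_open = dlb_pf_open;
}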
[dpdk-dev] [PATCH v6 08/23] event/dlb: add probe-time hardware init
This commit adds probe-time low level hardware initialization. It also adds probe-time init for both primary and secondary DPDK processes. Signed-off-by: Timothy McDaniel --- drivers/event/dlb/dlb.c | 158 +++- drivers/event/dlb/meson.build| 3 +- drivers/event/dlb/pf/base/dlb_resource.c | 302 +++ drivers/event/dlb/pf/dlb_main.c | 20 +- drivers/event/dlb/pf/dlb_pf.c| 86 - 5 files changed, 561 insertions(+), 8 deletions(-) create mode 100644 drivers/event/dlb/pf/base/dlb_resource.c diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 8008a50..57b2837 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -42,10 +42,92 @@ #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX) #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues" #endif +static struct rte_event_dev_info evdev_dlb_default_info = { + .driver_name = "", /* probe will set */ + .min_dequeue_timeout_ns = DLB_MIN_DEQUEUE_TIMEOUT_NS, + .max_dequeue_timeout_ns = DLB_MAX_DEQUEUE_TIMEOUT_NS, +#if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB_MAX_NUM_LDB_QUEUES) + .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV, +#else + .max_event_queues = DLB_MAX_NUM_LDB_QUEUES, +#endif + .max_event_queue_flows = DLB_MAX_NUM_FLOWS, + .max_event_queue_priority_levels = DLB_QID_PRIORITIES, + .max_event_priority_levels = DLB_QID_PRIORITIES, + .max_event_ports = DLB_MAX_NUM_LDB_PORTS, + .max_event_port_dequeue_depth = DLB_MAX_CQ_DEPTH, + .max_event_port_enqueue_depth = DLB_MAX_ENQUEUE_DEPTH, + .max_event_port_links = DLB_MAX_NUM_QIDS_PER_LDB_CQ, + .max_num_events = DLB_MAX_NUM_LDB_CREDITS, + .max_single_link_event_port_queue_pairs = DLB_MAX_NUM_DIR_PORTS, + .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS | + RTE_EVENT_DEV_CAP_EVENT_QOS | + RTE_EVENT_DEV_CAP_BURST_MODE | + RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED | + RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE | + RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES), +}; struct process_local_port_data dlb_port[DLB_MAX_NUM_PORTS][NUM_DLB_PORT_TYPES]; +static int +dlb_hw_query_resources(struct dlb_eventdev *dlb) +{ + struct dlb_hw_dev *handle = &dlb->qm_instance; + struct dlb_hw_resource_info *dlb_info = &handle->info; + int ret; + + ret = dlb_iface_get_num_resources(handle, + &dlb->hw_rsrc_query_results); + if (ret) { + DLB_LOG_ERR("get dlb num resources, err=%d\n", ret); + return ret; + } + + /* Complete filling in device resource info returned to evdev app, +* overriding any default values. +* The capabilities (CAPs) were set at compile time. +*/ + + evdev_dlb_default_info.max_event_queues = + dlb->hw_rsrc_query_results.num_ldb_queues; + + evdev_dlb_default_info.max_event_ports = + dlb->hw_rsrc_query_results.num_ldb_ports; + + evdev_dlb_default_info.max_num_events = + dlb->hw_rsrc_query_results.max_contiguous_ldb_credits; + + /* Save off values used when creating the scheduling domain. 
*/ + + handle->info.num_sched_domains = + dlb->hw_rsrc_query_results.num_sched_domains; + + handle->info.hw_rsrc_max.nb_events_limit = + dlb->hw_rsrc_query_results.max_contiguous_ldb_credits; + + handle->info.hw_rsrc_max.num_queues = + dlb->hw_rsrc_query_results.num_ldb_queues + + dlb->hw_rsrc_query_results.num_dir_ports; + + handle->info.hw_rsrc_max.num_ldb_queues = + dlb->hw_rsrc_query_results.num_ldb_queues; + + handle->info.hw_rsrc_max.num_ldb_ports = + dlb->hw_rsrc_query_results.num_ldb_ports; + + handle->info.hw_rsrc_max.num_dir_ports = + dlb->hw_rsrc_query_results.num_dir_ports; + + handle->info.hw_rsrc_max.reorder_window_size = + dlb->hw_rsrc_query_results.num_hist_list_entries; + + rte_memcpy(dlb_info, &handle->info.hw_rsrc_max, sizeof(*dlb_info)); + + return 0; +} + /* Wrapper for string to int conversion. Substituted for atoi(...), which is * unsafe. */ @@ -227,9 +309,54 @@ dlb_primary_eventdev_probe(struct rte_eventdev *dev, const char *name, struct dlb_devargs *dlb_args) { - RTE_SET_USED(dev); - RTE_SET_USED(name); - RTE_SET_USED(dlb_args); + struct dlb_eventdev *dlb; + int err; + + dlb = dev->data->dev_private; + + dlb->event_dev = dev; /* backlink */ + + evdev_dlb_default_info.driver_name = name; + + dlb->max_num_events_override = dlb_args->max_num_events; + dlb->num_dir_credits_override = dlb_args->num_dir_credits_o
[dpdk-dev] [PATCH v6 12/23] event/dlb: add queue setup
Load balanced (ldb) queues are setup here. Directed queues are not set up until link time, at which point we know the directed port ID. Directed queue setup will only fail if this queue is already setup or there are no directed queues left to configure. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- doc/guides/eventdevs/dlb.rst | 35 +++ drivers/event/dlb/dlb.c | 293 +++ drivers/event/dlb/dlb_iface.c| 12 + drivers/event/dlb/dlb_iface.h| 12 + drivers/event/dlb/pf/base/dlb_resource.c | 386 +++ drivers/event/dlb/pf/dlb_pf.c| 81 +++ 6 files changed, 819 insertions(+) diff --git a/doc/guides/eventdevs/dlb.rst b/doc/guides/eventdevs/dlb.rst index 3ac7393..4557ee5 100644 --- a/doc/guides/eventdevs/dlb.rst +++ b/doc/guides/eventdevs/dlb.rst @@ -82,3 +82,38 @@ The PMD does not support the following configuration sequences: This sequence is not supported because the event device must be reconfigured before its ports or queues can be. +Load-Balanced Queues +~~~ + +A load-balanced queue can support atomic and ordered scheduling, or atomic and +unordered scheduling, but not atomic and unordered and ordered scheduling. A +queue's scheduling types are controlled by the event queue configuration. + +If the user sets the ``RTE_EVENT_QUEUE_CFG_ALL_TYPES`` flag, the +``nb_atomic_order_sequences`` determines the supported scheduling types. +With non-zero ``nb_atomic_order_sequences``, the queue is configured for atomic +and ordered scheduling. In this case, ``RTE_SCHED_TYPE_PARALLEL`` scheduling is +supported by scheduling those events as ordered events. Note that when the +event is dequeued, its sched_type will be ``RTE_SCHED_TYPE_ORDERED``. Else if +``nb_atomic_order_sequences`` is zero, the queue is configured for atomic and +unordered scheduling. In this case, ``RTE_SCHED_TYPE_ORDERED`` is unsupported. + +If the ``RTE_EVENT_QUEUE_CFG_ALL_TYPES`` flag is not set, schedule_type +dictates the queue's scheduling type. + +The ``nb_atomic_order_sequences`` queue configuration field sets the ordered +queue's reorder buffer size. DLB has 4 groups of ordered queues, where each +group is configured to contain either 1 queue with 1024 reorder entries, 2 +queues with 512 reorder entries, and so on down to 32 queues with 32 entries. + +When a load-balanced queue is created, the PMD will configure a new sequence +number group on-demand if num_sequence_numbers does not match a pre-existing +group with available reorder buffer entries. If all sequence number groups are +in use, no new group will be created and queue configuration will fail. (Note +that when the PMD is used with a virtual DLB device, it cannot change the +sequence number configuration.) + +The queue's ``nb_atomic_flows`` parameter is ignored by the DLB PMD, because +the DLB does not limit the number of flows a queue can track. In the DLB, all +load-balanced queues can use the full 16-bit flow ID range. 
+ diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index e98a438..edcc6d1 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -657,6 +657,298 @@ dlb_eventdev_queue_default_conf_get(struct rte_eventdev *dev, queue_conf->priority = 0; } +static int32_t +dlb_hw_create_ldb_queue(struct dlb_eventdev *dlb, + struct dlb_queue *queue, + const struct rte_event_queue_conf *evq_conf) +{ + struct dlb_hw_dev *handle = &dlb->qm_instance; + struct dlb_create_ldb_queue_args cfg; + struct dlb_cmd_response response; + int32_t ret; + uint32_t qm_qid; + int sched_type = -1; + + if (evq_conf == NULL) + return -EINVAL; + + if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) { + if (evq_conf->nb_atomic_order_sequences != 0) + sched_type = RTE_SCHED_TYPE_ORDERED; + else + sched_type = RTE_SCHED_TYPE_PARALLEL; + } else + sched_type = evq_conf->schedule_type; + + cfg.response = (uintptr_t)&response; + cfg.num_atomic_inflights = dlb->num_atm_inflights_per_queue; + cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences; + cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences; + + if (sched_type != RTE_SCHED_TYPE_ORDERED) { + cfg.num_sequence_numbers = 0; + cfg.num_qid_inflights = DLB_DEF_UNORDERED_QID_INFLIGHTS; + } + + ret = dlb_iface_ldb_queue_create(handle, &cfg); + if (ret < 0) { + DLB_LOG_ERR("dlb: create LB event queue error, ret=%d (driver status: %s)\n", + ret, dlb_error_strings[response.status]); + return -EINVAL; + } + + qm_qid = response.id; + + /* Save off queue config for debug, resource lookups, and reconfig */ +
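To make the queue configuration rules above concrete, the following hedged application-side sketch creates one load-balanced "all types" queue with a 64-entry reorder buffer and one single-link (directed) queue. The device and queue IDs are placeholders.

#include <rte_eventdev.h>

static int
setup_example_queues(uint8_t dev_id)
{
        struct rte_event_queue_conf qconf;
        int ret;

        /* Load-balanced queue supporting atomic and ordered scheduling:
         * ALL_TYPES plus a non-zero reorder buffer size.
         */
        ret = rte_event_queue_default_conf_get(dev_id, 0, &qconf);
        if (ret < 0)
                return ret;
        qconf.event_queue_cfg = RTE_EVENT_QUEUE_CFG_ALL_TYPES;
        qconf.nb_atomic_order_sequences = 64; /* reorder buffer entries */
        ret = rte_event_queue_setup(dev_id, 0, &qconf);
        if (ret < 0)
                return ret;

        /* Directed queue: accepted here, but the PMD defers the hardware
         * setup until the queue is linked to its directed port.
         */
        ret = rte_event_queue_default_conf_get(dev_id, 1, &qconf);
        if (ret < 0)
                return ret;
        qconf.event_queue_cfg = RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
        return rte_event_queue_setup(dev_id, 1, &qconf);
}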
[dpdk-dev] [PATCH v6 11/23] event/dlb: add queue and port default conf
Add support for getting the queue and port default configuration. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- drivers/event/dlb/dlb.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index c038794..e98a438 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -630,6 +630,33 @@ dlb_eventdev_configure(const struct rte_eventdev *dev) return 0; } +static void +dlb_eventdev_port_default_conf_get(struct rte_eventdev *dev, + uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + RTE_SET_USED(port_id); + struct dlb_eventdev *dlb = dlb_pmd_priv(dev); + + port_conf->new_event_threshold = dlb->new_event_limit; + port_conf->dequeue_depth = 32; + port_conf->enqueue_depth = DLB_MAX_ENQUEUE_DEPTH; + port_conf->event_port_cfg = 0; +} + +static void +dlb_eventdev_queue_default_conf_get(struct rte_eventdev *dev, + uint8_t queue_id, + struct rte_event_queue_conf *queue_conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + queue_conf->nb_atomic_flows = 1024; + queue_conf->nb_atomic_order_sequences = 32; + queue_conf->event_queue_cfg = 0; + queue_conf->priority = 0; +} + static int set_dev_id(const char *key __rte_unused, const char *value, @@ -706,6 +733,8 @@ dlb_entry_points_init(struct rte_eventdev *dev) static struct rte_eventdev_ops dlb_eventdev_entry_ops = { .dev_infos_get= dlb_eventdev_info_get, .dev_configure= dlb_eventdev_configure, + .queue_def_conf = dlb_eventdev_queue_default_conf_get, + .port_def_conf= dlb_eventdev_port_default_conf_get, .dump = dlb_eventdev_dump, .xstats_get = dlb_eventdev_xstats_get, .xstats_get_names = dlb_eventdev_xstats_get_names, -- 2.6.4
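For reference, this is how an application observes those defaults through the generic eventdev API; the device, port and queue IDs are placeholders, and the printed values should match the constants set above (dequeue depth 32, 1024 atomic flows, 32 ordered sequences).

#include <stdio.h>

#include <rte_eventdev.h>

static void
print_default_confs(uint8_t dev_id)
{
        struct rte_event_port_conf pconf;
        struct rte_event_queue_conf qconf;

        if (rte_event_port_default_conf_get(dev_id, 0, &pconf) == 0)
                printf("port 0 defaults: deq=%u enq=%u new_event_threshold=%d\n",
                       pconf.dequeue_depth, pconf.enqueue_depth,
                       pconf.new_event_threshold);

        if (rte_event_queue_default_conf_get(dev_id, 0, &qconf) == 0)
                printf("queue 0 defaults: atomic_flows=%u order_seqs=%u\n",
                       qconf.nb_atomic_flows,
                       qconf.nb_atomic_order_sequences);
}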
[dpdk-dev] [PATCH v6 09/23] event/dlb: add xstats
Add support for DLB xstats. Perform initialization and add standard xstats entry points Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- drivers/event/dlb/dlb.c| 23 + drivers/event/dlb/dlb_xstats.c | 1222 drivers/event/dlb/meson.build |1 + 3 files changed, 1246 insertions(+) create mode 100644 drivers/event/dlb/dlb_xstats.c diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 57b2837..62b9695 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -71,6 +71,17 @@ static struct rte_event_dev_info evdev_dlb_default_info = { struct process_local_port_data dlb_port[DLB_MAX_NUM_PORTS][NUM_DLB_PORT_TYPES]; +uint32_t +dlb_get_queue_depth(struct dlb_eventdev *dlb, + struct dlb_eventdev_queue *queue) +{ + /* DUMMY FOR NOW So "xstats" patch compiles */ + RTE_SET_USED(dlb); + RTE_SET_USED(queue); + + return 0; +} + static int dlb_hw_query_resources(struct dlb_eventdev *dlb) { @@ -298,6 +309,11 @@ void dlb_entry_points_init(struct rte_eventdev *dev) { static struct rte_eventdev_ops dlb_eventdev_entry_ops = { + .dump = dlb_eventdev_dump, + .xstats_get = dlb_eventdev_xstats_get, + .xstats_get_names = dlb_eventdev_xstats_get_names, + .xstats_get_by_name = dlb_eventdev_xstats_get_by_name, + .xstats_reset = dlb_eventdev_xstats_reset, }; /* Expose PMD's eventdev interface */ @@ -352,6 +368,13 @@ dlb_primary_eventdev_probe(struct rte_eventdev *dev, return err; } + /* Complete xtstats runtime initialization */ + err = dlb_xstats_init(dlb); + if (err) { + DLB_LOG_ERR("dlb: failed to init xstats, err=%d\n", err); + return err; + } + rte_spinlock_init(&dlb->qm_instance.resource_lock); dlb_iface_low_level_io_init(dlb); diff --git a/drivers/event/dlb/dlb_xstats.c b/drivers/event/dlb/dlb_xstats.c new file mode 100644 index 000..597c3d7 --- /dev/null +++ b/drivers/event/dlb/dlb_xstats.c @@ -0,0 +1,1222 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2020 Intel Corporation + */ + +#include +#include + +#include "dlb_priv.h" +#include "dlb_inline_fns.h" + +enum dlb_xstats_type { + /* common to device and port */ + rx_ok, /**< Receive an event */ + rx_drop,/**< Error bit set in received QE */ + rx_interrupt_wait, /**< Wait on an interrupt */ + rx_umonitor_umwait, /**< Block using umwait */ + tx_ok, /**< Transmit an event */ + total_polls,/**< Call dequeue_burst */ + zero_polls, /**< Call dequeue burst and return 0 */ + tx_nospc_ldb_hw_credits,/**< Insufficient LDB h/w credits */ + tx_nospc_dir_hw_credits,/**< Insufficient DIR h/w credits */ + tx_nospc_inflight_max, /**< Reach the new_event_threshold */ + tx_nospc_new_event_limit, /**< Insufficient s/w credits */ + tx_nospc_inflight_credits, /**< Port has too few s/w credits */ + /* device specific */ + nb_events_limit,/**< Maximum num of events */ + inflight_events,/**< Current num events outstanding */ + ldb_pool_size, /**< Num load balanced credits */ + dir_pool_size, /**< Num directed credits */ + /* port specific */ + tx_new, /**< Send an OP_NEW event */ + tx_fwd, /**< Send an OP_FORWARD event */ + tx_rel, /**< Send an OP_RELEASE event */ + tx_implicit_rel,/**< Issue an implicit event release */ + tx_sched_ordered, /**< Send a SCHED_TYPE_ORDERED event */ + tx_sched_unordered, /**< Send a SCHED_TYPE_PARALLEL event */ + tx_sched_atomic,/**< Send a SCHED_TYPE_ATOMIC event */ + tx_sched_directed, /**< Send a directed event */ + tx_invalid, /**< Send an event with an invalid op */ + outstanding_releases, /**< # of releases a port owes */ + max_outstanding_releases, /**< max # of releases a port can owe */ + 
rx_sched_ordered, /**< Dequeue an ordered event */ + rx_sched_unordered, /**< Dequeue an unordered event */ + rx_sched_atomic,/**< Dequeue an atomic event */ + rx_sched_directed, /**< Dequeue an directed event */ + rx_sched_invalid, /**< Dequeue event sched type invalid */ + /* common to port and queue */ + is_configured, /**< Port is configured */ + is_load_balanced,
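These counters are exposed through the standard eventdev xstats API, so nothing DLB-specific is needed to read them. A hedged sketch that dumps all device-scope xstats follows (per-port and per-queue stats use RTE_EVENT_DEV_XSTATS_PORT / RTE_EVENT_DEV_XSTATS_QUEUE with the relevant ID instead).

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

#include <rte_eventdev.h>

static void
dump_device_xstats(uint8_t dev_id)
{
        struct rte_event_dev_xstats_name *names;
        unsigned int *ids;
        uint64_t *values;
        int n, i;

        /* A NULL/0 first call returns the number of stats available. */
        n = rte_event_dev_xstats_names_get(dev_id, RTE_EVENT_DEV_XSTATS_DEVICE,
                                           0, NULL, NULL, 0);
        if (n <= 0)
                return;

        names = calloc(n, sizeof(*names));
        ids = calloc(n, sizeof(*ids));
        values = calloc(n, sizeof(*values));
        if (names != NULL && ids != NULL && values != NULL) {
                rte_event_dev_xstats_names_get(dev_id,
                                RTE_EVENT_DEV_XSTATS_DEVICE, 0, names, ids, n);
                rte_event_dev_xstats_get(dev_id, RTE_EVENT_DEV_XSTATS_DEVICE,
                                         0, ids, values, n);
                for (i = 0; i < n; i++)
                        printf("%s: %" PRIu64 "\n", names[i].name, values[i]);
        }
        free(names);
        free(ids);
        free(values);
}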
[dpdk-dev] [PATCH v6 13/23] event/dlb: add port setup
Configure the load balanced (ldb) or directed (dir) port. The consumer queue (CQ) and producer port (PP) are also set up here. Signed-off-by: Timothy McDaniel --- doc/guides/eventdevs/dlb.rst | 40 + drivers/event/dlb/dlb.c | 516 ++- drivers/event/dlb/dlb_iface.c| 11 + drivers/event/dlb/dlb_iface.h| 14 + drivers/event/dlb/pf/base/dlb_resource.c | 1436 +- drivers/event/dlb/pf/dlb_pf.c| 210 + 6 files changed, 2223 insertions(+), 4 deletions(-) diff --git a/doc/guides/eventdevs/dlb.rst b/doc/guides/eventdevs/dlb.rst index 4557ee5..f5fb055 100644 --- a/doc/guides/eventdevs/dlb.rst +++ b/doc/guides/eventdevs/dlb.rst @@ -117,3 +117,43 @@ The queue's ``nb_atomic_flows`` parameter is ignored by the DLB PMD, because the DLB does not limit the number of flows a queue can track. In the DLB, all load-balanced queues can use the full 16-bit flow ID range. +Load-balanced and Directed Ports +~~~ + +DLB ports come in two flavors: load-balanced and directed. The eventdev API +does not have the same concept, but it has a similar one: ports and queues that +are singly-linked (i.e. linked to a single queue or port, respectively). + +The ``rte_event_dev_info_get()`` function reports the number of available +event ports and queues (among other things). For the DLB PMD, max_event_ports +and max_event_queues report the number of available load-balanced ports and +queues, and max_single_link_event_port_queue_pairs reports the number of +available directed ports and queues. + +When a scheduling domain is created in ``rte_event_dev_configure()``, the user +specifies ``nb_event_ports`` and ``nb_single_link_event_port_queues``, which +control the total number of ports (load-balanced and directed) and the number +of directed ports. Hence, the number of requested load-balanced ports is +``nb_event_ports - nb_single_link_event_ports``. The ``nb_event_queues`` field +specifies the total number of queues (load-balanced and directed). The number +of directed queues comes from ``nb_single_link_event_port_queues``, since +directed ports and queues come in pairs. + +When a port is set up, the ``RTE_EVENT_PORT_CFG_SINGLE_LINK`` flag determines +whether it should be configured as a directed (the flag is set) or a +load-balanced (the flag is unset) port. Similarly, the +``RTE_EVENT_QUEUE_CFG_SINGLE_LINK`` queue configuration flag controls +whether it is a directed or load-balanced queue. + +Load-balanced ports can only be linked to load-balanced queues, and directed +ports can only be linked to directed queues. Furthermore, directed ports can +only be linked to a single directed queue (and vice versa), and that link +cannot change after the eventdev is started. + +The eventdev API does not have a directed scheduling type. To support directed +traffic, the dlb PMD detects when an event is being sent to a directed queue +and overrides its scheduling type. Note that the originally selected scheduling +type (atomic, ordered, or parallel) is not preserved, and an event's sched_type +will be set to ``RTE_SCHED_TYPE_ATOMIC`` when it is dequeued from a directed +port. 
+ diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index edcc6d1..4d91ddd 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -152,6 +152,69 @@ dlb_free_qe_mem(struct dlb_port *qm_port) qm_port->consume_qe = NULL; } +static int +dlb_init_consume_qe(struct dlb_port *qm_port, char *mz_name) +{ + struct dlb_cq_pop_qe *qe; + + qe = rte_zmalloc(mz_name, + DLB_NUM_QES_PER_CACHE_LINE * + sizeof(struct dlb_cq_pop_qe), + RTE_CACHE_LINE_SIZE); + + if (qe == NULL) { + DLB_LOG_ERR("dlb: no memory for consume_qe\n"); + return -ENOMEM; + } + + qm_port->consume_qe = qe; + + qe->qe_valid = 0; + qe->qe_frag = 0; + qe->qe_comp = 0; + qe->cq_token = 1; + /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2, +* and so on. +*/ + qe->tokens = 0; /* set at run time */ + qe->meas_lat = 0; + qe->no_dec = 0; + /* Completion IDs are disabled */ + qe->cmp_id = 0; + + return 0; +} + +static int +dlb_init_qe_mem(struct dlb_port *qm_port, char *mz_name) +{ + int ret, sz; + + sz = DLB_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb_enqueue_qe); + + qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE); + + if (qm_port->qe4 == NULL) { + DLB_LOG_ERR("dlb: no qe4 memory\n"); + ret = -ENOMEM; + goto error_exit; + } + + ret = dlb_init_consume_qe(qm_port, mz_name); + if (ret < 0) { + DLB_LOG_ERR("dlb: dlb_init_consume_qe ret=%d\n", ret); + goto error_exit; + } + + return 0; + +error_exi
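An application-side sketch of the two port flavors described in the documentation above; IDs and depths are placeholders. The SINGLE_LINK flag is what steers the PMD into the directed (dir) port path rather than the load-balanced (ldb) one.

#include <rte_eventdev.h>

static int
setup_example_ports(uint8_t dev_id)
{
        struct rte_event_port_conf pconf;
        int ret;

        /* Load-balanced port: dequeue_depth sizes the CQ, enqueue_depth the PP. */
        ret = rte_event_port_default_conf_get(dev_id, 0, &pconf);
        if (ret < 0)
                return ret;
        pconf.dequeue_depth = 32;
        pconf.enqueue_depth = 64;
        ret = rte_event_port_setup(dev_id, 0, &pconf);
        if (ret < 0)
                return ret;

        /* Directed port: may only ever be linked to one directed queue. */
        ret = rte_event_port_default_conf_get(dev_id, 1, &pconf);
        if (ret < 0)
                return ret;
        pconf.event_port_cfg = RTE_EVENT_PORT_CFG_SINGLE_LINK;
        return rte_event_port_setup(dev_id, 1, &pconf);
}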
[dpdk-dev] [PATCH v6 15/23] event/dlb: add port unlink and port unlinks in progress
Add supports for the port unlink(s) eventdev entry points. The unlink operation is an asynchronous operation executed by a control thread, and the unlinks-in-progress function reads a counter shared with the control thread. Port QE and memzone memory is freed here. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- drivers/event/dlb/dlb.c | 166 1 file changed, 166 insertions(+) diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 2ad195d..c64f559 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -693,6 +693,169 @@ dlb_eventdev_configure(const struct rte_eventdev *dev) return 0; } +static int16_t +dlb_hw_unmap_ldb_qid_from_port(struct dlb_hw_dev *handle, + uint32_t qm_port_id, + uint16_t qm_qid) +{ + struct dlb_unmap_qid_args cfg; + struct dlb_cmd_response response; + int32_t ret; + + if (handle == NULL) + return -EINVAL; + + cfg.response = (uintptr_t)&response; + cfg.port_id = qm_port_id; + cfg.qid = qm_qid; + + ret = dlb_iface_unmap_qid(handle, &cfg); + if (ret < 0) + DLB_LOG_ERR("dlb: unmap qid error, ret=%d (driver status: %s)\n", + ret, dlb_error_strings[response.status]); + + return ret; +} + +static int +dlb_event_queue_detach_ldb(struct dlb_eventdev *dlb, + struct dlb_eventdev_port *ev_port, + struct dlb_eventdev_queue *ev_queue) +{ + int ret, i; + + /* Don't unlink until start time. */ + if (dlb->run_state == DLB_RUN_STATE_STOPPED) + return 0; + + for (i = 0; i < DLB_MAX_NUM_QIDS_PER_LDB_CQ; i++) { + if (ev_port->link[i].valid && + ev_port->link[i].queue_id == ev_queue->id) + break; /* found */ + } + + /* This is expected with eventdev API! +* It blindly attempts to unmap all queues. +*/ + if (i == DLB_MAX_NUM_QIDS_PER_LDB_CQ) { + DLB_LOG_DBG("dlb: ignoring LB QID %d not mapped for qm_port %d.\n", + ev_queue->qm_queue.id, + ev_port->qm_port.id); + return 0; + } + + ret = dlb_hw_unmap_ldb_qid_from_port(&dlb->qm_instance, +ev_port->qm_port.id, +ev_queue->qm_queue.id); + if (!ret) + ev_port->link[i].mapped = false; + + return ret; +} + +static int +dlb_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port, +uint8_t queues[], uint16_t nb_unlinks) +{ + struct dlb_eventdev_port *ev_port = event_port; + struct dlb_eventdev *dlb; + int i; + + RTE_SET_USED(dev); + + if (!ev_port->setup_done) { + DLB_LOG_ERR("dlb: evport %d is not configured\n", + ev_port->id); + rte_errno = -EINVAL; + return 0; + } + + if (queues == NULL || nb_unlinks == 0) { + DLB_LOG_DBG("dlb: queues is NULL or nb_unlinks is 0\n"); + return 0; /* Ignore and return success */ + } + + if (ev_port->qm_port.is_directed) { + DLB_LOG_DBG("dlb: ignore unlink from dir port %d\n", + ev_port->id); + rte_errno = 0; + return nb_unlinks; /* as if success */ + } + + dlb = ev_port->dlb; + + for (i = 0; i < nb_unlinks; i++) { + struct dlb_eventdev_queue *ev_queue; + int ret, j; + + if (queues[i] >= dlb->num_queues) { + DLB_LOG_ERR("dlb: invalid queue id %d\n", queues[i]); + rte_errno = -EINVAL; + return i; /* return index of offending queue */ + } + + ev_queue = &dlb->ev_queues[queues[i]]; + + /* Does a link exist? 
*/ + for (j = 0; j < DLB_MAX_NUM_QIDS_PER_LDB_CQ; j++) + if (ev_port->link[j].queue_id == queues[i] && + ev_port->link[j].valid) + break; + + if (j == DLB_MAX_NUM_QIDS_PER_LDB_CQ) + continue; + + ret = dlb_event_queue_detach_ldb(dlb, ev_port, ev_queue); + if (ret) { + DLB_LOG_ERR("unlink err=%d for port %d queue %d\n", + ret, ev_port->id, queues[i]); + rte_errno = -ENOENT; + return i; /* return index of offending queue */ + } + + ev_port->link[j].valid = false; + ev_port->num_links--; + ev_queue->num_links--; + } + + return nb_unlinks; +} + +static int +dlb_eve
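Since the unmap is executed asynchronously by a control thread, an application that must know the unlink has completed (before a reconfiguration, for instance) should poll the in-progress counter, roughly as in this sketch:

#include <rte_eventdev.h>
#include <rte_pause.h>

static int
unlink_and_wait(uint8_t dev_id, uint8_t port_id, uint8_t queue_id)
{
        uint8_t queues[1] = { queue_id };
        int ret;

        ret = rte_event_port_unlink(dev_id, port_id, queues, 1);
        if (ret != 1)
                return -1; /* ret is the number of successful unlink requests */

        /* Wait for the DLB control thread to finish the unmap. */
        while (rte_event_port_unlinks_in_progress(dev_id, port_id) > 0)
                rte_pause();

        return 0;
}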
[dpdk-dev] [PATCH v6 10/23] event/dlb: add infos get and configure
Add support for configuring the DLB hardware. In particular, this patch configures the DLB hardware's scheduling domain, such that it is provisioned with the requested number of ports and queues, provided sufficient resources are available. Individual queues and ports are configured later in port setup and eventdev start. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- doc/guides/eventdevs/dlb.rst | 48 + drivers/event/dlb/dlb.c | 397 +++ drivers/event/dlb/dlb_iface.c| 11 + drivers/event/dlb/dlb_iface.h| 11 + drivers/event/dlb/pf/base/dlb_resource.c | 4100 +- drivers/event/dlb/pf/dlb_pf.c| 88 + 6 files changed, 4562 insertions(+), 93 deletions(-) diff --git a/doc/guides/eventdevs/dlb.rst b/doc/guides/eventdevs/dlb.rst index 92341c0..3ac7393 100644 --- a/doc/guides/eventdevs/dlb.rst +++ b/doc/guides/eventdevs/dlb.rst @@ -34,3 +34,51 @@ detailed understanding of the hardware, but these details are important when writing high-performance code. This section describes the places where the eventdev API and DLB misalign. +Scheduling Domain Configuration +~~ + +There are 32 scheduling domains in the DLB. +When one is configured, it allocates load-balanced and +directed queues, ports, credits, and other hardware resources. Some +resource allocations are user-controlled -- the number of queues, for example +-- and others, like credit pools (one directed and one load-balanced pool per +scheduling domain), are not. + +The DLB is a closed system eventdev, and as such the ``nb_events_limit`` device +setup argument and the per-port ``new_event_threshold`` argument apply as +defined in the eventdev header file. The limit is applied to all enqueues, +regardless of whether it will consume a directed or load-balanced credit. + +Reconfiguration +~~ + +The Eventdev API allows one to reconfigure a device, its ports, and its queues +by first stopping the device, calling the configuration function(s), then +restarting the device. The DLB does not support configuring an individual queue +or port without first reconfiguring the entire device, however, so there are +certain reconfiguration sequences that are valid in the eventdev API but not +supported by the PMD. + +Specifically, the PMD supports the following configuration sequence: +1. Configure and start the device +2. Stop the device +3. (Optional) Reconfigure the device +4. (Optional) If step 3 is run: + + a. Setup queue(s). The reconfigured queue(s) lose their previous port links. + b. The reconfigured port(s) lose their previous queue links. + +5. (Optional, only if steps 4a and 4b are run) Link port(s) to queue(s) +6. Restart the device. If the device is reconfigured in step 3 but one or more + of its ports or queues are not, the PMD will apply their previous + configuration (including port->queue links) at this time. + +The PMD does not support the following configuration sequences: +1. Configure and start the device +2. Stop the device +3. Setup queue or setup port +4. Start the device + +This sequence is not supported because the event device must be reconfigured +before its ports or queues can be. 
+ diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 62b9695..c038794 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -139,6 +139,19 @@ dlb_hw_query_resources(struct dlb_eventdev *dlb) return 0; } +static void +dlb_free_qe_mem(struct dlb_port *qm_port) +{ + if (qm_port == NULL) + return; + + rte_free(qm_port->qe4); + qm_port->qe4 = NULL; + + rte_free(qm_port->consume_qe); + qm_port->consume_qe = NULL; +} + /* Wrapper for string to int conversion. Substituted for atoi(...), which is * unsafe. */ @@ -231,6 +244,388 @@ set_num_dir_credits(const char *key __rte_unused, DLB_MAX_NUM_DIR_CREDITS); return -EINVAL; } + return 0; +} + +/* VDEV-only notes: + * This function first unmaps all memory mappings and closes the + * domain's file descriptor, which causes the driver to reset the + * scheduling domain. Once that completes (when close() returns), we + * can safely free the dynamically allocated memory used by the + * scheduling domain. + * + * PF-only notes: + * We will maintain a use count and use that to determine when + * a reset is required. In PF mode, we never mmap, or munmap + * device memory, and we own the entire physical PCI device. + */ + +static void +dlb_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig) +{ + struct dlb_eventdev *dlb = dlb_pmd_priv(dev); + enum dlb_configuration_state config_state; + int i, j; + + /* Close and reset the domain */ + dlb_iface_domain_close(dlb); + + /* Free all dynamically allocated port memory */ + for (i = 0; i < dlb->num_ports; i++) +
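A hedged sketch of the initial configuration step described in the documentation above: query the device, then size the scheduling domain from what it reports. The queue and port counts are placeholders; one port/queue pair is reserved as directed via nb_single_link_event_port_queues.

#include <string.h>

#include <rte_eventdev.h>

static int
configure_dlb(uint8_t dev_id)
{
        struct rte_event_dev_info info;
        struct rte_event_dev_config cfg;
        int ret;

        ret = rte_event_dev_info_get(dev_id, &info);
        if (ret < 0)
                return ret;

        memset(&cfg, 0, sizeof(cfg));
        /* DLB is a closed system: nb_events_limit sizes the credit pools. */
        cfg.nb_events_limit = info.max_num_events;
        cfg.nb_event_queues = 4;                  /* 3 load-balanced + 1 directed */
        cfg.nb_event_ports = 4;                   /* 3 load-balanced + 1 directed */
        cfg.nb_single_link_event_port_queues = 1; /* the directed pair */
        cfg.nb_event_queue_flows = info.max_event_queue_flows;
        cfg.nb_event_port_dequeue_depth = info.max_event_port_dequeue_depth;
        cfg.nb_event_port_enqueue_depth = info.max_event_port_enqueue_depth;
        cfg.dequeue_timeout_ns = info.max_dequeue_timeout_ns;

        return rte_event_dev_configure(dev_id, &cfg);
}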
[dpdk-dev] [PATCH v6 14/23] event/dlb: add port link
Add port link entry point. Directed queues are identified and created at this stage. Their setup deferred until link-time, at which point we know the directed port ID. Directed queue setup will only fail if this queue is already setup or there are no directed queues left to configure. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- drivers/event/dlb/dlb.c | 306 +++ drivers/event/dlb/dlb_iface.c| 9 + drivers/event/dlb/dlb_iface.h| 9 + drivers/event/dlb/pf/base/dlb_resource.c | 641 +++ drivers/event/dlb/pf/dlb_pf.c| 69 5 files changed, 1034 insertions(+) diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 4d91ddd..2ad195d 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -1532,6 +1532,311 @@ set_num_atm_inflights(const char *key __rte_unused, return 0; } +static int +dlb_validate_port_link(struct dlb_eventdev_port *ev_port, + uint8_t queue_id, + bool link_exists, + int index) +{ + struct dlb_eventdev *dlb = ev_port->dlb; + struct dlb_eventdev_queue *ev_queue; + bool port_is_dir, queue_is_dir; + + if (queue_id > dlb->num_queues) { + DLB_LOG_ERR("queue_id %d > num queues %d\n", + queue_id, dlb->num_queues); + rte_errno = -EINVAL; + return -1; + } + + ev_queue = &dlb->ev_queues[queue_id]; + + if (!ev_queue->setup_done && + ev_queue->qm_queue.config_state != DLB_PREV_CONFIGURED) { + DLB_LOG_ERR("setup not done and not previously configured\n"); + rte_errno = -EINVAL; + return -1; + } + + port_is_dir = ev_port->qm_port.is_directed; + queue_is_dir = ev_queue->qm_queue.is_directed; + + if (port_is_dir != queue_is_dir) { + DLB_LOG_ERR("%s queue %u can't link to %s port %u\n", + queue_is_dir ? "DIR" : "LDB", ev_queue->id, + port_is_dir ? "DIR" : "LDB", ev_port->id); + + rte_errno = -EINVAL; + return -1; + } + + /* Check if there is space for the requested link */ + if (!link_exists && index == -1) { + DLB_LOG_ERR("no space for new link\n"); + rte_errno = -ENOSPC; + return -1; + } + + /* Check if the directed port is already linked */ + if (ev_port->qm_port.is_directed && ev_port->num_links > 0 && + !link_exists) { + DLB_LOG_ERR("Can't link DIR port %d to >1 queues\n", + ev_port->id); + rte_errno = -EINVAL; + return -1; + } + + /* Check if the directed queue is already linked */ + if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 && + !link_exists) { + DLB_LOG_ERR("Can't link DIR queue %d to >1 ports\n", + ev_queue->id); + rte_errno = -EINVAL; + return -1; + } + + return 0; +} + +static int16_t +dlb_hw_map_ldb_qid_to_port(struct dlb_hw_dev *handle, + uint32_t qm_port_id, + uint16_t qm_qid, + uint8_t priority) +{ + struct dlb_map_qid_args cfg; + struct dlb_cmd_response response; + int32_t ret; + + if (handle == NULL) + return -EINVAL; + + /* Build message */ + cfg.response = (uintptr_t)&response; + cfg.port_id = qm_port_id; + cfg.qid = qm_qid; + cfg.priority = EV_TO_DLB_PRIO(priority); + + ret = dlb_iface_map_qid(handle, &cfg); + if (ret < 0) { + DLB_LOG_ERR("dlb: map qid error, ret=%d (driver status: %s)\n", + ret, dlb_error_strings[response.status]); + DLB_LOG_ERR("dlb: device_id=%d grp=%d, qm_port=%d, qm_qid=%d prio=%d\n", + handle->device_id, + handle->domain_id, cfg.port_id, + cfg.qid, + cfg.priority); + } else { + DLB_LOG_DBG("dlb: mapped queue %d to qm_port %d\n", + qm_qid, qm_port_id); + } + + return ret; +} + +static int +dlb_event_queue_join_ldb(struct dlb_eventdev *dlb, +struct dlb_eventdev_port *ev_port, +struct dlb_eventdev_queue *ev_queue, +uint8_t priority) +{ + int first_avail = -1; + int ret, i; + + for (i = 0; i < 
DLB_MAX_NUM_QIDS_PER_LDB_CQ; i++) { + if (ev_port->link[i].valid) { + if (ev_port->link[i].queue_id == ev_queue->id && + ev_port->link[i].priority == priority) { + if (ev_
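Linking uses the generic eventdev call; the priority array may be NULL for normal priority. A hedged sketch that links a load-balanced port to two queues and a directed port to its single queue (the IDs follow the earlier placeholder sketches):

#include <rte_eventdev.h>

static int
link_example(uint8_t dev_id)
{
        uint8_t queues[2] = { 0, 1 };
        uint8_t prios[2] = { 0, 128 }; /* 0 is the highest priority */
        int nb;

        /* Load-balanced port 0 -> load-balanced queues 0 and 1. */
        nb = rte_event_port_link(dev_id, 0, queues, prios, 2);
        if (nb != 2)
                return -1;

        /* Directed port 1 -> directed queue 2. For the DLB PMD this is the
         * point where the deferred directed-queue setup actually happens.
         */
        queues[0] = 2;
        nb = rte_event_port_link(dev_id, 1, queues, NULL, 1);
        return (nb == 1) ? 0 : -1;
}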
[dpdk-dev] [PATCH v6 16/23] event/dlb: add eventdev start
Add support for the eventdev start entry point. DLB delays setting up single link resources until eventdev start, because it is only then that it can ascertain which ports have just one linked queue. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- drivers/event/dlb/dlb.c | 224 +-- drivers/event/dlb/dlb_iface.c| 3 + drivers/event/dlb/dlb_iface.h| 3 + drivers/event/dlb/pf/base/dlb_resource.c | 142 drivers/event/dlb/pf/dlb_pf.c| 23 5 files changed, 351 insertions(+), 44 deletions(-) diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index c64f559..780ff7d 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -1626,6 +1626,47 @@ dlb_eventdev_port_setup(struct rte_eventdev *dev, } static int +dlb_eventdev_reapply_configuration(struct rte_eventdev *dev) +{ + struct dlb_eventdev *dlb = dlb_pmd_priv(dev); + int ret, i; + + /* If an event queue or port was previously configured, but hasn't been +* reconfigured, reapply its original configuration. +*/ + for (i = 0; i < dlb->num_queues; i++) { + struct dlb_eventdev_queue *ev_queue; + + ev_queue = &dlb->ev_queues[i]; + + if (ev_queue->qm_queue.config_state != DLB_PREV_CONFIGURED) + continue; + + ret = dlb_eventdev_queue_setup(dev, i, &ev_queue->conf); + if (ret < 0) { + DLB_LOG_ERR("dlb: failed to reconfigure queue %d", i); + return ret; + } + } + + for (i = 0; i < dlb->num_ports; i++) { + struct dlb_eventdev_port *ev_port = &dlb->ev_ports[i]; + + if (ev_port->qm_port.config_state != DLB_PREV_CONFIGURED) + continue; + + ret = dlb_eventdev_port_setup(dev, i, &ev_port->conf); + if (ret < 0) { + DLB_LOG_ERR("dlb: failed to reconfigure ev_port %d", + i); + return ret; + } + } + + return 0; +} + +static int set_dev_id(const char *key __rte_unused, const char *value, void *opaque) @@ -1761,6 +1802,50 @@ dlb_validate_port_link(struct dlb_eventdev_port *ev_port, return 0; } +static int32_t +dlb_hw_create_dir_queue(struct dlb_eventdev *dlb, int32_t qm_port_id) +{ + struct dlb_hw_dev *handle = &dlb->qm_instance; + struct dlb_create_dir_queue_args cfg; + struct dlb_cmd_response response; + int32_t ret; + + cfg.response = (uintptr_t)&response; + + /* The directed port is always configured before its queue */ + cfg.port_id = qm_port_id; + + ret = dlb_iface_dir_queue_create(handle, &cfg); + if (ret < 0) { + DLB_LOG_ERR("dlb: create DIR event queue error, ret=%d (driver status: %s)\n", + ret, dlb_error_strings[response.status]); + return -EINVAL; + } + + return response.id; +} + +static int +dlb_eventdev_dir_queue_setup(struct dlb_eventdev *dlb, +struct dlb_eventdev_queue *ev_queue, +struct dlb_eventdev_port *ev_port) +{ + int32_t qm_qid; + + qm_qid = dlb_hw_create_dir_queue(dlb, ev_port->qm_port.id); + + if (qm_qid < 0) { + DLB_LOG_ERR("Failed to create the DIR queue\n"); + return qm_qid; + } + + dlb->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id; + + ev_queue->qm_queue.id = qm_qid; + + return 0; +} + static int16_t dlb_hw_map_ldb_qid_to_port(struct dlb_hw_dev *handle, uint32_t qm_port_id, @@ -1836,50 +1921,6 @@ dlb_event_queue_join_ldb(struct dlb_eventdev *dlb, return ret; } -static int32_t -dlb_hw_create_dir_queue(struct dlb_eventdev *dlb, int32_t qm_port_id) -{ - struct dlb_hw_dev *handle = &dlb->qm_instance; - struct dlb_create_dir_queue_args cfg; - struct dlb_cmd_response response; - int32_t ret; - - cfg.response = (uintptr_t)&response; - - /* The directed port is always configured before its queue */ - cfg.port_id = qm_port_id; - - ret = dlb_iface_dir_queue_create(handle, &cfg); - if (ret < 0) { - DLB_LOG_ERR("dlb: create 
DIR event queue error, ret=%d (driver status: %s)\n", - ret, dlb_error_strings[response.status]); - return -EINVAL; - } - - return response.id; -} - -static int -dlb_eventdev_dir_queue_setup(struct dlb_eventdev *dlb, -struct dlb_eventdev_queue *ev_queue, -struct dlb_eventdev_port *ev_port) -{ - int32_t qm_qid; - - qm_qid = dlb_hw_create_dir_queue(dlb, ev_port->qm_port.id); - - if (qm_qid < 0) { - DLB_
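Putting the previous sketches together, the bring-up order the PMD expects is configure, set up queues and ports, link, then start; the links must exist before rte_event_dev_start() so the PMD can tell which ports have exactly one linked queue. The helper names below are the hypothetical ones introduced in the earlier sketches, not PMD functions.

#include <rte_eventdev.h>

/* Hypothetical helpers from the earlier sketches in this series. */
int configure_dlb(uint8_t dev_id);
int setup_example_queues(uint8_t dev_id);
int setup_example_ports(uint8_t dev_id);
int link_example(uint8_t dev_id);

static int
bring_up_dlb(uint8_t dev_id)
{
        int ret;

        ret = configure_dlb(dev_id);
        if (ret == 0)
                ret = setup_example_queues(dev_id);
        if (ret == 0)
                ret = setup_example_ports(dev_id);
        if (ret == 0)
                ret = link_example(dev_id);
        if (ret == 0)
                /* Single-link (directed) resources are finalized here. */
                ret = rte_event_dev_start(dev_id);
        return ret;
}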
[dpdk-dev] [PATCH v6 18/23] event/dlb: add dequeue and its burst variants
Add support for dequeue, dequeue_burst, ... DLB does not currently support interrupts, but instead uses umonitor/umwait if supported by the processor. This allows the software to monitor and wait on writes to a cache-line. DLB supports normal and sparse cq mode. In normal mode the hardware will pack 4 QEs into each cache line. In sparse cq mode, the hardware will only populate one QE per cache line. Software must be aware of the cq mode, and take the appropriate actions, based on the mode. Signed-off-by: Timothy McDaniel --- doc/guides/eventdevs/dlb.rst | 21 ++ drivers/event/dlb/dlb.c | 728 +++ 2 files changed, 749 insertions(+) diff --git a/doc/guides/eventdevs/dlb.rst b/doc/guides/eventdevs/dlb.rst index 12142b1..6940ef5 100644 --- a/doc/guides/eventdevs/dlb.rst +++ b/doc/guides/eventdevs/dlb.rst @@ -318,3 +318,24 @@ increase a vdev's per-queue atomic-inflight allocation to (for example) 64: --vdev=dlb1_event,atm_inflights=64 +Deferred Scheduling +~~ + +The DLB PMD's default behavior for managing a CQ is to "pop" the CQ once per +dequeued event before returning from rte_event_dequeue_burst(). This frees the +corresponding entries in the CQ, which enables the DLB to schedule more events +to it. + +To support applications seeking finer-grained scheduling control -- for example +deferring scheduling to get the best possible priority scheduling and +load-balancing -- the PMD supports a deferred scheduling mode. In this mode, +the CQ entry is not popped until the *subsequent* rte_event_dequeue_burst() +call. This mode only applies to load-balanced event ports with dequeue depth of +1. + +To enable deferred scheduling, use the defer_sched vdev argument like so: + +.. code-block:: console + + --vdev=dlb1_event,defer_sched=on + diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 4d65a7f..c022139 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -2812,9 +2812,728 @@ dlb_event_enqueue_forward_burst_delayed(void *event_port, return __dlb_event_enqueue_burst(event_port, events, num); } +static __rte_always_inline int +dlb_recv_qe(struct dlb_port *qm_port, struct dlb_dequeue_qe *qe, + uint8_t *offset) +{ + uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08}, + {0x00, 0x01, 0x03, 0x07} }; + uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08}; + volatile struct dlb_dequeue_qe *cq_addr; + __m128i *qes = (__m128i *)qe; + uint64_t *cache_line_base; + uint8_t gen_bits; + + cq_addr = dlb_port[qm_port->id][PORT_TYPE(qm_port)].cq_base; + cq_addr = &cq_addr[qm_port->cq_idx]; + + cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F); + *offset = ((uintptr_t)cq_addr & 0x30) >> 4; + + /* Load the next CQ cache line from memory. Pack these reads as tight +* as possible to reduce the chance that DLB invalidates the line while +* the CPU is reading it. Read the cache line backwards to ensure that +* if QE[N] (N > 0) is valid, then QEs[0:N-1] are too. 
+* +* (Valid QEs start at &qe[offset]) +*/ + qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]); + qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]); + qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]); + qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]); + + /* Evict the cache line ASAP */ + dlb_cldemote(cache_line_base); + + /* Extract and combine the gen bits */ + gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) | + ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) | + ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) | + ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3); + + /* XOR the combined bits such that a 1 represents a valid QE */ + gen_bits ^= xor_mask[qm_port->gen_bit][*offset]; + + /* Mask off gen bits we don't care about */ + gen_bits &= and_mask[*offset]; + + return __builtin_popcount(gen_bits); +} + +static inline void +dlb_inc_cq_idx(struct dlb_port *qm_port, int cnt) +{ + uint16_t idx = qm_port->cq_idx_unmasked + cnt; + + qm_port->cq_idx_unmasked = idx; + qm_port->cq_idx = idx & qm_port->cq_depth_mask; + qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1; +} + +static inline int +dlb_process_dequeue_qes(struct dlb_eventdev_port *ev_port, + struct dlb_port *qm_port, + struct rte_event *events, + struct dlb_dequeue_qe *qes, + int cnt) +{ + uint8_t *qid_mappings = qm_port->qid_mappings; + int i, num; + + RTE_SET_USED(ev_port); /* avoids unused variable error */ + + for (i = 0, num = 0; i < cnt; i++) { + struct dlb_dequeue_qe *qe = &qes[i]; + int sched_type_map[4] = { +
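From the application's point of view none of the CQ-mode details matter; a worker simply polls its port with the generic burst API. A hedged sketch of such a loop (the release and forward handling is deliberately minimal):

#include <stdbool.h>

#include <rte_eventdev.h>

static void
worker_loop(uint8_t dev_id, uint8_t port_id, volatile bool *done)
{
        struct rte_event evs[32];
        uint16_t n, i;

        while (!*done) {
                /* timeout_ticks = 0 polls; see the timeout-ticks patch later
                 * in this series for converting nanoseconds to ticks.
                 */
                n = rte_event_dequeue_burst(dev_id, port_id, evs, 32, 0);
                for (i = 0; i < n; i++) {
                        /* ... process evs[i], then release it ... */
                        evs[i].op = RTE_EVENT_OP_RELEASE;
                }
                if (n > 0)
                        rte_event_enqueue_burst(dev_id, port_id, evs, n);
        }
}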
[dpdk-dev] [PATCH v6 22/23] event/dlb: add queue and port release
These entry points are NO-OPS. DLB does not support reconfiguring individual queues or ports. The entire device must be reconfigured. Signed-off-by: Timothy McDaniel --- drivers/event/dlb/dlb.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 0585875..aa22d03 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -158,6 +158,9 @@ dlb_free_qe_mem(struct dlb_port *qm_port) rte_free(qm_port->consume_qe); qm_port->consume_qe = NULL; + + rte_memzone_free(dlb_port[qm_port->id][PORT_TYPE(qm_port)].mz); + dlb_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL; } static int @@ -3854,6 +3857,28 @@ dlb_eventdev_close(struct rte_eventdev *dev) return 0; } +static void +dlb_eventdev_port_release(void *port) +{ + struct dlb_eventdev_port *ev_port = port; + + if (ev_port) { + struct dlb_port *qm_port = &ev_port->qm_port; + + if (qm_port->config_state == DLB_CONFIGURED) + dlb_free_qe_mem(qm_port); + } +} + +static void +dlb_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id) +{ + RTE_SET_USED(dev); + RTE_SET_USED(id); + + /* This function intentionally left blank. */ +} + void dlb_entry_points_init(struct rte_eventdev *dev) { @@ -3868,7 +3893,9 @@ dlb_entry_points_init(struct rte_eventdev *dev) .queue_def_conf = dlb_eventdev_queue_default_conf_get, .port_def_conf= dlb_eventdev_port_default_conf_get, .queue_setup = dlb_eventdev_queue_setup, + .queue_release= dlb_eventdev_queue_release, .port_setup = dlb_eventdev_port_setup, + .port_release = dlb_eventdev_port_release, .port_link= dlb_eventdev_port_link, .port_unlink = dlb_eventdev_port_unlink, .port_unlinks_in_progress = -- 2.6.4
[dpdk-dev] [PATCH v6 20/23] event/dlb: add PMD's token pop public interface
The PMD uses a public interface to allow applications to control the token pop mode. Supported token pop modes are as follows, and they impact core scheduling affinity for ldb ports. AUTO_POP: Pop the CQ tokens immediately after dequeueing. DELAYED_POP: Pop CQ tokens after (dequeue_depth - 1) events are released. Supported on load-balanced ports only. DEFERRED_POP: Pop the CQ tokens during next dequeue operation. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- doc/api/doxy-api-index.md | 1 + drivers/event/dlb/dlb.c | 121 +--- drivers/event/dlb/dlb_priv.h| 3 + drivers/event/dlb/meson.build | 4 +- drivers/event/dlb/rte_pmd_dlb.c | 38 drivers/event/dlb/rte_pmd_dlb.h | 77 +++ drivers/event/dlb/rte_pmd_dlb_event_version.map | 6 ++ 7 files changed, 237 insertions(+), 13 deletions(-) create mode 100644 drivers/event/dlb/rte_pmd_dlb.c create mode 100644 drivers/event/dlb/rte_pmd_dlb.h diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md index b855a8f..a9f636b 100644 --- a/doc/api/doxy-api-index.md +++ b/doc/api/doxy-api-index.md @@ -51,6 +51,7 @@ The public API headers are grouped by topics: [dpaa2_cmdif](@ref rte_pmd_dpaa2_cmdif.h), [dpaa2_qdma] (@ref rte_pmd_dpaa2_qdma.h), [crypto_scheduler] (@ref rte_cryptodev_scheduler.h) + [dlb] (@ref rte_pmd_dlb.h), - **memory**: [memseg] (@ref rte_memory.h), diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index cdabc9b..4e1af0a 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -1021,6 +1021,33 @@ dlb_hw_create_ldb_port(struct dlb_eventdev *dlb, qm_port->dequeue_depth = dequeue_depth; + /* When using the reserved token scheme, token_pop_thresh is +* initially 2 * dequeue_depth. Once the tokens are reserved, +* the enqueue code re-assigns it to dequeue_depth. +*/ + qm_port->token_pop_thresh = cq_depth; + + /* When the deferred scheduling vdev arg is selected, use deferred pop +* for all single-entry CQs. +*/ + if (cfg.cq_depth == 1 || (cfg.cq_depth == 2 && use_rsvd_token_scheme)) { + if (dlb->defer_sched) + qm_port->token_pop_mode = DEFERRED_POP; + } + + /* The default enqueue functions do not include delayed-pop support for +* performance reasons. +*/ + if (qm_port->token_pop_mode == DELAYED_POP) { + dlb->event_dev->enqueue = dlb_event_enqueue_delayed; + dlb->event_dev->enqueue_burst = + dlb_event_enqueue_burst_delayed; + dlb->event_dev->enqueue_new_burst = + dlb_event_enqueue_new_burst_delayed; + dlb->event_dev->enqueue_forward_burst = + dlb_event_enqueue_forward_burst_delayed; + } + qm_port->owed_tokens = 0; qm_port->issued_releases = 0; @@ -1181,6 +1208,8 @@ dlb_hw_create_dir_port(struct dlb_eventdev *dlb, qm_port->dequeue_depth = dequeue_depth; + /* Directed ports are auto-pop, by default. 
*/ + qm_port->token_pop_mode = AUTO_POP; qm_port->owed_tokens = 0; qm_port->issued_releases = 0; @@ -2681,7 +2710,8 @@ dlb_consume_qe_immediate(struct dlb_port *qm_port, int num) static inline uint16_t __dlb_event_enqueue_burst(void *event_port, const struct rte_event events[], - uint16_t num) + uint16_t num, + bool use_delayed) { struct dlb_eventdev_port *ev_port = event_port; struct dlb_port *qm_port = &ev_port->qm_port; @@ -2709,6 +2739,35 @@ __dlb_event_enqueue_burst(void *event_port, for (; j < DLB_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) { const struct rte_event *ev = &events[i + j]; + int16_t thresh = qm_port->token_pop_thresh; + + if (use_delayed && + qm_port->token_pop_mode == DELAYED_POP && + (ev->op == RTE_EVENT_OP_FORWARD || +ev->op == RTE_EVENT_OP_RELEASE) && + qm_port->issued_releases >= thresh - 1) { + /* Insert the token pop QE and break out. This +* may result in a partial HCW, but that is +* simpler than supporting arbitrary QE +* insertion. +*/ + dlb_construct_token_pop_qe(qm_port, j); + + /* Reset the releases for the next QE batch */ + qm_port->issued_rele
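The public header itself is not shown in full above. Assuming the obvious signature for the new call (one function taking a device ID, a port ID and one of the three modes listed in the commit message), per-port selection would look roughly like this, done before the port is set up so the PMD can install the delayed-pop enqueue path:

#include <rte_eventdev.h>

#include "rte_pmd_dlb.h"

/* Assumed prototype, inferred from the commit message, not confirmed by the
 * excerpt above:
 * int rte_pmd_dlb_set_token_pop_mode(uint8_t dev_id, uint8_t port_id,
 *                                    enum dlb_token_pop_mode mode);
 */
static int
use_delayed_pop(uint8_t dev_id, uint8_t port_id,
                const struct rte_event_port_conf *pconf)
{
        int ret;

        ret = rte_pmd_dlb_set_token_pop_mode(dev_id, port_id, DELAYED_POP);
        if (ret < 0)
                return ret;

        return rte_event_port_setup(dev_id, port_id, pconf);
}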
[dpdk-dev] [PATCH v6 23/23] event/dlb: add timeout ticks entry point
Adds the timeout ticks conversion function. Adds announcement of availability of the new driver for Intel Dynamic Load Balancer 1.0 hardware. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- doc/guides/rel_notes/release_20_11.rst | 5 + drivers/event/dlb/dlb.c| 13 + 2 files changed, 18 insertions(+) diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst index cc72609..e3fac7e 100644 --- a/doc/guides/rel_notes/release_20_11.rst +++ b/doc/guides/rel_notes/release_20_11.rst @@ -55,6 +55,11 @@ New Features Also, make sure to start the actual text at the margin. === +* **Added a new driver for the Intel Dynamic Load Balancer v1.0 device.** + + Added the new ``dlb`` eventdev driver for the Intel DLB V1.0 device. See the + :doc:`../eventdevs/dlb` eventdev guide for more details on this new driver. + Removed Items - diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index aa22d03..b21c9b1 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -3879,6 +3879,18 @@ dlb_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id) /* This function intentionally left blank. */ } +static int +dlb_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns, + uint64_t *timeout_ticks) +{ + RTE_SET_USED(dev); + uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9; + + *timeout_ticks = ns * cycles_per_ns; + + return 0; +} + void dlb_entry_points_init(struct rte_eventdev *dev) { @@ -3900,6 +3912,7 @@ dlb_entry_points_init(struct rte_eventdev *dev) .port_unlink = dlb_eventdev_port_unlink, .port_unlinks_in_progress = dlb_eventdev_port_unlinks_in_progress, + .timeout_ticks= dlb_eventdev_timeout_ticks, .dump = dlb_eventdev_dump, .xstats_get = dlb_eventdev_xstats_get, .xstats_get_names = dlb_eventdev_xstats_get_names, -- 2.6.4
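For the conversion above, a 2 GHz timer gives cycles_per_ns = 2, so a 10000 ns timeout becomes 20000 ticks. Applications normally obtain the value through the generic wrapper and pass it as the last argument of rte_event_dequeue_burst(), as in this sketch:

#include <rte_eventdev.h>

static uint64_t
dequeue_timeout_ticks(uint8_t dev_id)
{
        uint64_t ticks = 0;

        /* 10 us expressed in nanoseconds; fall back to 0 (poll) on error. */
        if (rte_event_dequeue_timeout_ticks(dev_id, 10 * 1000, &ticks) < 0)
                ticks = 0;

        return ticks;
}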
[dpdk-dev] [PATCH v6 21/23] event/dlb: add PMD self-tests
Add a variety of self-tests for both ldb and directed ports/queues, as well as configure, start, stop, link, etc... Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- app/test/test_eventdev.c |7 + drivers/event/dlb/dlb.c |1 + drivers/event/dlb/dlb_selftest.c | 1551 ++ drivers/event/dlb/meson.build|1 + 4 files changed, 1560 insertions(+) create mode 100644 drivers/event/dlb/dlb_selftest.c diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c index 62019c1..ba27bed 100644 --- a/app/test/test_eventdev.c +++ b/app/test/test_eventdev.c @@ -1030,6 +1030,12 @@ test_eventdev_selftest_dpaa2(void) return test_eventdev_selftest_impl("event_dpaa2", ""); } +static int +test_eventdev_selftest_dlb(void) +{ + return test_eventdev_selftest_impl("dlb_event", ""); +} + REGISTER_TEST_COMMAND(eventdev_common_autotest, test_eventdev_common); REGISTER_TEST_COMMAND(eventdev_selftest_sw, test_eventdev_selftest_sw); REGISTER_TEST_COMMAND(eventdev_selftest_octeontx, @@ -1037,3 +1043,4 @@ REGISTER_TEST_COMMAND(eventdev_selftest_octeontx, REGISTER_TEST_COMMAND(eventdev_selftest_octeontx2, test_eventdev_selftest_octeontx2); REGISTER_TEST_COMMAND(eventdev_selftest_dpaa2, test_eventdev_selftest_dpaa2); +REGISTER_TEST_COMMAND(eventdev_selftest_dlb, test_eventdev_selftest_dlb); diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index 4e1af0a..0585875 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -3878,6 +3878,7 @@ dlb_entry_points_init(struct rte_eventdev *dev) .xstats_get_names = dlb_eventdev_xstats_get_names, .xstats_get_by_name = dlb_eventdev_xstats_get_by_name, .xstats_reset = dlb_eventdev_xstats_reset, + .dev_selftest = test_dlb_eventdev, }; /* Expose PMD's eventdev interface */ diff --git a/drivers/event/dlb/dlb_selftest.c b/drivers/event/dlb/dlb_selftest.c new file mode 100644 index 000..2be5520 --- /dev/null +++ b/drivers/event/dlb/dlb_selftest.c @@ -0,0 +1,1551 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dlb_priv.h" +#include "rte_pmd_dlb.h" + +#define MAX_PORTS 32 +#define MAX_QIDS 32 +#define DEFAULT_NUM_SEQ_NUMS 32 + +static struct rte_mempool *eventdev_func_mempool; +static int evdev; + +struct test { + struct rte_mempool *mbuf_pool; + int nb_qids; +}; + +/* initialization and config */ +static inline int +init(struct test *t, int nb_queues, int nb_ports) +{ + struct rte_event_dev_config config = {0}; + struct rte_event_dev_info info; + int ret; + + memset(t, 0, sizeof(*t)); + + t->mbuf_pool = eventdev_func_mempool; + + if (rte_event_dev_info_get(evdev, &info)) { + printf("%d: Error querying device info\n", __LINE__); + return -1; + } + + config.nb_event_queues = nb_queues; + config.nb_event_ports = nb_ports; + config.nb_event_queue_flows = info.max_event_queue_flows; + config.nb_events_limit = info.max_num_events; + config.nb_event_port_dequeue_depth = info.max_event_port_dequeue_depth; + config.nb_event_port_enqueue_depth = info.max_event_port_enqueue_depth; + config.dequeue_timeout_ns = info.max_dequeue_timeout_ns; + config.event_dev_cfg = RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT; + + ret = rte_event_dev_configure(evdev, &config); + if (ret < 0) + printf("%d: Error configuring device\n", __LINE__); + + return ret; +} + +static inline int +create_ports(int num_ports) +{ + int i; + + if (num_ports > MAX_PORTS) + return -1; + + for (i 
= 0; i < num_ports; i++) { + struct rte_event_port_conf conf; + + if (rte_event_port_default_conf_get(evdev, i, &conf)) { + printf("%d: Error querying default port conf\n", + __LINE__); + return -1; + } + + if (rte_event_port_setup(evdev, i, &conf) < 0) { + printf("%d: Error setting up port %d\n", i, __LINE__); + return -1; + } + } + + return 0; +} + +static inline int +create_lb_qids(struct test *t, int num_qids, uint32_t flags) +{ + int i; + + for (i = t->nb_qids; i < t->nb_qids + num_qids; i++) { + struct rte_event_queue_conf conf; + + if (rte_event_queue_default_conf_get(evdev, i, &conf)) { + printf("%d: Error querying default queue conf\n", + __LINE__); + return -1; + } + +
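Besides the eventdev_selftest_dlb command registered in the test app above, the self-test can also be invoked programmatically through the generic API, e.g.:

#include <rte_eventdev.h>

static int
run_dlb_selftest(void)
{
        int dev_id = rte_event_dev_get_dev_id("dlb_event");

        if (dev_id < 0)
                return dev_id; /* no DLB eventdev probed */

        return rte_event_dev_selftest(dev_id);
}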
[dpdk-dev] [PATCH v6 19/23] event/dlb: add eventdev stop and close
Add support for eventdev stop and close entry points. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- drivers/event/dlb/dlb.c | 256 +-- drivers/event/dlb/dlb_iface.c| 6 + drivers/event/dlb/dlb_iface.h| 6 + drivers/event/dlb/pf/base/dlb_resource.c | 89 +++ drivers/event/dlb/pf/dlb_pf.c| 47 ++ 5 files changed, 393 insertions(+), 11 deletions(-) diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index c022139..cdabc9b 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -90,17 +90,6 @@ dlb_event_enqueue_forward_burst_delayed(void *event_port, const struct rte_event events[], uint16_t num); -uint32_t -dlb_get_queue_depth(struct dlb_eventdev *dlb, - struct dlb_eventdev_queue *queue) -{ - /* DUMMY FOR NOW So "xstats" patch compiles */ - RTE_SET_USED(dlb); - RTE_SET_USED(queue); - - return 0; -} - static int dlb_hw_query_resources(struct dlb_eventdev *dlb) { @@ -3529,6 +3518,249 @@ dlb_event_dequeue_sparse(void *event_port, struct rte_event *ev, uint64_t wait) return dlb_event_dequeue_burst_sparse(event_port, ev, 1, wait); } +static uint32_t +dlb_get_ldb_queue_depth(struct dlb_eventdev *dlb, + struct dlb_eventdev_queue *queue) +{ + struct dlb_hw_dev *handle = &dlb->qm_instance; + struct dlb_get_ldb_queue_depth_args cfg; + struct dlb_cmd_response response; + int ret; + + cfg.queue_id = queue->qm_queue.id; + cfg.response = (uintptr_t)&response; + + ret = dlb_iface_get_ldb_queue_depth(handle, &cfg); + if (ret < 0) { + DLB_LOG_ERR("dlb: get_ldb_queue_depth ret=%d (driver status: %s)\n", + ret, dlb_error_strings[response.status]); + return ret; + } + + return response.id; +} + +static uint32_t +dlb_get_dir_queue_depth(struct dlb_eventdev *dlb, + struct dlb_eventdev_queue *queue) +{ + struct dlb_hw_dev *handle = &dlb->qm_instance; + struct dlb_get_dir_queue_depth_args cfg; + struct dlb_cmd_response response; + int ret; + + cfg.queue_id = queue->qm_queue.id; + cfg.response = (uintptr_t)&response; + + ret = dlb_iface_get_dir_queue_depth(handle, &cfg); + if (ret < 0) { + DLB_LOG_ERR("dlb: get_dir_queue_depth ret=%d (driver status: %s)\n", + ret, dlb_error_strings[response.status]); + return ret; + } + + return response.id; +} + +uint32_t +dlb_get_queue_depth(struct dlb_eventdev *dlb, + struct dlb_eventdev_queue *queue) +{ + if (queue->qm_queue.is_directed) + return dlb_get_dir_queue_depth(dlb, queue); + else + return dlb_get_ldb_queue_depth(dlb, queue); +} + +static bool +dlb_queue_is_empty(struct dlb_eventdev *dlb, + struct dlb_eventdev_queue *queue) +{ + return dlb_get_queue_depth(dlb, queue) == 0; +} + +static bool +dlb_linked_queues_empty(struct dlb_eventdev *dlb) +{ + int i; + + for (i = 0; i < dlb->num_queues; i++) { + if (dlb->ev_queues[i].num_links == 0) + continue; + if (!dlb_queue_is_empty(dlb, &dlb->ev_queues[i])) + return false; + } + + return true; +} + +static bool +dlb_queues_empty(struct dlb_eventdev *dlb) +{ + int i; + + for (i = 0; i < dlb->num_queues; i++) { + if (!dlb_queue_is_empty(dlb, &dlb->ev_queues[i])) + return false; + } + + return true; +} + +static void +dlb_flush_port(struct rte_eventdev *dev, int port_id) +{ + struct dlb_eventdev *dlb = dlb_pmd_priv(dev); + eventdev_stop_flush_t flush; + struct rte_event ev; + uint8_t dev_id; + void *arg; + int i; + + flush = dev->dev_ops->dev_stop_flush; + dev_id = dev->data->dev_id; + arg = dev->data->dev_stop_flush_arg; + + while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) { + if (flush) + flush(dev_id, ev, arg); + + if (dlb->ev_ports[port_id].qm_port.is_directed) + continue; + + ev.op = 
RTE_EVENT_OP_RELEASE; + + rte_event_enqueue_burst(dev_id, port_id, &ev, 1); + } + + /* Enqueue any additional outstanding releases */ + ev.op = RTE_EVENT_OP_RELEASE; + + for (i = dlb->ev_ports[port_id].outstanding_releases; i > 0; i--) + rte_event_enqueue_burst(dev_id, port_id, &ev, 1); +} + +static void +dlb_drain(struct rte_eventdev *dev) +{ + struct dlb_eventdev *dlb = dlb_pmd_priv(dev); + struct dlb_eventdev_port *ev_port = NULL; + uint8_t dev_id; + int i;
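Editorial usage sketch (not part of the patch): with these stop and close entry points in place, an application would typically register a stop-flush callback so that events drained by dlb_flush_port() are cleaned up, then stop and close the device. The flush_cb and shutdown_eventdev names below are illustrative only.

#include <rte_common.h>
#include <rte_eventdev.h>
#include <rte_mbuf.h>

/* Hypothetical flush callback: free the mbuf carried by any event still in
 * flight when the eventdev is stopped.
 */
static void
flush_cb(uint8_t dev_id, struct rte_event ev, void *arg)
{
	RTE_SET_USED(dev_id);
	RTE_SET_USED(arg);
	rte_pktmbuf_free(ev.mbuf);
}

static void
shutdown_eventdev(uint8_t dev_id)
{
	/* Register the flush callback before stopping; the PMD invokes it for
	 * every event drained from the ports during rte_event_dev_stop().
	 */
	rte_event_dev_stop_flush_callback_register(dev_id, flush_cb, NULL);
	rte_event_dev_stop(dev_id);
	rte_event_dev_close(dev_id);
}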
[dpdk-dev] [PATCH v6 17/23] event/dlb: add enqueue and its burst variants
Add support for enqueue and its variants. Signed-off-by: Timothy McDaniel Reviewed-by: Gage Eads --- doc/guides/eventdevs/dlb.rst | 161 ++ drivers/event/dlb/dlb.c | 682 +++ 2 files changed, 843 insertions(+) diff --git a/doc/guides/eventdevs/dlb.rst b/doc/guides/eventdevs/dlb.rst index f5fb055..12142b1 100644 --- a/doc/guides/eventdevs/dlb.rst +++ b/doc/guides/eventdevs/dlb.rst @@ -157,3 +157,164 @@ type (atomic, ordered, or parallel) is not preserved, and an event's sched_type will be set to ``RTE_SCHED_TYPE_ATOMIC`` when it is dequeued from a directed port. +Flow ID +~~ + +The flow ID field is not preserved in the event when it is scheduled in the +DLB, because the DLB hardware control word format does not have sufficient +space to preserve every event field. As a result, the flow ID specified with +the enqueued event will not be in the dequeued event. If this field is +required, the application should pass it through an out-of-band path (for +example in the mbuf's udata64 field, if the event points to an mbuf) or +reconstruct the flow ID after receiving the event. + +Also, the DLB hardware control word supports a 16-bit flow ID. Since struct +rte_event's flow_id field is 20 bits, the DLB PMD drops the most significant +four bits from the event's flow ID. + +Hardware Credits +~~~ + +DLB uses a hardware credit scheme to prevent software from overflowing hardware +event storage, with each unit of storage represented by a credit. A port spends +a credit to enqueue an event, and hardware refills the ports with credits as the +events are scheduled to ports. Refills come from credit pools, and each port is +a member of a load-balanced credit pool and a directed credit pool. The +load-balanced credits are used to enqueue to load-balanced queues, and directed +credits are used for directed queues. + +A DLB eventdev contains one load-balanced and one directed credit pool. These +pools' sizes are controlled by the nb_events_limit field in struct +rte_event_dev_config. The load-balanced pool is sized to contain +nb_events_limit credits, and the directed pool is sized to contain +nb_events_limit/4 credits. The directed pool size can be overridden with the +num_dir_credits vdev argument, like so: + +.. code-block:: console + + --vdev=dlb1_event,num_dir_credits= + +This can be used if the default allocation is too low or too high for the +specific application needs. The PMD also supports a vdev arg that limits the +max_num_events reported by rte_event_dev_info_get(): + +.. code-block:: console + + --vdev=dlb1_event,max_num_events= + +By default, max_num_events is reported as the total available load-balanced +credits. If multiple DLB-based applications are being used, it may be desirable +to control how many load-balanced credits each application uses, particularly +when application(s) are written to configure nb_events_limit equal to the +reported max_num_events. + +Each port is a member of both credit pools. A port's credit allocation is +defined by its low watermark, high watermark, and refill quanta. These three +parameters are calculated by the dlb PMD like so: + +- The load-balanced high watermark is set to the port's enqueue_depth. + The directed high watermark is set to the minimum of the enqueue_depth and + the directed pool size divided by the total number of ports. +- The refill quanta is set to half the high watermark. +- The low watermark is set to the minimum of 16 and the refill quanta. + +When the eventdev is started, each port is pre-allocated a high watermark's +worth of credits. 
For example, if an eventdev contains four ports with enqueue +depths of 32 and a load-balanced credit pool size of 4096, each port will start +with 32 load-balanced credits, and there will be 3968 credits available to +replenish the ports. Thus, a single port is not capable of enqueueing up to the +nb_events_limit (without any events being dequeued), since the other ports are +retaining their initial credit allocation; in short, all ports must enqueue in +order to reach the limit. + +If a port attempts to enqueue and has no credits available, the enqueue +operation will fail and the application must retry the enqueue. Credits are +replenished asynchronously by the DLB hardware. + +Software Credits +~~~ + +The DLB is a "closed system" event dev, and the DLB PMD layers a software +credit scheme on top of the hardware credit scheme in order to comply with +the per-port backpressure described in the eventdev API. + +The DLB's hardware scheme is local to a queue/pipeline stage: a port spends a +credit when it enqueues to a queue, and credits are later replenished after the +events are dequeued and released. + +In the software credit scheme, a credit is consumed when a new (.op = +RTE_EVENT_OP_NEW) event is injected into the system, and the credit is +replenished when the event is released from the system (either exp
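Editorial note: the flow ID guidance in the documentation above lends itself to a small helper pair. This is a minimal sketch assuming the event carries an mbuf and that the mbuf udata64 field suggested by the guide is available in the DPDK version in use; the helper names are illustrative, not part of the patch.

#include <rte_eventdev.h>
#include <rte_mbuf.h>

/* Stash the 20-bit flow ID out of band before enqueueing to the DLB, since
 * the hardware only preserves 16 bits of it.
 */
static inline void
stash_flow_id(struct rte_event *ev)
{
	ev->mbuf->udata64 = ev->flow_id;
}

/* Restore the flow ID after dequeue. */
static inline void
restore_flow_id(struct rte_event *ev)
{
	ev->flow_id = (uint32_t)ev->mbuf->udata64;
}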
Re: [dpdk-dev] [PATCH] net/netvsc: use memzone to allocate contiguous physical mem for rndis
On 10/22/2020 1:26 AM, Long Li wrote: From: Long Li When sending data, netvsc assumes the tx_rndis buffer is contiguous and calculates physical addresses based on this assumption. Use memzone to allocate tx_rndis so it's guaranteed that this buffer is physically contiguous. Cc:sta...@dpdk.org Signed-off-by: Long Li Applied to dpdk-next-net/main, thanks.
[dpdk-dev] [PATCH v2 0/2] intel: fix writeback
This patch series is targeted at the v20.11 release. It fixes two separate bugs: on the PF, it reduces the maximum latency for a single packet from 16us to 2us; on the VF, it adds an ITR timeout that both allows descriptor aggregation for writeback and minimizes latency, setting the value to match the PF for the E810 family. v2: subscribe to list and re-send Jesse Brandeburg (2): iavf: Fix performance with writeback policy ice: update writeback policy to reduce latency drivers/net/iavf/iavf_ethdev.c | 13 +++-- drivers/net/ice/ice_ethdev.c | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) -- 2.25.4
[dpdk-dev] [PATCH v2 1/2] iavf: Fix performance with writeback policy
The iavf driver was trying to use writeback on ITR, but was never setting an ITR, so it didn't work. This caused performance to be limited due to too much PCIe traffic and partial writes during most benchmarking workloads. Set the ITR during queue setup, which can be checked at runtime by reading register 0x2800. Setting the value to 2us allows for generally good streaming packet performance while keeping latency down. Fixes: d6bde6b5eae9 ("net/avf: enable Rx interrupt") Reported-by: Brian Johnson Signed-off-by: Jesse Brandeburg --- drivers/net/iavf/iavf_ethdev.c | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index 0ef023c0aee4..44372294066f 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -383,10 +383,19 @@ static int iavf_config_rx_queues_irqs(struct rte_eth_dev *dev, VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) { /* If WB_ON_ITR supports, enable it */ vf->msix_base = IAVF_RX_VEC_START; + /* Set the ITR for index zero, to 2us to make sure that +* we leave time for aggregation to occur, but don't +* increase latency dramatically. +*/ IAVF_WRITE_REG(hw, IAVF_VFINT_DYN_CTLN1(vf->msix_base - 1), - IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | - IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK); + (0 << IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | + IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK | + (2UL << IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT)); + /* debug - check for success! the return value +* should be 2, offset is 0x2800 +*/ + /* IAVF_READ_REG(hw, IAVF_VFINT_ITRN1(0, 0)); */ } else { /* If no WB_ON_ITR offload flags, need to set * interrupt for descriptor write back. -- 2.25.4
[dpdk-dev] [PATCH v2 2/2] ice: update writeback policy to reduce latency
Just like iavf, setting the value to 2us allows for generally good streaming packet performance while keeping latency down, and generally keeps the performance of the PF and VF interfaces similar. The previous value of 0x10 meant that the latency for a single packet receive could be as much as 16us. Fixes: 65dfc889d86b ("net/ice: support Rx queue interruption") Reported-by: Brian Johnson Signed-off-by: Jesse Brandeburg --- drivers/net/ice/ice_ethdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index c65125ff3260..0df491f93126 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -3345,7 +3345,7 @@ __vsi_queues_bind_intr(struct ice_vsi *vsi, uint16_t msix_vect, PMD_DRV_LOG(INFO, "queue %d is binding to vect %d", base_queue + i, msix_vect); /* set ITR0 value */ - ICE_WRITE_REG(hw, GLINT_ITR(0, msix_vect), 0x10); + ICE_WRITE_REG(hw, GLINT_ITR(0, msix_vect), 0x2); ICE_WRITE_REG(hw, QINT_RQCTL(base_queue + i), val); ICE_WRITE_REG(hw, QINT_TQCTL(base_queue + i), val_tx); } -- 2.25.4
Re: [dpdk-dev] [PATCH v3 01/11] net/bnxt: add stingray support to core layer
On Fri, Oct 23, 2020 at 3:54 AM Ferruh Yigit wrote: > > On 10/22/2020 11:05 PM, Ajit Khaparde wrote: > > diff --git a/drivers/net/bnxt/tf_core/cfa_resource_types.h > > b/drivers/net/bnxt/tf_core/cfa_resource_types.h > > index 19838c393d..412204f7ed 100644 > > --- a/drivers/net/bnxt/tf_core/cfa_resource_types.h > > +++ b/drivers/net/bnxt/tf_core/cfa_resource_types.h > > @@ -1,5 +1,5 @@ > > /* SPDX-License-Identifier: BSD-3-Clause > > - * Copyright(c) 2019-2020 Broadcom > > + * Copyright(c) 2014-2018 Broadcom > >* All rights reserved. > >*/ > > Is this intentional, pulling back the copyright years, should I fix in the > tree? That's not right. Can you please fix it in the tree? Thanks
Re: [dpdk-dev] [PATCH v6 2/4] lib/hash: integrate RCU QSBR
Hi Yipeng, I have addressed the comments. Can you please take a look? Thank you! > On Oct 21, 2020, at 5:50 PM, Dharmik Thakkar wrote: > > Currently, users have to use external RCU mechanisms to free resources > when using lock free hash algorithm. > > Integrate RCU QSBR process to make it easier for the applications to use > lock free algorithm. > Refer to RCU documentation to understand various aspects of > integrating RCU library into other libraries. > > Suggested-by: Honnappa Nagarahalli > Signed-off-by: Dharmik Thakkar > Reviewed-by: Ruifeng Wang > Acked-by: Ray Kinsella > ---
[dpdk-dev] [PATCH V2 1/2] net/netvsc: allow setting rx and tx copy break
From: Stephen Hemminger The values for Rx and Tx copy break should be tunable rather than hard coded constants. The rx_copybreak sets the threshold where the driver uses an external mbuf to avoid having to copy data. Setting 0 for copybreak will cause the driver to always create an external mbuf. Setting a value greater than the MTU prevents it from ever making an external mbuf, so the data is always copied. The default value is 256 (bytes). Likewise the tx_copybreak sets the threshold where the driver aggregates multiple small packets into one request. If tx_copybreak is 0 then each packet goes as a VMBus request (no copying). If tx_copybreak is set larger than the MTU, then all packets smaller than the chunk size of the VMBus send buffer will be copied; larger packets always have to go as a single direct request. The default value is 512 (bytes). Signed-off-by: Stephen Hemminger Signed-off-by: Long Li --- doc/guides/nics/netvsc.rst | 17 drivers/net/netvsc/hn_ethdev.c | 48 +- drivers/net/netvsc/hn_rxtx.c | 8 +++--- drivers/net/netvsc/hn_var.h| 5 4 files changed, 60 insertions(+), 18 deletions(-) diff --git a/doc/guides/nics/netvsc.rst b/doc/guides/nics/netvsc.rst index 6dbb9a5513..5a68ffa8a3 100644 --- a/doc/guides/nics/netvsc.rst +++ b/doc/guides/nics/netvsc.rst @@ -116,3 +116,20 @@ The user can specify below argument in devargs. values save CPU cycles. This parameter is in microseconds. If the value is too large or too small it will be ignored by the host. (Default: 50) + +#. ``rx_copybreak``: + +The rx_copybreak sets the threshold where the driver uses an external +mbuf to avoid having to copy data. Setting 0 for copybreak will cause +the driver to always create an external mbuf. Setting a value greater than +the MTU prevents it from ever making an external mbuf, so the data is +always copied. The default value is 256 (bytes). + +#. ``tx_copybreak``: + +The tx_copybreak sets the threshold where the driver aggregates +multiple small packets into one request. If tx_copybreak is 0 then +each packet goes as a VMBus request (no copying). If tx_copybreak is +set larger than the MTU, then all packets smaller than the chunk size +of the VMBus send buffer will be copied; larger packets always have to +go as a single direct request. The default value is 512 (bytes). 
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c index 4a01f1d464..2de951bae9 100644 --- a/drivers/net/netvsc/hn_ethdev.c +++ b/drivers/net/netvsc/hn_ethdev.c @@ -45,6 +45,10 @@ DEV_RX_OFFLOAD_VLAN_STRIP | \ DEV_RX_OFFLOAD_RSS_HASH) +#define NETVSC_ARG_LATENCY "latency" +#define NETVSC_ARG_RXBREAK "rx_copybreak" +#define NETVSC_ARG_TXBREAK "tx_copybreak" + struct hn_xstats_name_off { char name[RTE_ETH_XSTATS_NAME_SIZE]; unsigned int offset; @@ -136,24 +140,32 @@ eth_dev_vmbus_release(struct rte_eth_dev *eth_dev) eth_dev->intr_handle = NULL; } -/* handle "latency=X" from devargs */ -static int hn_set_latency(const char *key, const char *value, void *opaque) +static int hn_set_parameter(const char *key, const char *value, void *opaque) { struct hn_data *hv = opaque; char *endp = NULL; - unsigned long lat; - - errno = 0; - lat = strtoul(value, &endp, 0); + unsigned long v; + v = strtoul(value, &endp, 0); if (*value == '\0' || *endp != '\0') { PMD_DRV_LOG(ERR, "invalid parameter %s=%s", key, value); return -EINVAL; } - PMD_DRV_LOG(DEBUG, "set latency %lu usec", lat); + if (!strcmp(key, NETVSC_ARG_LATENCY)) { + /* usec to nsec */ + hv->latency = v * 1000; + PMD_DRV_LOG(DEBUG, "set latency %u usec", hv->latency); + } else if (!strcmp(key, NETVSC_ARG_RXBREAK)) { + hv->rx_copybreak = v; + PMD_DRV_LOG(DEBUG, "rx copy break set to %u", + hv->rx_copybreak); + } else if (!strcmp(key, NETVSC_ARG_TXBREAK)) { + hv->tx_copybreak = v; + PMD_DRV_LOG(DEBUG, "tx copy break set to %u", + hv->tx_copybreak); + } - hv->latency = lat * 1000; /* usec to nsec */ return 0; } @@ -163,11 +175,13 @@ static int hn_parse_args(const struct rte_eth_dev *dev) struct hn_data *hv = dev->data->dev_private; struct rte_devargs *devargs = dev->device->devargs; static const char * const valid_keys[] = { - "latency", + NETVSC_ARG_LATENCY, + NETVSC_ARG_RXBREAK, + NETVSC_ARG_TXBREAK, NULL }; struct rte_kvargs *kvlist; - int ret; + unsigned int i, ret = 0; if (!devargs) return 0; @@ -181,9 +195,14 @@ static int hn_parse_args(const
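Editorial aside: the copybreak semantics described in this patch boil down to two threshold checks. The sketch below only illustrates the intended behaviour as described in the commit message; it is not the driver code, and the helper names are hypothetical.

/* Transmit: packets at or below tx_copybreak are intended to be copied and
 * aggregated into one VMBus request; larger packets go out as individual
 * direct requests.
 */
static inline int
tx_should_copy(uint32_t pkt_len, uint32_t tx_copybreak)
{
	return pkt_len <= tx_copybreak;
}

/* Receive: packets above rx_copybreak may be handed up as external mbufs
 * instead of being copied into a freshly allocated mbuf.
 */
static inline int
rx_use_external_mbuf(uint32_t data_len, uint32_t rx_copybreak)
{
	return data_len > rx_copybreak;
}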
[dpdk-dev] [PATCH V2 2/2] net/netvsc: introduce driver parameter to control the use of external mbuf on receiving data
From: Long Li When receiving packets, netvsc puts data in a buffer mapped through UIO. Depending on packet size, netvsc may attach the buffer as an external mbuf. This is not a problem if this mbuf is consumed in the application, and the application can correctly read data out of an external mbuf. However, there are two problems with data in an external mbuf. 1. Due to the limitation of the kernel UIO implementation, the physical address of this external buffer is not exposed to user-mode. If this mbuf is passed to another driver, the other driver is unable to map this buffer to iova. 2. Some DPDK applications are not aware of external mbufs, and may misbehave when they receive an mbuf with an external buffer attached. Introduce a driver parameter "rx_extmbuf_enable" to control if netvsc should use external mbufs for receiving packets. The default value is 0. (netvsc doesn't use external mbufs; it always allocates an mbuf and copies data into it) A non-zero value tells netvsc to attach external buffers to mbufs on receiving packets, thus avoiding a memory copy. Signed-off-by: Long Li --- doc/guides/nics/netvsc.rst | 8 drivers/net/netvsc/hn_ethdev.c | 7 +++ drivers/net/netvsc/hn_rxtx.c | 2 +- drivers/net/netvsc/hn_var.h| 3 +++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/guides/nics/netvsc.rst b/doc/guides/nics/netvsc.rst index 5a68ffa8a3..19f9940fe6 100644 --- a/doc/guides/nics/netvsc.rst +++ b/doc/guides/nics/netvsc.rst @@ -133,3 +133,11 @@ The user can specify below argument in devargs. set larger than the MTU, then all packets smaller than the chunk size of the VMBus send buffer will be copied; larger packets always have to go as a single direct request. The default value is 512 (bytes). + +#. ``rx_extmbuf_enable``: +The rx_extmbuf_enable is used to control if netvsc should use external +mbuf for receiving packets. The default value is 0. (netvsc doesn't use +external mbufs; it always allocates an mbuf and copies received data into it) +A non-zero value tells netvsc to attach external buffers to mbuf on +receiving packets, thus avoiding a memory copy. Use of external buffers +requires that the application is able to read data from an external mbuf. 
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c index 2de951bae9..2d78a0118c 100644 --- a/drivers/net/netvsc/hn_ethdev.c +++ b/drivers/net/netvsc/hn_ethdev.c @@ -48,6 +48,7 @@ #define NETVSC_ARG_LATENCY "latency" #define NETVSC_ARG_RXBREAK "rx_copybreak" #define NETVSC_ARG_TXBREAK "tx_copybreak" +#define NETVSC_ARG_RX_EXTMBUF_ENABLE "rx_extmbuf_enable" struct hn_xstats_name_off { char name[RTE_ETH_XSTATS_NAME_SIZE]; @@ -164,6 +165,10 @@ static int hn_set_parameter(const char *key, const char *value, void *opaque) hv->tx_copybreak = v; PMD_DRV_LOG(DEBUG, "tx copy break set to %u", hv->tx_copybreak); + } else if (!strcmp(key, NETVSC_ARG_RX_EXTMBUF_ENABLE)) { + hv->rx_extmbuf_enable = v; + PMD_DRV_LOG(DEBUG, "rx extmbuf enable set to %u", + hv->rx_extmbuf_enable); } return 0; @@ -178,6 +183,7 @@ static int hn_parse_args(const struct rte_eth_dev *dev) NETVSC_ARG_LATENCY, NETVSC_ARG_RXBREAK, NETVSC_ARG_TXBREAK, + NETVSC_ARG_RX_EXTMBUF_ENABLE, NULL }; struct rte_kvargs *kvlist; @@ -987,6 +993,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) hv->latency = HN_CHAN_LATENCY_NS; hv->rx_copybreak = HN_RXCOPY_THRESHOLD; hv->tx_copybreak = HN_TXCOPY_THRESHOLD; + hv->rx_extmbuf_enable = HN_RX_EXTMBUF_ENABLE; hv->max_queues = 1; rte_rwlock_init(&hv->vf_lock); diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c index ce75b4d092..35c046b1a7 100644 --- a/drivers/net/netvsc/hn_rxtx.c +++ b/drivers/net/netvsc/hn_rxtx.c @@ -565,7 +565,7 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, * For large packets, avoid copy if possible but need to keep * some space available in receive area for later packets. */ - if (dlen > hv->rx_copybreak && + if (hv->rx_extmbuf_enable && dlen > hv->rx_copybreak && (uint32_t)rte_atomic32_read(&rxq->rxbuf_outstanding) < hv->rxbuf_section_cnt / 2) { struct rte_mbuf_ext_shared_info *shinfo; diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h index c1bdafb413..9dcb669f7a 100644 --- a/drivers/net/netvsc/hn_var.h +++ b/drivers/net/netvsc/hn_var.h @@ -26,6 +26,8 @@ #define HN_TXCOPY_THRESHOLD512 #define HN_RXCOPY_THRESHOLD256 +#define HN_RX_EXTMBUF_ENABLE 0 + /* Buffers need to be aligned */ #ifndef PAGE_SIZE #define PAGE_SIZE 4096 @@ -118,6 +120,7 @@ struct hn_data { struct rte_mem_resource *rxbuf_re
Re: [dpdk-dev] [PATCH v6 2/4] lib/hash: integrate RCU QSBR
> -Original Message- > From: Dharmik Thakkar > Sent: Wednesday, October 21, 2020 3:50 PM > To: Wang, Yipeng1 ; Gobriel, Sameh > ; Richardson, Bruce ; > Ray Kinsella ; Neil Horman > Cc: dev@dpdk.org; n...@arm.com; Dharmik Thakkar > > Subject: [PATCH v6 2/4] lib/hash: integrate RCU QSBR > > Currently, users have to use external RCU mechanisms to free resources when > using lock free hash algorithm. > > Integrate RCU QSBR process to make it easier for the applications to use lock > free algorithm. > Refer to RCU documentation to understand various aspects of integrating RCU > library into other libraries. > > Suggested-by: Honnappa Nagarahalli > Signed-off-by: Dharmik Thakkar > Reviewed-by: Ruifeng Wang > Acked-by: Ray Kinsella > --- [Wang, Yipeng] Thanks for revising the code. Please check the checkpatch reported coding style issues to fix. After that Acked-by: Yipeng Wang
Re: [dpdk-dev] [PATCH v6 4/4] test/hash: add tests for integrated RCU QSBR
> -Original Message- > From: Dharmik Thakkar > Sent: Wednesday, October 21, 2020 3:50 PM > To: Wang, Yipeng1 ; Gobriel, Sameh > ; Richardson, Bruce > Cc: dev@dpdk.org; n...@arm.com; Dharmik Thakkar > > Subject: [PATCH v6 4/4] test/hash: add tests for integrated RCU QSBR > > Add functional and performance tests for the integrated RCU QSBR. > > Suggested-by: Honnappa Nagarahalli > Signed-off-by: Dharmik Thakkar > Reviewed-by: Ruifeng Wang > --- [Wang, Yipeng] Also please confirm the checkpatch report see if it is not a false-positive. Then: Acked-by: Yipeng Wang
Re: [dpdk-dev] [PATCH v3 4/5] test/ring: add functional tests for zero copy APIs
> > > > Add functional tests for zero copy APIs. Test enqueue/dequeue > > functions are created using the zero copy APIs to fit into the > > existing testing method. > > > > Signed-off-by: Honnappa Nagarahalli > > Reviewed-by: Dharmik Thakkar > > --- > > app/test/test_ring.c | 196 > > +++ > > app/test/test_ring.h | 42 ++ > > 2 files changed, 238 insertions(+) > > > > > diff --git a/app/test/test_ring.h b/app/test/test_ring.h index > > 16697ee02..33c8a31fe 100644 > > --- a/app/test/test_ring.h > > +++ b/app/test/test_ring.h > > @@ -53,6 +53,48 @@ test_ring_inc_ptr(void **obj, int esize, unsigned int > n) > > (n * esize / sizeof(uint32_t))); } > > > > +static inline void > > +test_ring_mem_copy(void *dst, void * const *src, int esize, unsigned > > +int num) { > > + size_t temp_sz; > > + > > + temp_sz = num * sizeof(void *); > > + if (esize != -1) > > + temp_sz = esize * num; > > + > > + memcpy(dst, src, temp_sz); > > +} > > + > > +/* Copy to the ring memory */ > > +static inline void > > +test_ring_copy_to(struct rte_ring_zc_data *zcd, void * const *src, int > esize, > > + unsigned int num) > > +{ > > + test_ring_mem_copy(zcd->ptr1, src, esize, zcd->n1); > > + if (zcd->n1 != num) { > > + if (esize == -1) > > + src = src + zcd->n1; > > + else > > + src = (void * const *)(((const uint32_t *)src) + > > + (zcd->n1 * esize / sizeof(uint32_t))); > > Why just not: > src = test_ring_inc_ptr(src, esize, zcd->n1); ? test_enqdeq_impl requires the enqueue APIs to have 'const' pointer for data to be copied to the ring. Because of this, the 'src' parameter needs to be a 'const'. If I change test_ring_inc_ptr to take const parameter, a lot of things in test_ring.c break as test_ring_inc_ptr is called with lot of non-const pointers. > > Acked-by: Konstantin Ananyev > > > + test_ring_mem_copy(zcd->ptr2, src, > > + esize, num - zcd->n1); > > + } > > +} > > + > > +/* Copy from the ring memory */ > > +static inline void > > +test_ring_copy_from(struct rte_ring_zc_data *zcd, void *dst, int esize, > > + unsigned int num) > > +{ > > + test_ring_mem_copy(dst, zcd->ptr1, esize, zcd->n1); > > + > > + if (zcd->n1 != num) { > > + dst = test_ring_inc_ptr(dst, esize, zcd->n1); > > + test_ring_mem_copy(dst, zcd->ptr2, esize, num - zcd->n1); > > + } > > +} > > + > > static __rte_always_inline unsigned int test_ring_enqueue(struct > > rte_ring *r, void **obj, int esize, unsigned int n, > > unsigned int api_type) > > -- > > 2.17.1
[dpdk-dev] [PATCH v9 01/10] eal: add new x86 cpuid support for WAITPKG
Add a new CPUID flag indicating processor support for the UMONITOR/UMWAIT and TPAUSE instructions. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: Konstantin Ananyev --- lib/librte_eal/x86/include/rte_cpuflags.h | 1 + lib/librte_eal/x86/rte_cpuflags.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/lib/librte_eal/x86/include/rte_cpuflags.h b/lib/librte_eal/x86/include/rte_cpuflags.h index c1d20364d1..848ba9cbfb 100644 --- a/lib/librte_eal/x86/include/rte_cpuflags.h +++ b/lib/librte_eal/x86/include/rte_cpuflags.h @@ -132,6 +132,7 @@ enum rte_cpu_flag_t { RTE_CPUFLAG_MOVDIR64B, /**< Direct Store Instructions 64B */ RTE_CPUFLAG_AVX512VP2INTERSECT, /**< AVX512 Two Register Intersection */ + RTE_CPUFLAG_WAITPKG,/**< UMONITOR/UMWAIT/TPAUSE */ /* The last item */ RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */ }; diff --git a/lib/librte_eal/x86/rte_cpuflags.c b/lib/librte_eal/x86/rte_cpuflags.c index 30439e7951..0325c4b93b 100644 --- a/lib/librte_eal/x86/rte_cpuflags.c +++ b/lib/librte_eal/x86/rte_cpuflags.c @@ -110,6 +110,8 @@ const struct feature_entry rte_cpu_feature_table[] = { FEAT_DEF(AVX512F, 0x0007, 0, RTE_REG_EBX, 16) FEAT_DEF(RDSEED, 0x0007, 0, RTE_REG_EBX, 18) + FEAT_DEF(WAITPKG, 0x0007, 0, RTE_REG_ECX, 5) + FEAT_DEF(LAHF_SAHF, 0x8001, 0, RTE_REG_ECX, 0) FEAT_DEF(LZCNT, 0x8001, 0, RTE_REG_ECX, 4) -- 2.17.1
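A minimal usage sketch (editorial addition): once this flag is defined, application or library code can gate UMONITOR/UMWAIT/TPAUSE code paths on it at run time.

#include <rte_cpuflags.h>

/* Returns non-zero when the running CPU advertises WAITPKG, i.e. the
 * UMONITOR/UMWAIT/TPAUSE instructions may be used.
 */
static inline int
waitpkg_supported(void)
{
	return rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG) == 1;
}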
[dpdk-dev] [PATCH v9 02/10] eal: add power management intrinsics
Add two new power management intrinsics, and provide an implementation in eal/x86 based on UMONITOR/UMWAIT instructions. The instructions are implemented as raw byte opcodes because there is not yet widespread compiler support for these instructions. The power management instructions provide an architecture-specific function to either wait until a specified TSC timestamp is reached, or optionally wait until either a TSC timestamp is reached or a memory location is written to. The monitor function also provides an optional comparison, to avoid sleeping when the expected write has already happened, and no more writes are expected. For more details, please refer to Intel(R) 64 and IA-32 Architectures Software Developer's Manual, Volume 2. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: David Christensen Acked-by: Jerin Jacob Acked-by: Konstantin Ananyev Acked-by: Ruifeng Wang --- lib/librte_eal/arm/include/meson.build| 1 + .../arm/include/rte_power_intrinsics.h| 60 .../include/generic/rte_power_intrinsics.h| 111 ++ lib/librte_eal/include/meson.build| 1 + lib/librte_eal/ppc/include/meson.build| 1 + .../ppc/include/rte_power_intrinsics.h| 60 lib/librte_eal/x86/include/meson.build| 1 + .../x86/include/rte_power_intrinsics.h| 135 ++ 8 files changed, 370 insertions(+) create mode 100644 lib/librte_eal/arm/include/rte_power_intrinsics.h create mode 100644 lib/librte_eal/include/generic/rte_power_intrinsics.h create mode 100644 lib/librte_eal/ppc/include/rte_power_intrinsics.h create mode 100644 lib/librte_eal/x86/include/rte_power_intrinsics.h diff --git a/lib/librte_eal/arm/include/meson.build b/lib/librte_eal/arm/include/meson.build index 73b750a18f..c6a9f70d73 100644 --- a/lib/librte_eal/arm/include/meson.build +++ b/lib/librte_eal/arm/include/meson.build @@ -20,6 +20,7 @@ arch_headers = files( 'rte_pause_32.h', 'rte_pause_64.h', 'rte_pause.h', + 'rte_power_intrinsics.h', 'rte_prefetch_32.h', 'rte_prefetch_64.h', 'rte_prefetch.h', diff --git a/lib/librte_eal/arm/include/rte_power_intrinsics.h b/lib/librte_eal/arm/include/rte_power_intrinsics.h new file mode 100644 index 00..a4a1bc1159 --- /dev/null +++ b/lib/librte_eal/arm/include/rte_power_intrinsics.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#ifndef _RTE_POWER_INTRINSIC_ARM_H_ +#define _RTE_POWER_INTRINSIC_ARM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "generic/rte_power_intrinsics.h" + +/** + * This function is not supported on ARM. + */ +static inline void +rte_power_monitor(const volatile void *p, const uint64_t expected_value, + const uint64_t value_mask, const uint64_t tsc_timestamp, + const uint8_t data_sz) +{ + RTE_SET_USED(p); + RTE_SET_USED(expected_value); + RTE_SET_USED(value_mask); + RTE_SET_USED(tsc_timestamp); + RTE_SET_USED(data_sz); +} + +/** + * This function is not supported on ARM. + */ +static inline void +rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value, + const uint64_t value_mask, const uint64_t tsc_timestamp, + const uint8_t data_sz, rte_spinlock_t *lck) +{ + RTE_SET_USED(p); + RTE_SET_USED(expected_value); + RTE_SET_USED(value_mask); + RTE_SET_USED(tsc_timestamp); + RTE_SET_USED(lck); + RTE_SET_USED(data_sz); +} + +/** + * This function is not supported on ARM. 
+ */ +static inline void +rte_power_pause(const uint64_t tsc_timestamp) +{ + RTE_SET_USED(tsc_timestamp); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_POWER_INTRINSIC_ARM_H_ */ diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h new file mode 100644 index 00..fb897d9060 --- /dev/null +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#ifndef _RTE_POWER_INTRINSIC_H_ +#define _RTE_POWER_INTRINSIC_H_ + +#include + +#include +#include + +/** + * @file + * Advanced power management operations. + * + * This file define APIs for advanced power management, + * which are architecture-dependent. + */ + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Monitor specific address for changes. This will cause the CPU to enter an + * architecture-defined optimized power state until either the specified + * memory address is written to, a certain TSC timestamp is reached, or other + * reasons cause the CPU to wake up. + * + * Additionally, an `expected` 64-bit value and 64-bit mask are provided. If + * mask is non-zero, the current value pointed to by the `p` pointer will be + * checked
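A minimal usage sketch (editorial addition), based on the signatures introduced in this patch: sleep until a monitored location is written with an expected value or roughly 1 ms of TSC time passes. The address, expected value, mask and size would normally come from a PMD; the function name below is illustrative.

#include <rte_cycles.h>
#include <rte_power_intrinsics.h>

static inline void
wait_for_descriptor_write(volatile void *addr, uint64_t expected,
			  uint64_t mask, uint8_t data_sz)
{
	/* wake up after ~1 ms even if nothing is written */
	const uint64_t tsc_deadline = rte_rdtsc() + rte_get_tsc_hz() / 1000;

	rte_power_monitor(addr, expected, mask, tsc_deadline, data_sz);
}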
[dpdk-dev] [PATCH v9 00/10] Add PMD power mgmt
This patchset proposes a simple API for Ethernet drivers to cause the CPU to enter a power-optimized state while waiting for packets to arrive, along with a set of generic intrinsics that facilitate that. This is achieved through cooperation with the NIC driver that will allow us to know the address of the wake-up event, and wait for writes on it. On IA, this is achieved by using the UMONITOR/UMWAIT instructions. They are used in their raw opcode form because there is no widespread compiler support for them yet. Still, the API is made generic enough to hopefully support other architectures, if they happen to implement similar instructions. To achieve power savings, there is a very simple mechanism used: we're counting empty polls, and if a certain threshold is reached, we get the address of the next RX ring descriptor from the NIC driver, arm the monitoring hardware, and enter a power-optimized state. We will then wake up when either a timeout happens, or a write happens (or generally whenever the CPU feels like waking up - this is platform-specific), and proceed as normal. The empty poll counter is reset whenever we actually get packets, so we only go to sleep when we know nothing is going on. The mechanism is generic and can be used for any write-back descriptor. Why are we putting it into ethdev as opposed to leaving this up to the application? Our customers specifically requested a way to do it with minimal changes to the application code. The current approach allows the user to just flip a switch and automatically have power savings. - Only 1:1 core to queue mapping is supported, meaning that each lcore must at most handle RX on a single queue - Supports 3 types of policies: UMWAIT/PAUSE/Frequency_Scale - Power management is enabled per-queue - The API doesn't extend to other device types Liang Ma (10): eal: add new x86 cpuid support for WAITPKG eal: add power management intrinsics eal: add intrinsics support check infrastructure ethdev: add simple power management API power: add PMD power management API and callback net/ixgbe: implement power management API net/i40e: implement power management API net/ice: implement power management API examples/l3fwd-power: enable PMD power mgmt doc: update programmer's guide for power library doc/guides/prog_guide/power_man.rst | 42 +++ doc/guides/rel_notes/release_20_11.rst| 16 + .../sample_app_ug/l3_forward_power_man.rst| 13 + drivers/net/i40e/i40e_ethdev.c| 1 + drivers/net/i40e/i40e_rxtx.c | 26 ++ drivers/net/i40e/i40e_rxtx.h | 2 + drivers/net/ice/ice_ethdev.c | 1 + drivers/net/ice/ice_rxtx.c| 26 ++ drivers/net/ice/ice_rxtx.h| 2 + drivers/net/ixgbe/ixgbe_ethdev.c | 1 + drivers/net/ixgbe/ixgbe_rxtx.c| 25 ++ drivers/net/ixgbe/ixgbe_rxtx.h| 2 + examples/l3fwd-power/main.c | 46 ++- lib/librte_eal/arm/include/meson.build| 1 + .../arm/include/rte_power_intrinsics.h| 60 lib/librte_eal/arm/rte_cpuflags.c | 6 + lib/librte_eal/include/generic/rte_cpuflags.h | 26 ++ .../include/generic/rte_power_intrinsics.h| 123 +++ lib/librte_eal/include/meson.build| 1 + lib/librte_eal/ppc/include/meson.build| 1 + .../ppc/include/rte_power_intrinsics.h| 60 lib/librte_eal/ppc/rte_cpuflags.c | 7 + lib/librte_eal/version.map| 1 + lib/librte_eal/x86/include/meson.build| 1 + lib/librte_eal/x86/include/rte_cpuflags.h | 1 + .../x86/include/rte_power_intrinsics.h| 135 lib/librte_eal/x86/rte_cpuflags.c | 14 + lib/librte_ethdev/rte_ethdev.c| 23 ++ lib/librte_ethdev/rte_ethdev.h| 28 ++ lib/librte_ethdev/rte_ethdev_driver.h | 28 ++ lib/librte_ethdev/version.map | 1 + lib/librte_power/meson.build | 5 +-
lib/librte_power/rte_power_pmd_mgmt.c | 320 ++ lib/librte_power/rte_power_pmd_mgmt.h | 92 + lib/librte_power/version.map | 4 + 35 files changed, 1138 insertions(+), 3 deletions(-) create mode 100644 lib/librte_eal/arm/include/rte_power_intrinsics.h create mode 100644 lib/librte_eal/include/generic/rte_power_intrinsics.h create mode 100644 lib/librte_eal/ppc/include/rte_power_intrinsics.h create mode 100644 lib/librte_eal/x86/include/rte_power_intrinsics.h create mode 100644 lib/librte_power/rte_power_pmd_mgmt.c create mode 100644 lib/librte_power/rte_power_pmd_mgmt.h -- 2.17.1
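To make the mechanism described in the cover letter concrete, here is an editorial sketch (not the library code) of the empty-poll counting idea, using the rte_eth_get_wake_addr() API added later in this series; the threshold value and function name are illustrative.

#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_power_intrinsics.h>

#define EMPTY_POLL_THRESHOLD 512 /* illustrative value */

static uint16_t
rx_burst_with_power_saving(uint16_t port, uint16_t queue,
			   struct rte_mbuf **pkts, uint16_t n,
			   unsigned int *empty_polls)
{
	uint16_t nb_rx = rte_eth_rx_burst(port, queue, pkts, n);

	if (nb_rx != 0) {
		*empty_polls = 0; /* traffic seen, reset the counter */
		return nb_rx;
	}

	if (++(*empty_polls) > EMPTY_POLL_THRESHOLD) {
		volatile void *addr;
		uint64_t expected, mask;
		uint8_t data_sz;

		/* ask the PMD where the next descriptor write will land and
		 * sleep on it, with a ~1 ms TSC timeout as a safety net
		 */
		if (rte_eth_get_wake_addr(port, queue, &addr, &expected,
					  &mask, &data_sz) == 0)
			rte_power_monitor(addr, expected, mask,
					  rte_rdtsc() + rte_get_tsc_hz() / 1000,
					  data_sz);
	}

	return 0;
}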
[dpdk-dev] [PATCH v9 04/10] ethdev: add simple power management API
Add a simple API to allow getting address of next RX descriptor from the PMD, as well as release notes information. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: Konstantin Ananyev --- Notes: v8: - Rename version map file name. v7: - Fixed queue ID validation - Fixed documentation v6: - Rebase on top of latest main - Ensure the API checks queue ID (Konstantin) - Removed accidental inclusion of unrelated release notes v5: - Bring function format in line with other functions in the file - Ensure the API is supported by the driver before calling it (Konstantin) --- doc/guides/rel_notes/release_20_11.rst | 5 + lib/librte_ethdev/rte_ethdev.c | 23 + lib/librte_ethdev/rte_ethdev.h | 28 ++ lib/librte_ethdev/rte_ethdev_driver.h | 28 ++ lib/librte_ethdev/version.map | 1 + 5 files changed, 85 insertions(+) diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst index d8ac359e51..2827a000db 100644 --- a/doc/guides/rel_notes/release_20_11.rst +++ b/doc/guides/rel_notes/release_20_11.rst @@ -139,6 +139,11 @@ New Features Hairpin Tx part flow rules can be inserted explicitly. New API is added to get the hairpin peer ports list. +* **ethdev: add 1 new EXPERIMENTAL API for PMD power management.** + + * ``rte_eth_get_wake_addr()`` + * add new eth_dev_ops ``get_wake_addr`` + * **Updated Broadcom bnxt driver.** Updated the Broadcom bnxt driver with new features and improvements, including: diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index b12bb3854d..4f3115fe8e 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -5138,6 +5138,29 @@ rte_eth_tx_burst_mode_get(uint16_t port_id, uint16_t queue_id, dev->dev_ops->tx_burst_mode_get(dev, queue_id, mode)); } +int +rte_eth_get_wake_addr(uint16_t port_id, uint16_t queue_id, + volatile void **wake_addr, uint64_t *expected, uint64_t *mask, + uint8_t *data_sz) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_wake_addr, -ENOTSUP); + + if (queue_id >= dev->data->nb_rx_queues) { + RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id); + return -EINVAL; + } + + return eth_err(port_id, + dev->dev_ops->get_wake_addr(dev->data->rx_queues[queue_id], + wake_addr, expected, mask, data_sz)); +} + int rte_eth_dev_set_mc_addr_list(uint16_t port_id, struct rte_ether_addr *mc_addr_set, diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h index e341a08817..11559e7bc8 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h @@ -4364,6 +4364,34 @@ __rte_experimental int rte_eth_tx_burst_mode_get(uint16_t port_id, uint16_t queue_id, struct rte_eth_burst_mode *mode); +/** + * Retrieve the wake up address for the receive queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The Rx queue on the Ethernet device for which information will be + * retrieved. + * @param wake_addr + * The pointer to the address which will be monitored. + * @param expected + * The pointer to value to be expected when descriptor is set. + * @param mask + * The pointer to comparison bitmask for the expected value. + * @param data_sz + * The pointer to data size for the expected value and comparison bitmask. + * + * @return + * - 0: Success. + * -ENOTSUP: Operation not supported. + * -EINVAL: Invalid parameters. + * -ENODEV: Invalid port ID. 
+ */ +__rte_experimental +int rte_eth_get_wake_addr(uint16_t port_id, uint16_t queue_id, + volatile void **wake_addr, uint64_t *expected, uint64_t *mask, + uint8_t *data_sz); + /** * Retrieve device registers and register attributes (number of registers and * register size) diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h index c63b9f7eb7..d7548dfe74 100644 --- a/lib/librte_ethdev/rte_ethdev_driver.h +++ b/lib/librte_ethdev/rte_ethdev_driver.h @@ -752,6 +752,32 @@ typedef int (*eth_hairpin_queue_peer_unbind_t) (struct rte_eth_dev *dev, uint16_t cur_queue, uint32_t direction); /**< @internal Unbind peer queue from the current queue. */ +/** + * @internal + * Get address of memory location whose contents will change whenever there is + * new data to be received on an RX queue. + * + * @param rxq + * Ethdev queue pointer. + * @param tail_desc_addr + * The pointer point to where the address will be stored. + * @param expected + * The pointer point to value to be expected when descr
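A short usage sketch (editorial addition) for the new ethdev call, showing the error handling documented in the patch:

#include <rte_ethdev.h>

/* Query the wake-up address for one RX queue; on -ENOTSUP the PMD does not
 * implement get_wake_addr and the caller should fall back to busy polling.
 */
static int
query_wake_addr(uint16_t port_id, uint16_t queue_id)
{
	volatile void *addr;
	uint64_t expected, mask;
	uint8_t data_sz;
	int ret;

	ret = rte_eth_get_wake_addr(port_id, queue_id, &addr, &expected,
				    &mask, &data_sz);
	if (ret != 0)
		return ret; /* -ENOTSUP, -EINVAL or -ENODEV */

	/* addr/expected/mask/data_sz can now be handed to rte_power_monitor() */
	return 0;
}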
[dpdk-dev] [PATCH v9 05/10] power: add PMD power management API and callback
Add a simple on/off switch that will enable saving power when no packets are arriving. It is based on counting the number of empty polls and, when the number reaches a certain threshold, entering an architecture-defined optimized power state that will wait either until a TSC timestamp expires or until packets arrive. This API mandates a core-to-single-queue mapping (that is, multiple queues per device are supported, but they have to be polled on different cores). This design is using PMD RX callbacks. 1. UMWAIT/UMONITOR: When a certain threshold of empty polls is reached, the core will go into a power optimized sleep while waiting on an address of the next RX descriptor to be written to. 2. Pause instruction Instead of moving the core into a deeper C state, this method uses the pause instruction to avoid busy polling. 3. Frequency scaling Reuse the existing DPDK power library to scale up/down core frequency depending on traffic volume. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: David Hunt Acked-by: Konstantin Ananyev --- Notes: v8: - Rename version map file name v7: - Fixed race condition (Konstantin) - Slight rework of the structure of monitor code - Added missing inline for wakeup v6: - Added wakeup mechanism for UMWAIT - Removed memory allocation (everything is now allocated statically) - Fixed various typos and comments - Check for invalid queue ID - Moved release notes to this patch v5: - Make error checking more robust - Prevent initializing scaling if ACPI or PSTATE env wasn't set - Prevent initializing UMWAIT path if PMD doesn't support get_wake_addr - Add some debug logging - Replace x86-specific code path to generic path using the intrinsic check --- doc/guides/rel_notes/release_20_11.rst | 11 + lib/librte_power/meson.build | 5 +- lib/librte_power/rte_power_pmd_mgmt.c | 320 + lib/librte_power/rte_power_pmd_mgmt.h | 92 +++ lib/librte_power/version.map | 4 + 5 files changed, 430 insertions(+), 2 deletions(-) create mode 100644 lib/librte_power/rte_power_pmd_mgmt.c create mode 100644 lib/librte_power/rte_power_pmd_mgmt.h diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst index 2827a000db..5f32a5da1d 100644 --- a/doc/guides/rel_notes/release_20_11.rst +++ b/doc/guides/rel_notes/release_20_11.rst @@ -350,6 +350,17 @@ New Features * Replaced ``--scalar`` command-line option with ``--alg=``, to allow the user to select the desired classify method. +* **Add PMD power management mechanism** + + Three new Ethernet PMD power management mechanisms are added through the existing + RX callback infrastructure. 
+ + * Add power saving scheme based on UMWAIT instruction (x86 only) + * Add power saving scheme based on ``rte_pause()`` + * Add power saving scheme based on frequency scaling through the power library + * Add new EXPERIMENTAL API ``rte_power_pmd_mgmt_queue_enable()`` + * Add new EXPERIMENTAL API ``rte_power_pmd_mgmt_queue_disable()`` + Removed Items - diff --git a/lib/librte_power/meson.build b/lib/librte_power/meson.build index 78c031c943..cc3c7a8646 100644 --- a/lib/librte_power/meson.build +++ b/lib/librte_power/meson.build @@ -9,6 +9,7 @@ sources = files('rte_power.c', 'power_acpi_cpufreq.c', 'power_kvm_vm.c', 'guest_channel.c', 'rte_power_empty_poll.c', 'power_pstate_cpufreq.c', + 'rte_power_pmd_mgmt.c', 'power_common.c') -headers = files('rte_power.h','rte_power_empty_poll.h') -deps += ['timer'] +headers = files('rte_power.h','rte_power_empty_poll.h','rte_power_pmd_mgmt.h') +deps += ['timer' ,'ethdev'] diff --git a/lib/librte_power/rte_power_pmd_mgmt.c b/lib/librte_power/rte_power_pmd_mgmt.c new file mode 100644 index 00..0dcaddc3bd --- /dev/null +++ b/lib/librte_power/rte_power_pmd_mgmt.c @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2020 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include "rte_power_pmd_mgmt.h" + +#define EMPTYPOLL_MAX 512 + +/** + * Possible power management states of an ethdev port. + */ +enum pmd_mgmt_state { + /** Device power management is disabled. */ + PMD_MGMT_DISABLED = 0, + /** Device power management is enabled. */ + PMD_MGMT_ENABLED, +}; + +struct pmd_queue_cfg { + enum pmd_mgmt_state pwr_mgmt_state; + /**< State of power management for this queue */ + enum rte_power_pmd_mgmt_type cb_mode; + /**< Callback mode for this queue */ + const struct rte_eth_rxtx_callback *cur_cb; + /**< Callback instance */ + rte_spinlock_t umwait_lock; + /**< Per-queue status lock - used only for UMWAIT mode */ + volatile void *wait_addr; + /**< UMWAIT wakeup address */ + uint64_
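A minimal usage sketch (editorial addition) for the new per-queue API; it assumes the monitor-mode enumerator is named RTE_POWER_MGMT_TYPE_MONITOR as in the released header, so check rte_power_pmd_mgmt.h of the version in use.

#include <rte_lcore.h>
#include <rte_power_pmd_mgmt.h>

/* Enable UMWAIT-based power management for one queue polled from the current
 * lcore, and disable it again at teardown.
 */
static int
queue_power_mgmt_on(uint16_t port_id, uint16_t queue_id)
{
	return rte_power_pmd_mgmt_queue_enable(rte_lcore_id(), port_id,
			queue_id, RTE_POWER_MGMT_TYPE_MONITOR);
}

static int
queue_power_mgmt_off(uint16_t port_id, uint16_t queue_id)
{
	return rte_power_pmd_mgmt_queue_disable(rte_lcore_id(), port_id,
			queue_id);
}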
[dpdk-dev] [PATCH v9 03/10] eal: add intrinsics support check infrastructure
Currently, it is not possible to check support for intrinsics that are platform-specific, cannot be abstracted in a generic way, or do not have support on all architectures. The CPUID flags can be used to some extent, but they are only defined for their platform, while intrinsics will be available to all code as they are in generic headers. This patch introduces infrastructure to check support for certain platform-specific intrinsics, and adds support for checking support for IA power management-related intrinsics for UMWAIT/UMONITOR and TPAUSE. Signed-off-by: Anatoly Burakov Signed-off-by: Liang Ma Acked-by: David Christensen Acked-by: Jerin Jacob Acked-by: Ruifeng Wang Acked-by: Ray Kinsella --- Notes: v6: - Fix the comments v8: - Rename eal version.map --- lib/librte_eal/arm/rte_cpuflags.c | 6 + lib/librte_eal/include/generic/rte_cpuflags.h | 26 +++ .../include/generic/rte_power_intrinsics.h| 12 + lib/librte_eal/ppc/rte_cpuflags.c | 7 + lib/librte_eal/version.map| 1 + lib/librte_eal/x86/rte_cpuflags.c | 12 + 6 files changed, 64 insertions(+) diff --git a/lib/librte_eal/arm/rte_cpuflags.c b/lib/librte_eal/arm/rte_cpuflags.c index 7b257b7873..e3a53bcece 100644 --- a/lib/librte_eal/arm/rte_cpuflags.c +++ b/lib/librte_eal/arm/rte_cpuflags.c @@ -151,3 +151,9 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) return NULL; return rte_cpu_feature_table[feature].name; } + +void +rte_cpu_get_intrinsics_support(struct rte_cpu_intrinsics *intrinsics) +{ + memset(intrinsics, 0, sizeof(*intrinsics)); +} diff --git a/lib/librte_eal/include/generic/rte_cpuflags.h b/lib/librte_eal/include/generic/rte_cpuflags.h index 872f0ebe3e..28a5aecde8 100644 --- a/lib/librte_eal/include/generic/rte_cpuflags.h +++ b/lib/librte_eal/include/generic/rte_cpuflags.h @@ -13,6 +13,32 @@ #include "rte_common.h" #include +#include + +/** + * Structure used to describe platform-specific intrinsics that may or may not + * be supported at runtime. + */ +struct rte_cpu_intrinsics { + uint32_t power_monitor : 1; + /**< indicates support for rte_power_monitor function */ + uint32_t power_pause : 1; + /**< indicates support for rte_power_pause function */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Check CPU support for various intrinsics at runtime. + * + * @param intrinsics + * Pointer to a structure to be filled. + */ +__rte_experimental +void +rte_cpu_get_intrinsics_support(struct rte_cpu_intrinsics *intrinsics); + /** * Enumeration of all CPU features supported */ diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h index fb897d9060..03a326f076 100644 --- a/lib/librte_eal/include/generic/rte_power_intrinsics.h +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h @@ -32,6 +32,10 @@ * checked against the expected value, and if they match, the entering of * optimized power state may be aborted. * + * @warning It is responsibility of the user to check if this function is + * supported at runtime using `rte_cpu_get_features()` API call. Failing to do + * so may result in an illegal CPU instruction error. + * * @param p * Address to monitor for changes. * @param expected_value @@ -69,6 +73,10 @@ static inline void rte_power_monitor(const volatile void *p, * This call will also lock a spinlock on entering sleep, and release it on * waking up the CPU. * + * @warning It is responsibility of the user to check if this function is + * supported at runtime using `rte_cpu_get_features()` API call. 
Failing to do + * so may result in an illegal CPU instruction error. + * * @param p * Address to monitor for changes. * @param expected_value @@ -101,6 +109,10 @@ static inline void rte_power_monitor_sync(const volatile void *p, * Enter an architecture-defined optimized power state until a certain TSC * timestamp is reached. * + * @warning It is responsibility of the user to check if this function is + * supported at runtime using `rte_cpu_get_features()` API call. Failing to do + * so may result in an illegal CPU instruction error. + * * @param tsc_timestamp * Maximum TSC timestamp to wait for. Note that the wait behavior is * architecture-dependent. diff --git a/lib/librte_eal/ppc/rte_cpuflags.c b/lib/librte_eal/ppc/rte_cpuflags.c index 3bb7563ce9..61db5c216d 100644 --- a/lib/librte_eal/ppc/rte_cpuflags.c +++ b/lib/librte_eal/ppc/rte_cpuflags.c @@ -8,6 +8,7 @@ #include #include #include +#include #include /* Symbolic values for the entries in the auxiliary table */ @@ -108,3 +109,9 @@ rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) return NULL; return rte_cpu_feature_table[feature].name; } + +void +rte_cpu_get_intrinsics_support(s
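A minimal usage sketch (editorial addition) for the new check, as the warnings added in this patch require:

#include <rte_cpuflags.h>

/* Return non-zero only if rte_power_monitor() is usable on this CPU. */
static inline int
power_monitor_supported(void)
{
	struct rte_cpu_intrinsics intr;

	rte_cpu_get_intrinsics_support(&intr);
	return intr.power_monitor;
}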
[dpdk-dev] [PATCH v9 07/10] net/i40e: implement power management API
Implement support for the power management API by implementing a `get_wake_addr` function that will return an address of an RX ring's status bit. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: Konstantin Ananyev Acked-by: Jeff Guo --- drivers/net/i40e/i40e_ethdev.c | 1 + drivers/net/i40e/i40e_rxtx.c | 26 ++ drivers/net/i40e/i40e_rxtx.h | 2 ++ 3 files changed, 29 insertions(+) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 4778aaf299..358a38232b 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -513,6 +513,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = { .mtu_set = i40e_dev_mtu_set, .tm_ops_get = i40e_tm_ops_get, .tx_done_cleanup = i40e_tx_done_cleanup, + .get_wake_addr= i40e_get_wake_addr, }; /* store statistics names and its offset in stats structure */ diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 5df9a9df56..78862fe3a2 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -72,6 +72,32 @@ #define I40E_TX_OFFLOAD_NOTSUP_MASK \ (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) +int +i40e_get_wake_addr(void *rx_queue, volatile void **tail_desc_addr, + uint64_t *expected, uint64_t *mask, uint8_t *data_sz) +{ + struct i40e_rx_queue *rxq = rx_queue; + volatile union i40e_rx_desc *rxdp; + uint16_t desc; + + desc = rxq->rx_tail; + rxdp = &rxq->rx_ring[desc]; + /* watch for changes in status bit */ + *tail_desc_addr = &rxdp->wb.qword1.status_error_len; + + /* +* we expect the DD bit to be set to 1 if this descriptor was already +* written to. +*/ + *expected = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT); + *mask = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT); + + /* registers are 64-bit */ + *data_sz = 8; + + return 0; +} + static inline void i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp) { diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index 57d7b4160b..5826cf1099 100644 --- a/drivers/net/i40e/i40e_rxtx.h +++ b/drivers/net/i40e/i40e_rxtx.h @@ -248,6 +248,8 @@ uint16_t i40e_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); uint16_t i40e_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +int i40e_get_wake_addr(void *rx_queue, volatile void **tail_desc_addr, + uint64_t *expected, uint64_t *value, uint8_t *data_sz); /* For each value it means, datasheet of hardware can tell more details * -- 2.17.1
[dpdk-dev] [PATCH v9 06/10] net/ixgbe: implement power management API
Implement support for the power management API by implementing a `get_wake_addr` function that will return an address of an RX ring's status bit. Signed-off-by: Anatoly Burakov Signed-off-by: Liang Ma Acked-by: Konstantin Ananyev --- drivers/net/ixgbe/ixgbe_ethdev.c | 1 + drivers/net/ixgbe/ixgbe_rxtx.c | 25 + drivers/net/ixgbe/ixgbe_rxtx.h | 2 ++ 3 files changed, 28 insertions(+) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 00101c2eec..fcc4026372 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -588,6 +588,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = { .udp_tunnel_port_del = ixgbe_dev_udp_tunnel_port_del, .tm_ops_get = ixgbe_tm_ops_get, .tx_done_cleanup = ixgbe_dev_tx_done_cleanup, + .get_wake_addr= ixgbe_get_wake_addr, }; /* diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index d1d3baff90..096dff37ba 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1367,6 +1367,31 @@ const uint32_t RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP, }; +int ixgbe_get_wake_addr(void *rx_queue, volatile void **tail_desc_addr, + uint64_t *expected, uint64_t *mask, uint8_t *data_sz) +{ + volatile union ixgbe_adv_rx_desc *rxdp; + struct ixgbe_rx_queue *rxq = rx_queue; + uint16_t desc; + + desc = rxq->rx_tail; + rxdp = &rxq->rx_ring[desc]; + /* watch for changes in status bit */ + *tail_desc_addr = &rxdp->wb.upper.status_error; + + /* +* we expect the DD bit to be set to 1 if this descriptor was already +* written to. +*/ + *expected = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD); + *mask = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD); + + /* the registers are 32-bit */ + *data_sz = 4; + + return 0; +} + /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */ static inline uint32_t ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask) diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h index 6d2f7c9da3..1ef0b05e66 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.h +++ b/drivers/net/ixgbe/ixgbe_rxtx.h @@ -299,5 +299,7 @@ uint64_t ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev); uint64_t ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev); uint64_t ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev); uint64_t ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev); +int ixgbe_get_wake_addr(void *rx_queue, volatile void **tail_desc_addr, + uint64_t *expected, uint64_t *mask, uint8_t *data_sz); #endif /* _IXGBE_RXTX_H_ */ -- 2.17.1
[dpdk-dev] [PATCH v9 10/10] doc: update programmer's guide for power library
Update programmer's guide to document PMD power management usage. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: David Hunt --- doc/guides/prog_guide/power_man.rst | 42 + 1 file changed, 42 insertions(+) diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst index 0a3755a901..38c64d31e4 100644 --- a/doc/guides/prog_guide/power_man.rst +++ b/doc/guides/prog_guide/power_man.rst @@ -192,6 +192,45 @@ User Cases -- The mechanism can applied to any device which is based on polling. e.g. NIC, FPGA. +PMD Power Management API + + +Abstract + +Existing power management mechanisms require developers to change application +design or change code to make use of it. The PMD power management API provides a +convenient alternative by utilizing Ethernet PMD RX callbacks, and triggering +power saving whenever empty poll count reaches a certain number. + + * UMWAIT/UMONITOR + + This power saving scheme will put the CPU into optimized power state and use + the UMWAIT/UMONITOR instructions to monitor the Ethernet PMD RX descriptor + address, and wake the CPU up whenever there's new traffic. + + * Pause + + This power saving scheme will use the `rte_pause` function to avoid busy + polling. + + * Frequency scaling + + This power saving scheme will use existing power library functionality to + scale the core frequency up/down depending on traffic volume. + + +.. note:: + + Currently, this power management API is limited to mandatory mapping of 1 + queue to 1 core (multiple queues are supported, but they must be polled from + different cores). + +API Overview for PMD Power Management + +* **Queue Enable**: Enable specific power scheme for certain queue/port/core + +* **Queue Disable**: Disable power scheme for certain queue/port/core + References -- @@ -200,3 +239,6 @@ References * The :doc:`../sample_app_ug/vm_power_management` chapter in the :doc:`../sample_app_ug/index` section. + +* The :doc:`../sample_app_ug/rxtx_callbacks` +chapter in the :doc:`../sample_app_ug/index` section. -- 2.17.1
[dpdk-dev] [PATCH v9 08/10] net/ice: implement power management API
Implement support for the power management API by implementing a `get_wake_addr` function that will return an address of an RX ring's status bit. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: Konstantin Ananyev --- drivers/net/ice/ice_ethdev.c | 1 + drivers/net/ice/ice_rxtx.c | 26 ++ drivers/net/ice/ice_rxtx.h | 2 ++ 3 files changed, 29 insertions(+) diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index c65125ff32..54f185ad4d 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -216,6 +216,7 @@ static const struct eth_dev_ops ice_eth_dev_ops = { .udp_tunnel_port_add = ice_dev_udp_tunnel_port_add, .udp_tunnel_port_del = ice_dev_udp_tunnel_port_del, .tx_done_cleanup = ice_tx_done_cleanup, + .get_wake_addr= ice_get_wake_addr, }; /* store statistics names and its offset in stats structure */ diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index ee576c362a..fafd6ada62 100644 --- a/drivers/net/ice/ice_rxtx.c +++ b/drivers/net/ice/ice_rxtx.c @@ -26,6 +26,32 @@ uint64_t rte_net_ice_dynflag_proto_xtr_ipv6_flow_mask; uint64_t rte_net_ice_dynflag_proto_xtr_tcp_mask; uint64_t rte_net_ice_dynflag_proto_xtr_ip_offset_mask; +int ice_get_wake_addr(void *rx_queue, volatile void **tail_desc_addr, + uint64_t *expected, uint64_t *mask, uint8_t *data_sz) +{ + volatile union ice_rx_flex_desc *rxdp; + struct ice_rx_queue *rxq = rx_queue; + uint16_t desc; + + desc = rxq->rx_tail; + rxdp = &rxq->rx_ring[desc]; + /* watch for changes in status bit */ + *tail_desc_addr = &rxdp->wb.status_error0; + + /* +* we expect the DD bit to be set to 1 if this descriptor was already +* written to. +*/ + *expected = rte_cpu_to_le_16(1 << ICE_RX_FLEX_DESC_STATUS0_DD_S); + *mask = rte_cpu_to_le_16(1 << ICE_RX_FLEX_DESC_STATUS0_DD_S); + + /* register is 16-bit */ + *data_sz = 2; + + return 0; +} + + static inline uint8_t ice_proto_xtr_type_to_rxdid(uint8_t xtr_type) { diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h index 1c23c7541e..7eeb8d467e 100644 --- a/drivers/net/ice/ice_rxtx.h +++ b/drivers/net/ice/ice_rxtx.h @@ -250,6 +250,8 @@ uint16_t ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); int ice_fdir_programming(struct ice_pf *pf, struct ice_fltr_desc *fdir_desc); int ice_tx_done_cleanup(void *txq, uint32_t free_cnt); +int ice_get_wake_addr(void *rx_queue, volatile void **tail_desc_addr, + uint64_t *expected, uint64_t *mask, uint8_t *data_sz); #define FDIR_PARSING_ENABLE_PER_QUEUE(ad, on) do { \ int i; \ -- 2.17.1
[dpdk-dev] [PATCH v9 09/10] examples/l3fwd-power: enable PMD power mgmt
Add PMD power management feature support to l3fwd-power sample app. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: David Hunt --- Notes: v8: - Add return status check for queue enable v6: - Fixed typos in documentation --- .../sample_app_ug/l3_forward_power_man.rst| 13 ++ examples/l3fwd-power/main.c | 46 ++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/doc/guides/sample_app_ug/l3_forward_power_man.rst b/doc/guides/sample_app_ug/l3_forward_power_man.rst index d7e1dc5813..b10ebe662e 100644 --- a/doc/guides/sample_app_ug/l3_forward_power_man.rst +++ b/doc/guides/sample_app_ug/l3_forward_power_man.rst @@ -109,6 +109,8 @@ where, * --telemetry: Telemetry mode. +* --pmd-mgmt: PMD power management mode. + See :doc:`l3_forward` for details. The L3fwd-power example reuses the L3fwd command line options. @@ -455,3 +457,14 @@ reference cycles and accordingly busy rate is set to either 0% or The new stats ``empty_poll`` , ``full_poll`` and ``busy_percent`` can be viewed by running the script ``/usertools/dpdk-telemetry-client.py`` and selecting the menu option ``Send for global Metrics``. + +PMD power management Mode +- + +The PMD power management mode support for ``l3fwd-power`` is a standalone mode, in this mode +``l3fwd-power`` does simple l3fwding along with enable the power saving scheme on specific +port/queue/lcore. Main purpose for this mode is to demonstrate how to use the PMD power management API. + +.. code-block:: console + +./build/examples/dpdk-l3fwd-power -l 1-3 -- --pmd-mgmt -p 0x0f --config="(0,0,2),(0,1,3)" diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c index a48d75f68f..aafa415f0b 100644 --- a/examples/l3fwd-power/main.c +++ b/examples/l3fwd-power/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "perf_core.h" #include "main.h" @@ -199,7 +200,8 @@ enum appmode { APP_MODE_LEGACY, APP_MODE_EMPTY_POLL, APP_MODE_TELEMETRY, - APP_MODE_INTERRUPT + APP_MODE_INTERRUPT, + APP_MODE_PMD_MGMT }; enum appmode app_mode; @@ -1750,6 +1752,7 @@ parse_ep_config(const char *q_arg) #define CMD_LINE_OPT_EMPTY_POLL "empty-poll" #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" #define CMD_LINE_OPT_TELEMETRY "telemetry" +#define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt" /* Parse the argument given in the command line of the application */ static int @@ -1771,6 +1774,7 @@ parse_args(int argc, char **argv) {CMD_LINE_OPT_LEGACY, 0, 0, 0}, {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, + {CMD_LINE_OPT_PMD_MGMT, 0, 0, 0}, {NULL, 0, 0, 0} }; @@ -1881,6 +1885,16 @@ parse_args(int argc, char **argv) printf("telemetry mode is enabled\n"); } + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_PMD_MGMT, + sizeof(CMD_LINE_OPT_PMD_MGMT))) { + if (app_mode != APP_MODE_DEFAULT) { + printf(" power mgmt mode is mutually exclusive with other modes\n"); + return -1; + } + app_mode = APP_MODE_PMD_MGMT; + printf("PMD power mgmt mode is enabled\n"); + } if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_INTERRUPT_ONLY, sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { @@ -2437,6 +2451,8 @@ mode_to_str(enum appmode mode) return "telemetry"; case APP_MODE_INTERRUPT: return "interrupt-only"; + case APP_MODE_PMD_MGMT: + return "pmd mgmt"; default: return "invalid"; } @@ -2705,6 +2721,17 @@ main(int argc, char **argv) } else if (!check_ptype(portid)) rte_exit(EXIT_FAILURE, "PMD can not provide needed ptypes\n"); + if (app_mode == APP_MODE_PMD_MGMT) { + ret = rte_power_pmd_mgmt_queue_enable(lcore_id, +portid, queueid, 
+RTE_POWER_MGMT_TYPE_SCALE); + + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_power_pmd_mgmt enable: err=%d, " + "port=%d\n", ret, portid); + + } } } @@ -2790,6 +281
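A note on teardown: the hunk above only enables the scheme per queue. A matching disable path (hedged sketch, assuming the mirrored rte_power_pmd_mgmt_queue_disable() API from the same series; the helper name is illustrative) could look like:

    #include <stdio.h>
    #include <stdint.h>
    #include <rte_power_pmd_mgmt.h>

    /* Illustrative helper, not part of the patch. */
    static void
    pmd_mgmt_queue_off(unsigned int lcore_id, uint16_t portid, uint16_t queueid)
    {
        int ret = rte_power_pmd_mgmt_queue_disable(lcore_id, portid, queueid);

        if (ret < 0)
            printf("rte_power_pmd_mgmt disable: err=%d, port=%u queue=%u\n",
                   ret, portid, queueid);
    }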
[dpdk-dev] [PATCH v4 1/7] app/bbdev: add explicit ut for latency vs validation
Adding explicit different ut when testing for validation or latency (early termination enabled or not). Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 92 ++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 6e5535d..3554a77 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -3999,12 +3999,14 @@ typedef int (test_case_function)(struct active_device *ad, return i; } +/* Test case for latency/validation for LDPC Decoder */ static int latency_test_ldpc_dec(struct rte_mempool *mempool, struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, int vector_mask, uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process, uint16_t burst_sz, - uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) + uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, + bool disable_et) { int ret = TEST_SUCCESS; uint16_t i, j, dequeued; @@ -4026,7 +4028,7 @@ typedef int (test_case_function)(struct active_device *ad, "rte_bbdev_dec_op_alloc_bulk() failed"); /* For latency tests we need to disable early termination */ - if (check_bit(ref_op->ldpc_dec.op_flags, + if (disable_et && check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; @@ -4248,7 +4250,7 @@ typedef int (test_case_function)(struct active_device *ad, TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); printf("+ --- +\n"); - printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", + printf("== test: latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", info.dev_name, burst_sz, num_to_process, op_type_str); if (op_type == RTE_BBDEV_OP_TURBO_DEC) @@ -4270,7 +4272,83 @@ typedef int (test_case_function)(struct active_device *ad, iter = latency_test_ldpc_dec(op_params->mp, bufs, op_params->ref_dec_op, op_params->vector_mask, ad->dev_id, queue_id, num_to_process, + burst_sz, &total_time, &min_time, &max_time, + true); + else + iter = latency_test_enc(op_params->mp, bufs, + op_params->ref_enc_op, + ad->dev_id, queue_id, + num_to_process, burst_sz, &total_time, + &min_time, &max_time); + + if (iter <= 0) + return TEST_FAILED; + + printf("Operation latency:\n" + "\tavg: %lg cycles, %lg us\n" + "\tmin: %lg cycles, %lg us\n" + "\tmax: %lg cycles, %lg us\n", + (double)total_time / (double)iter, + (double)(total_time * 100) / (double)iter / + (double)rte_get_tsc_hz(), (double)min_time, + (double)(min_time * 100) / (double)rte_get_tsc_hz(), + (double)max_time, (double)(max_time * 100) / + (double)rte_get_tsc_hz()); + + return TEST_SUCCESS; +} + +static int +validation_test(struct active_device *ad, + struct test_op_params *op_params) +{ + int iter; + uint16_t burst_sz = op_params->burst_sz; + const uint16_t num_to_process = op_params->num_to_process; + const enum rte_bbdev_op_type op_type = test_vector.op_type; + const uint16_t queue_id = ad->queue_ids[0]; + struct test_buffers *bufs = NULL; + struct rte_bbdev_info info; + uint64_t total_time, min_time, max_time; + const char *op_type_str; + + total_time = max_time = 0; + min_time = UINT64_MAX; + + TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), + "BURST_SIZE should be <= %u", MAX_BURST); + + rte_bbdev_info_get(ad->dev_id, &info); + bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; + + op_type_str = rte_bbdev_op_type_str(op_type); + 
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); + + printf("+ --- +\n"); + printf("== test: validation\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", + info.dev_name, burst_sz, num_to_process, op_type_str);
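The latency figures printed by these tests are raw TSC cycle counts plus their microsecond equivalents. As a minimal reference for the conversion (general formula, not the test's own code), microseconds are obtained as cycles * 1e6 / rte_get_tsc_hz():

    #include <stdint.h>
    #include <rte_cycles.h>

    /* cycles -> microseconds, given the TSC frequency */
    static double
    cycles_to_us(uint64_t cycles)
    {
        return (double)cycles * 1E6 / (double)rte_get_tsc_hz();
    }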
[dpdk-dev] [PATCH v4 0/7] BBDEV test updates
v4: rebased on main latest v3: apologize again for typo and not double checking with check-git-log v2: typos missed in commit messages Series updating and extending the app running the bbdev-test for the existing bbdev PMDs. Nicolas Chautru (7): app/bbdev: add explicit ut for latency vs validation app/bbdev: add explicit check for counters app/bbdev: include explicit HARQ preloading app/bbdev: define wait for offload app/bbdev: skip bler ut when compression is used app/bbdev: reduce duration of throughput test app/bbdev: update offload test to dequeue full ring app/test-bbdev/main.h| 1 + app/test-bbdev/test_bbdev_perf.c | 193 ++- 2 files changed, 152 insertions(+), 42 deletions(-) -- 1.8.3.1
[dpdk-dev] [PATCH v4 2/7] app/bbdev: add explicit check for counters
Adding explict check in ut that the stats counters have the expect values. Was missing for coverage. Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 17 + 1 file changed, 17 insertions(+) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 3554a77..b62848e 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -4840,6 +4840,23 @@ typedef int (test_case_function)(struct active_device *ad, (double)(time_st.deq_max_time * 100) / rte_get_tsc_hz()); + struct rte_bbdev_stats stats = {0}; + get_bbdev_queue_stats(ad->dev_id, queue_id, &stats); + if (op_type != RTE_BBDEV_OP_LDPC_DEC) { + TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process, + "Mismatch in enqueue count %10"PRIu64" %d", + stats.enqueued_count, num_to_process); + TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process, + "Mismatch in dequeue count %10"PRIu64" %d", + stats.dequeued_count, num_to_process); + } + TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0, + "Enqueue count Error %10"PRIu64"", + stats.enqueue_err_count); + TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0, + "Dequeue count Error (%10"PRIu64"", + stats.dequeue_err_count); + return TEST_SUCCESS; #endif } -- 1.8.3.1
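For reference, the same check expressed against the public bbdev stats API (a sketch only; the unit test itself goes through its local get_bbdev_queue_stats() helper and TEST_ASSERT macros):

    #include <rte_bbdev.h>

    /* Sketch: verify that all enqueued ops were counted and dequeued,
     * and that no enqueue/dequeue errors were recorded. */
    static int
    check_bbdev_counters(uint16_t dev_id, uint64_t expected_ops)
    {
        struct rte_bbdev_stats stats;

        if (rte_bbdev_stats_get(dev_id, &stats) != 0)
            return -1;

        if (stats.enqueued_count != expected_ops ||
            stats.dequeued_count != expected_ops)
            return -1;

        if (stats.enqueue_err_count != 0 || stats.dequeue_err_count != 0)
            return -1;

        return 0;
    }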
[dpdk-dev] [PATCH v4 4/7] app/bbdev: define wait for offload
Replacing magic number for default wait time for hw offload. Signed-off-by: Nicolas Chautru Acked-by: Liu Tianjiao --- app/test-bbdev/test_bbdev_perf.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index f30cbdb..39f06db 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -25,6 +25,7 @@ #define MAX_QUEUES RTE_MAX_LCORE #define TEST_REPETITIONS 1000 +#define WAIT_OFFLOAD_US 1000 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC #include @@ -4451,7 +4452,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4542,7 +4543,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4630,7 +4631,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4713,7 +4714,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); -- 1.8.3.1
[dpdk-dev] [PATCH v4 6/7] app/bbdev: reduce duration of throughput test
Reducing number of repetitions from 1000 to 100 to save time. Results are accurate enough with 100 loops. Signed-off-by: Nicolas Chautru Acked-by: Liu Tianjiao --- app/test-bbdev/test_bbdev_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index a15ea69..b5dc536 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -24,7 +24,7 @@ #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) #define MAX_QUEUES RTE_MAX_LCORE -#define TEST_REPETITIONS 1000 +#define TEST_REPETITIONS 100 #define WAIT_OFFLOAD_US 1000 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC -- 1.8.3.1
[dpdk-dev] [PATCH v4 3/7] app/bbdev: include explicit HARQ preloading
Run preloading explictly for unit tests. Load each code block by reusing existing input op then restore for the actual test. Signed-off-by: Nicolas Chautru Acked-by: Liu Tianjiao --- app/test-bbdev/main.h| 1 + app/test-bbdev/test_bbdev_perf.c | 51 +--- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h index fb3dec8..dc10a50 100644 --- a/app/test-bbdev/main.h +++ b/app/test-bbdev/main.h @@ -17,6 +17,7 @@ #define TEST_SKIPPED1 #define MAX_BURST 512U +#define MAX_OPS 1024U #define DEFAULT_BURST 32U #define DEFAULT_OPS 64U #define DEFAULT_ITER 6U diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index b62848e..f30cbdb 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct active_device *ad, bool preload) { uint16_t j; - int ret; - uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; - struct rte_bbdev_op_data save_hc_in, save_hc_out; - struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; + int deq; + uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; + struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS]; + struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; uint32_t flags = ops[0]->ldpc_dec.op_flags; bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; - for (j = 0; j < n; ++j) { - if ((mem_in || hc_in) && preload) { - save_hc_in = ops[j]->ldpc_dec.harq_combined_input; - save_hc_out = ops[j]->ldpc_dec.harq_combined_output; + if ((mem_in || hc_in) && preload) { + for (j = 0; j < n; ++j) { + save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; + save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; ops[j]->ldpc_dec.op_flags = RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct active_device *ad, ops[j]->ldpc_dec.harq_combined_output.offset = harq_offset; ops[j]->ldpc_dec.harq_combined_input.offset = 0; - rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, - &ops[j], 1); - ret = 0; - while (ret == 0) - ret = rte_bbdev_dequeue_ldpc_dec_ops( - dev_id, queue_id, &ops_deq[j], 1); + harq_offset += HARQ_INCR; + } + rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); + deq = 0; + while (deq != n) + deq += rte_bbdev_dequeue_ldpc_dec_ops( + dev_id, queue_id, &ops_deq[deq], + n - deq); + /* Restore the operations */ + for (j = 0; j < n; ++j) { ops[j]->ldpc_dec.op_flags = flags; - ops[j]->ldpc_dec.harq_combined_input = save_hc_in; - ops[j]->ldpc_dec.harq_combined_output = save_hc_out; + ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; + ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; } + } + harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; + for (j = 0; j < n; ++j) { /* Adjust HARQ offset when we reach external DDR */ if (mem_in || hc_in) ops[j]->ldpc_dec.harq_combined_input.offset @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct active_device *ad, mbuf_reset( ops_enq[j]->ldpc_dec.harq_combined_output.data); } - if (extDdr) { - bool preload = i == (TEST_REPETITIONS - 1); + if (extDdr) preload_harq_ddr(tp->dev_id, queue_id, ops_enq, - num_ops, preload); - } + num_ops, true); start_time = 
rte_rdtsc_precise(); for (enq = 0, deq = 0; enq < num_ops;) { @@ -3362,11 +3367,9 @@ typedef int (tes
[dpdk-dev] [PATCH v4 7/7] app/bbdev: update offload test to dequeue full ring
update offload dequeue to retrieve the full ring to be agnostic of implementation. Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index b5dc536..a6884c5 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -4463,8 +4463,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4554,8 +4554,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4642,8 +4642,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4725,8 +4725,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, -- 1.8.3.1
[dpdk-dev] [PATCH v4 5/7] app/bbdev: skip bler ut when compression is used
bler test results are not valid when LLR compression is used or for loopback scenarios. Skipping these. Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 39f06db..a15ea69 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -3719,7 +3719,11 @@ typedef int (test_case_function)(struct active_device *ad, RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE)); - if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) + if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) && + !check_bit(test_vector.ldpc_dec.op_flags, + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) + && !check_bit(test_vector.ldpc_dec.op_flags, + RTE_BBDEV_LDPC_LLR_COMPRESSION)) bler_function = bler_pmd_lcore_ldpc_dec; else return TEST_SKIPPED; -- 1.8.3.1
[dpdk-dev] [PATCH v5 1/7] app/bbdev: add explicit ut for latency vs validation
Adding explicit different ut when testing for validation or latency (early termination enabled or not). Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 92 ++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 6e5535d..3554a77 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -3999,12 +3999,14 @@ typedef int (test_case_function)(struct active_device *ad, return i; } +/* Test case for latency/validation for LDPC Decoder */ static int latency_test_ldpc_dec(struct rte_mempool *mempool, struct test_buffers *bufs, struct rte_bbdev_dec_op *ref_op, int vector_mask, uint16_t dev_id, uint16_t queue_id, const uint16_t num_to_process, uint16_t burst_sz, - uint64_t *total_time, uint64_t *min_time, uint64_t *max_time) + uint64_t *total_time, uint64_t *min_time, uint64_t *max_time, + bool disable_et) { int ret = TEST_SUCCESS; uint16_t i, j, dequeued; @@ -4026,7 +4028,7 @@ typedef int (test_case_function)(struct active_device *ad, "rte_bbdev_dec_op_alloc_bulk() failed"); /* For latency tests we need to disable early termination */ - if (check_bit(ref_op->ldpc_dec.op_flags, + if (disable_et && check_bit(ref_op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE)) ref_op->ldpc_dec.op_flags -= RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE; @@ -4248,7 +4250,7 @@ typedef int (test_case_function)(struct active_device *ad, TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); printf("+ --- +\n"); - printf("== test: validation/latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", + printf("== test: latency\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", info.dev_name, burst_sz, num_to_process, op_type_str); if (op_type == RTE_BBDEV_OP_TURBO_DEC) @@ -4270,7 +4272,83 @@ typedef int (test_case_function)(struct active_device *ad, iter = latency_test_ldpc_dec(op_params->mp, bufs, op_params->ref_dec_op, op_params->vector_mask, ad->dev_id, queue_id, num_to_process, + burst_sz, &total_time, &min_time, &max_time, + true); + else + iter = latency_test_enc(op_params->mp, bufs, + op_params->ref_enc_op, + ad->dev_id, queue_id, + num_to_process, burst_sz, &total_time, + &min_time, &max_time); + + if (iter <= 0) + return TEST_FAILED; + + printf("Operation latency:\n" + "\tavg: %lg cycles, %lg us\n" + "\tmin: %lg cycles, %lg us\n" + "\tmax: %lg cycles, %lg us\n", + (double)total_time / (double)iter, + (double)(total_time * 100) / (double)iter / + (double)rte_get_tsc_hz(), (double)min_time, + (double)(min_time * 100) / (double)rte_get_tsc_hz(), + (double)max_time, (double)(max_time * 100) / + (double)rte_get_tsc_hz()); + + return TEST_SUCCESS; +} + +static int +validation_test(struct active_device *ad, + struct test_op_params *op_params) +{ + int iter; + uint16_t burst_sz = op_params->burst_sz; + const uint16_t num_to_process = op_params->num_to_process; + const enum rte_bbdev_op_type op_type = test_vector.op_type; + const uint16_t queue_id = ad->queue_ids[0]; + struct test_buffers *bufs = NULL; + struct rte_bbdev_info info; + uint64_t total_time, min_time, max_time; + const char *op_type_str; + + total_time = max_time = 0; + min_time = UINT64_MAX; + + TEST_ASSERT_SUCCESS((burst_sz > MAX_BURST), + "BURST_SIZE should be <= %u", MAX_BURST); + + rte_bbdev_info_get(ad->dev_id, &info); + bufs = &op_params->q_bufs[GET_SOCKET(info.socket_id)][queue_id]; + + op_type_str = rte_bbdev_op_type_str(op_type); + 
TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type); + + printf("+ --- +\n"); + printf("== test: validation\ndev: %s, burst size: %u, num ops: %u, op type: %s\n", + info.dev_name, burst_sz, num_to_process, op_type_str);
[dpdk-dev] [PATCH v5 0/7] BBDEV test updates
v5: correcting typos again. Quite worrisome spelling disorder. v4: rebased on main latest v3: apologize again for typo and not double checking with check-git-log v2: typos missed in commit messages Series updating and extending the app running the bbdev-test for the existing bbdev PMDs. Nicolas Chautru (7): app/bbdev: add explicit ut for latency vs validation app/bbdev: add explicit check for counters app/bbdev: include explicit HARQ preloading app/bbdev: define wait for offload app/bbdev: skip bler ut when compression is used app/bbdev: reduce duration of throughput test app/bbdev: update offload test to dequeue full ring app/test-bbdev/main.h| 1 + app/test-bbdev/test_bbdev_perf.c | 193 ++- 2 files changed, 152 insertions(+), 42 deletions(-) -- 1.8.3.1
[dpdk-dev] [PATCH v5 3/7] app/bbdev: include explicit HARQ preloading
Run preloading explicitly for unit tests. Load each code block by reusing existing input op then restore for the actual test. Signed-off-by: Nicolas Chautru Acked-by: Liu Tianjiao --- app/test-bbdev/main.h| 1 + app/test-bbdev/test_bbdev_perf.c | 51 +--- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h index fb3dec8..dc10a50 100644 --- a/app/test-bbdev/main.h +++ b/app/test-bbdev/main.h @@ -17,6 +17,7 @@ #define TEST_SKIPPED1 #define MAX_BURST 512U +#define MAX_OPS 1024U #define DEFAULT_BURST 32U #define DEFAULT_OPS 64U #define DEFAULT_ITER 6U diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index b62848e..f30cbdb 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct active_device *ad, bool preload) { uint16_t j; - int ret; - uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024; - struct rte_bbdev_op_data save_hc_in, save_hc_out; - struct rte_bbdev_dec_op *ops_deq[MAX_BURST]; + int deq; + uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; + struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS]; + struct rte_bbdev_dec_op *ops_deq[MAX_OPS]; uint32_t flags = ops[0]->ldpc_dec.op_flags; bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE; bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE; bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE; bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION; - for (j = 0; j < n; ++j) { - if ((mem_in || hc_in) && preload) { - save_hc_in = ops[j]->ldpc_dec.harq_combined_input; - save_hc_out = ops[j]->ldpc_dec.harq_combined_output; + if ((mem_in || hc_in) && preload) { + for (j = 0; j < n; ++j) { + save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input; + save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output; ops[j]->ldpc_dec.op_flags = RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE; @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct active_device *ad, ops[j]->ldpc_dec.harq_combined_output.offset = harq_offset; ops[j]->ldpc_dec.harq_combined_input.offset = 0; - rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, - &ops[j], 1); - ret = 0; - while (ret == 0) - ret = rte_bbdev_dequeue_ldpc_dec_ops( - dev_id, queue_id, &ops_deq[j], 1); + harq_offset += HARQ_INCR; + } + rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n); + deq = 0; + while (deq != n) + deq += rte_bbdev_dequeue_ldpc_dec_ops( + dev_id, queue_id, &ops_deq[deq], + n - deq); + /* Restore the operations */ + for (j = 0; j < n; ++j) { ops[j]->ldpc_dec.op_flags = flags; - ops[j]->ldpc_dec.harq_combined_input = save_hc_in; - ops[j]->ldpc_dec.harq_combined_output = save_hc_out; + ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j]; + ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j]; } + } + harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS; + for (j = 0; j < n; ++j) { /* Adjust HARQ offset when we reach external DDR */ if (mem_in || hc_in) ops[j]->ldpc_dec.harq_combined_input.offset @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct active_device *ad, mbuf_reset( ops_enq[j]->ldpc_dec.harq_combined_output.data); } - if (extDdr) { - bool preload = i == (TEST_REPETITIONS - 1); + if (extDdr) preload_harq_ddr(tp->dev_id, queue_id, ops_enq, - num_ops, preload); - } + num_ops, true); start_time = 
rte_rdtsc_precise(); for (enq = 0, deq = 0; enq < num_ops;) { @@ -3362,11 +3367,9 @@ typedef int (te
[dpdk-dev] [PATCH v5 2/7] app/bbdev: add explicit check for counters
Adding explicit check in ut that the stats counters have the expect values. Was missing for coverage. Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 17 + 1 file changed, 17 insertions(+) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 3554a77..b62848e 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -4840,6 +4840,23 @@ typedef int (test_case_function)(struct active_device *ad, (double)(time_st.deq_max_time * 100) / rte_get_tsc_hz()); + struct rte_bbdev_stats stats = {0}; + get_bbdev_queue_stats(ad->dev_id, queue_id, &stats); + if (op_type != RTE_BBDEV_OP_LDPC_DEC) { + TEST_ASSERT_SUCCESS(stats.enqueued_count != num_to_process, + "Mismatch in enqueue count %10"PRIu64" %d", + stats.enqueued_count, num_to_process); + TEST_ASSERT_SUCCESS(stats.dequeued_count != num_to_process, + "Mismatch in dequeue count %10"PRIu64" %d", + stats.dequeued_count, num_to_process); + } + TEST_ASSERT_SUCCESS(stats.enqueue_err_count != 0, + "Enqueue count Error %10"PRIu64"", + stats.enqueue_err_count); + TEST_ASSERT_SUCCESS(stats.dequeue_err_count != 0, + "Dequeue count Error (%10"PRIu64"", + stats.dequeue_err_count); + return TEST_SUCCESS; #endif } -- 1.8.3.1
[dpdk-dev] [PATCH v5 5/7] app/bbdev: skip bler ut when compression is used
bler test results are not valid when LLR compression is used or for loopback scenarios. Skipping these. Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index 39f06db..a15ea69 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -3719,7 +3719,11 @@ typedef int (test_case_function)(struct active_device *ad, RTE_ALIGN(sizeof(struct thread_params) * num_lcores, RTE_CACHE_LINE_SIZE)); - if (test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) + if ((test_vector.op_type == RTE_BBDEV_OP_LDPC_DEC) && + !check_bit(test_vector.ldpc_dec.op_flags, + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK) + && !check_bit(test_vector.ldpc_dec.op_flags, + RTE_BBDEV_LDPC_LLR_COMPRESSION)) bler_function = bler_pmd_lcore_ldpc_dec; else return TEST_SKIPPED; -- 1.8.3.1
[dpdk-dev] [PATCH v5 4/7] app/bbdev: define wait for offload
Replacing magic number for default wait time for hw offload. Signed-off-by: Nicolas Chautru Acked-by: Liu Tianjiao --- app/test-bbdev/test_bbdev_perf.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index f30cbdb..39f06db 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -25,6 +25,7 @@ #define MAX_QUEUES RTE_MAX_LCORE #define TEST_REPETITIONS 1000 +#define WAIT_OFFLOAD_US 1000 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC #include @@ -4451,7 +4452,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4542,7 +4543,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4630,7 +4631,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); @@ -4713,7 +4714,7 @@ typedef int (test_case_function)(struct active_device *ad, time_st->enq_acc_total_time += stats.acc_offload_cycles; /* give time for device to process ops */ - rte_delay_us(200); + rte_delay_us(WAIT_OFFLOAD_US); /* Start time meas for dequeue function offload latency */ deq_start_time = rte_rdtsc_precise(); -- 1.8.3.1
[dpdk-dev] [PATCH v5 7/7] app/bbdev: update offload test to dequeue full ring
update offload dequeue to retrieve the full ring to be agnostic of implementation. Signed-off-by: Nicolas Chautru Acked-by: Aidan Goddard Acked-by: Dave Burley --- app/test-bbdev/test_bbdev_perf.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index b5dc536..a6884c5 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -4463,8 +4463,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_dec_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4554,8 +4554,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4642,8 +4642,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_enc_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, @@ -4725,8 +4725,8 @@ typedef int (test_case_function)(struct active_device *ad, /* Dequeue one operation */ do { deq += rte_bbdev_dequeue_ldpc_enc_ops(dev_id, queue_id, - &ops_deq[deq], 1); - } while (unlikely(deq != 1)); + &ops_deq[deq], enq); + } while (unlikely(deq == 0)); deq_last_time = rte_rdtsc_precise() - deq_start_time; time_st->deq_max_time = RTE_MAX(time_st->deq_max_time, -- 1.8.3.1
[dpdk-dev] [PATCH v5 6/7] app/bbdev: reduce duration of throughput test
Reducing number of repetitions from 1000 to 100 to save time. Results are accurate enough with 100 loops. Signed-off-by: Nicolas Chautru Acked-by: Liu Tianjiao --- app/test-bbdev/test_bbdev_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c index a15ea69..b5dc536 100644 --- a/app/test-bbdev/test_bbdev_perf.c +++ b/app/test-bbdev/test_bbdev_perf.c @@ -24,7 +24,7 @@ #define GET_SOCKET(socket_id) (((socket_id) == SOCKET_ID_ANY) ? 0 : (socket_id)) #define MAX_QUEUES RTE_MAX_LCORE -#define TEST_REPETITIONS 1000 +#define TEST_REPETITIONS 100 #define WAIT_OFFLOAD_US 1000 #ifdef RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC -- 1.8.3.1
Re: [dpdk-dev] [PATCH 1/2] net/netvsc: allow setting rx and tx copy break
> Subject: Re: [dpdk-dev] [PATCH 1/2] net/netvsc: allow setting rx and tx copy > break > > On 10/22/2020 8:46 PM, Long Li wrote: > > From: Stephen Hemminger > > > > The values for Rx and Tx copy break should be tunable rather than hard > > coded constants. > > > > The rx_copybreak sets the threshold where the driver uses an external > > mbuf to avoid having to copy data. Setting 0 for copybreak will cause > > driver to always create an external mbuf. Setting a value greater than > > the MTU would prevent it from ever making an external mbuf and always > > copy. The default value is 256 (bytes). > > > > Likewise the tx_copybreak sets the threshold where the driver > > aggregates multiple small packets into one request. If tx_copybreak is > > 0 then each packet goes as a VMBus request (no copying). > > If tx_copybreak is set larger than the MTU, then all packets smaller > > than the chunk size of the VMBus send buffer will be copied; larger > > packets always have to go as a single direct request. The default > > value is 512 (bytes). > > > > Signed-off-by: Stephen Hemminger > > Signed-off-by: Long Li > > <...> > > > @@ -45,6 +45,10 @@ > > DEV_RX_OFFLOAD_VLAN_STRIP | \ > > DEV_RX_OFFLOAD_RSS_HASH) > > > > +#define NETVSC_ARG_LATENCY "latency" > > +#define NETVSC_ARG_RXBREAK "rx_copybreak" > > +#define NETVSC_ARG_TXBREAK "tx_copybreak" > > + > > Can you please document new devargs in the driver documentation? > > <...> > > > @@ -181,12 +167,32 @@ static int hn_parse_args(const struct rte_eth_dev > *dev) > > return -EINVAL; > > } > > > > - ret = rte_kvargs_process(kvlist, "latency", hn_set_latency, hv); > > - if (ret) > > - PMD_DRV_LOG(ERR, "Unable to process latency arg\n"); > > + for (i = 0; i != kvlist->count; ++i) { > > + const struct rte_kvargs_pair *pair = &kvlist->pairs[i]; > > + > > + if (!strcmp(pair->key, NETVSC_ARG_LATENCY)) > > + latency = atoi(pair->value); > > + else if (!strcmp(pair->key, NETVSC_ARG_RXBREAK)) > > + rx_break = atoi(pair->value); > > + else if (!strcmp(pair->key, NETVSC_ARG_TXBREAK)) > > + tx_break = atoi(pair->value); > > + } > > + > > Instead of accessing to the kvlist internals, I think better to use > 'rte_kvargs_process()' as done previously. > If the reason to remove callback is to not create a callback for each > argument, a > generic one can be used for all. > > > + if (latency >= 0) { > > + PMD_DRV_LOG(DEBUG, "set latency %d usec", latency); > > + hv->latency = latency * 1000; /* usec to nsec */ > > + } > > + if (rx_break >= 0) { > > + PMD_DRV_LOG(DEBUG, "rx copy break set to %d", rx_break); > > + hv->rx_copybreak = rx_break; > > + } > > + if (tx_break >= 0) { > > + PMD_DRV_LOG(DEBUG, "tx copy break set to %d", tx_break); > > + hv->tx_copybreak = tx_break; > > + } > > > > When 'rte_kvargs_process()' used, the valued can be assigned directly to 'hv- > >tx_copybreak', if the argument is not available, it won't be updated, so > >above > check can be dropped. Thanks Ferruh, I will send V2 to address comments. Long
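For what it's worth, the generic callback suggested above could be as small as the following sketch (names are illustrative, not the actual v2 code):

    #include <stdlib.h>
    #include <rte_common.h>
    #include <rte_kvargs.h>

    /* One handler shared by latency/rx_copybreak/tx_copybreak: parse the
     * devarg value as an integer straight into the variable passed in. */
    static int
    hn_parse_int_arg(const char *key __rte_unused, const char *value,
                     void *opaque)
    {
        int *target = opaque;

        *target = atoi(value);
        return 0;
    }

    /* usage, error handling omitted:
     *   rte_kvargs_process(kvlist, NETVSC_ARG_RXBREAK,
     *                      hn_parse_int_arg, &rx_break);
     */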
Re: [dpdk-dev] [PATCH v3 3/5] test/ring: move common function to header file
> > > Move test_ring_inc_ptr to header file so that it can be used by > > functions in other files. > > > > Signed-off-by: Honnappa Nagarahalli > > Reviewed-by: Dharmik Thakkar > > --- > > app/test/test_ring.c | 11 --- app/test/test_ring.h | 11 > > +++ > > 2 files changed, 11 insertions(+), 11 deletions(-) > > > > diff --git a/app/test/test_ring.c b/app/test/test_ring.c index > > a62cb263b..329d538a9 100644 > > --- a/app/test/test_ring.c > > +++ b/app/test/test_ring.c > > @@ -243,17 +243,6 @@ test_ring_deq_impl(struct rte_ring *r, void **obj, > int esize, unsigned int n, > > NULL); > > } > > > > -static void** > > -test_ring_inc_ptr(void **obj, int esize, unsigned int n) -{ > > - /* Legacy queue APIs? */ > > - if ((esize) == -1) > > - return ((void **)obj) + n; > > - else > > - return (void **)(((uint32_t *)obj) + > > - (n * esize / sizeof(uint32_t))); > > -} > > - > > static void > > test_ring_mem_init(void *obj, unsigned int count, int esize) { diff > > --git a/app/test/test_ring.h b/app/test/test_ring.h index > > d4b15af7c..16697ee02 100644 > > --- a/app/test/test_ring.h > > +++ b/app/test/test_ring.h > > @@ -42,6 +42,17 @@ test_ring_create(const char *name, int esize, > unsigned int count, > > (socket_id), (flags)); > > } > > > > +static inline void** > > +test_ring_inc_ptr(void **obj, int esize, unsigned int n) { > > + /* Legacy queue APIs? */ > > + if ((esize) == -1) > > + return ((void **)obj) + n; > > + else > > + return (void **)(((uint32_t *)obj) + > > + (n * esize / sizeof(uint32_t))); } > > In all these pointer arithemetics, why do you need 'void **'? > Why just not 'void*', or even uintptr_t? I will change it as follows: static inline void* test_ring_inc_ptr(void *obj, int esize, unsigned int n) { int sz; sz = esize; /* Legacy queue APIs? */ if ((esize) == -1) sz = sizeof(void *); return (void *)((uint32_t *)obj + (n * sz / sizeof(uint32_t))); } > > > > + > > static __rte_always_inline unsigned int test_ring_enqueue(struct > > rte_ring *r, void **obj, int esize, unsigned int n, > > unsigned int api_type) > > -- > > 2.17.1
Re: [dpdk-dev] [PATCH v3 3/5] test/ring: move common function to header file
On Fri, 23 Oct 2020 23:54:22 + Honnappa Nagarahalli wrote: > > > > > > > Move test_ring_inc_ptr to header file so that it can be used by > > > functions in other files. > > > > > > Signed-off-by: Honnappa Nagarahalli > > > Reviewed-by: Dharmik Thakkar > > > --- > > > app/test/test_ring.c | 11 --- app/test/test_ring.h | 11 > > > +++ > > > 2 files changed, 11 insertions(+), 11 deletions(-) > > > > > > diff --git a/app/test/test_ring.c b/app/test/test_ring.c index > > > a62cb263b..329d538a9 100644 > > > --- a/app/test/test_ring.c > > > +++ b/app/test/test_ring.c > > > @@ -243,17 +243,6 @@ test_ring_deq_impl(struct rte_ring *r, void **obj, > > int esize, unsigned int n, > > > NULL); > > > } > > > > > > -static void** > > > -test_ring_inc_ptr(void **obj, int esize, unsigned int n) -{ > > > - /* Legacy queue APIs? */ > > > - if ((esize) == -1) > > > - return ((void **)obj) + n; > > > - else > > > - return (void **)(((uint32_t *)obj) + > > > - (n * esize / sizeof(uint32_t))); > > > -} > > > - > > > static void > > > test_ring_mem_init(void *obj, unsigned int count, int esize) { diff > > > --git a/app/test/test_ring.h b/app/test/test_ring.h index > > > d4b15af7c..16697ee02 100644 > > > --- a/app/test/test_ring.h > > > +++ b/app/test/test_ring.h > > > @@ -42,6 +42,17 @@ test_ring_create(const char *name, int esize, > > unsigned int count, > > > (socket_id), (flags)); > > > } > > > > > > +static inline void** > > > +test_ring_inc_ptr(void **obj, int esize, unsigned int n) { > > > + /* Legacy queue APIs? */ > > > + if ((esize) == -1) > > > + return ((void **)obj) + n; > > > + else > > > + return (void **)(((uint32_t *)obj) + > > > + (n * esize / sizeof(uint32_t))); } > > > > In all these pointer arithemetics, why do you need 'void **'? > > Why just not 'void*', or even uintptr_t? > I will change it as follows: > > static inline void* > test_ring_inc_ptr(void *obj, int esize, unsigned int n) > { > int sz; > > sz = esize; > /* Legacy queue APIs? */ > if ((esize) == -1) Extra (paren) doesn't help readability either
Re: [dpdk-dev] [PATCH v3 3/5] test/ring: move common function to header file
> > static inline void* > > test_ring_inc_ptr(void *obj, int esize, unsigned int n) { > > int sz; > > > > sz = esize; > > /* Legacy queue APIs? */ > > if ((esize) == -1) > > Extra (paren) doesn't help readability either +1
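Folding the feedback from this thread together (take and return plain void *, drop the redundant parentheses around esize), the helper could end up as the following sketch:

    static inline void *
    test_ring_inc_ptr(void *obj, int esize, unsigned int n)
    {
        int sz = esize;

        /* Legacy queue APIs? */
        if (esize == -1)
            sz = sizeof(void *);

        return (void *)((uint32_t *)obj + (n * sz / sizeof(uint32_t)));
    }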
[dpdk-dev] [PATCH v3] mbuf: fix dynamic flags lookup from secondary process
The dynamic flag management is broken if rte_mbuf_dynflag_lookup() is done in a secondary process because the local pointer to the memzone is not ever initialized. Fix it by using the same checks as dynfield_register(). I.e if shared memory zone has not been looked up already, then discover it. Fixes: 4958ca3a443a ("mbuf: support dynamic fields and flags") Cc: olivier.m...@6wind.com Signed-off-by: Stephen Hemminger --- v3 - change title, fix one extra whitespace lib/librte_mbuf/rte_mbuf_dyn.c | 20 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/lib/librte_mbuf/rte_mbuf_dyn.c b/lib/librte_mbuf/rte_mbuf_dyn.c index 538a43f6959f..554ec5a1ca4f 100644 --- a/lib/librte_mbuf/rte_mbuf_dyn.c +++ b/lib/librte_mbuf/rte_mbuf_dyn.c @@ -185,13 +185,11 @@ rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params) { struct mbuf_dynfield_elt *mbuf_dynfield; - if (shm == NULL) { - rte_errno = ENOENT; - return -1; - } - rte_mcfg_tailq_read_lock(); - mbuf_dynfield = __mbuf_dynfield_lookup(name); + if (shm == NULL && init_shared_mem() < 0) + mbuf_dynfield = NULL; + else + mbuf_dynfield = __mbuf_dynfield_lookup(name); rte_mcfg_tailq_read_unlock(); if (mbuf_dynfield == NULL) { @@ -384,13 +382,11 @@ rte_mbuf_dynflag_lookup(const char *name, { struct mbuf_dynflag_elt *mbuf_dynflag; - if (shm == NULL) { - rte_errno = ENOENT; - return -1; - } - rte_mcfg_tailq_read_lock(); - mbuf_dynflag = __mbuf_dynflag_lookup(name); + if (shm == NULL && init_shared_mem() < 0) + mbuf_dynflag = NULL; + else + mbuf_dynflag = __mbuf_dynflag_lookup(name); rte_mcfg_tailq_read_unlock(); if (mbuf_dynflag == NULL) { -- 2.27.0
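To illustrate what the fix enables (illustrative example, not part of the patch): a secondary process can now resolve a dynamic flag that the primary registered, because the lookup discovers the shared memzone on first use. The flag name below is hypothetical.

    #include <rte_log.h>
    #include <rte_mbuf_dyn.h>

    static int
    lookup_example_dynflag(void)
    {
        /* works from a secondary process after the fix */
        int bitnum = rte_mbuf_dynflag_lookup("example_dynflag", NULL);

        if (bitnum < 0) {
            RTE_LOG(ERR, USER1, "dynflag not registered by primary\n");
            return -1;
        }

        /* usable as (1ULL << bitnum) in mbuf->ol_flags */
        return bitnum;
    }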
[dpdk-dev] RTE pipeline table lookup miss with > 1 core
Hi, I am testing a pipeline application with two or more cores using DPDK 19.05. The application consists of:

Core1: continuously gets packets from an Ethernet interface (using rte_eth_rx_burst), inspects packet headers such as EtherType, UDP dest_port, etc., determines the application (say app_1, app_2, etc.), and forwards the packet to the rte_ring of app_i (call it app_i_ring).

Core2: specialized for app_1 processing; has an RX rte_ring (call it app_1_ring) and an app_1 pipeline consisting of a few Hash/Array tables.

Core3: specialized for app_2 processing; has an RX rte_ring (call it app_2_ring) and an app_2 pipeline consisting of a few Hash/Array tables.

When I run this application with cores 1-3, it works fine without any table miss. When I add a second app_1 or app_2 core (for instance adding core4 running app_1), I get about 0.05% table misses on the app_1 hash tables. The only difference between the core1-3 and core1-4 configurations is that app_1 has two cores simultaneously running its pipeline instance and doing lookups on the same set of tables. Please note that I have logged the missed lookup packets and the key in metadata, and the keys are correct when the miss happens. Any reason for this table miss? Am I missing something? Thanks, Mehrdad malip...@ciena.com
[dpdk-dev] [PATCH v5 0/5] replace blacklist/whitelist with block/allow
This is a revised version of the earlier RFC patch set for changing the blacklist/whitelist terms in DPDK. The first patch is a duplicate from the other patch set about use of master/slave in API. Note: This may cause some warnings in existing programs in the CI tests using the -w flag. It also causes checkpatch complaints because we are replacing blacklist which is flagged by the current versions. v5 - rebase and fix conflicts in documentation Stephen Hemminger (5): eal: replace usage of blacklist/whitelist in enum drivers: replace references to blacklist eal: replace pci-whitelist/pci-blacklist options app/test: use new allowlist and blocklist doc: change references to blacklist and whitelist app/test/autotest.py | 16 ++--- app/test/autotest_runner.py | 18 ++--- app/test/test.c | 2 +- app/test/test_eal_flags.c | 52 +++--- doc/guides/cryptodevs/dpaa2_sec.rst | 6 +- doc/guides/cryptodevs/dpaa_sec.rst| 6 +- doc/guides/cryptodevs/qat.rst | 12 ++-- doc/guides/eventdevs/octeontx2.rst| 20 +++--- doc/guides/freebsd_gsg/build_sample_apps.rst | 2 +- doc/guides/linux_gsg/build_sample_apps.rst| 2 +- doc/guides/linux_gsg/eal_args.include.rst | 14 ++-- doc/guides/linux_gsg/linux_drivers.rst| 4 +- doc/guides/mempool/octeontx2.rst | 4 +- doc/guides/nics/bnxt.rst | 18 ++--- doc/guides/nics/cxgbe.rst | 12 ++-- doc/guides/nics/dpaa.rst | 6 +- doc/guides/nics/dpaa2.rst | 6 +- doc/guides/nics/enic.rst | 6 +- doc/guides/nics/fail_safe.rst | 16 ++--- doc/guides/nics/features.rst | 2 +- doc/guides/nics/i40e.rst | 16 ++--- doc/guides/nics/ice.rst | 28 +--- doc/guides/nics/ixgbe.rst | 4 +- doc/guides/nics/mlx4.rst | 18 ++--- doc/guides/nics/mlx5.rst | 14 ++-- doc/guides/nics/nfb.rst | 2 +- doc/guides/nics/octeontx2.rst | 23 +++--- doc/guides/nics/sfc_efx.rst | 2 +- doc/guides/nics/tap.rst | 2 +- doc/guides/nics/thunderx.rst | 4 +- .../prog_guide/env_abstraction_layer.rst | 6 +- doc/guides/prog_guide/multi_proc_support.rst | 4 +- doc/guides/prog_guide/poll_mode_drv.rst | 6 +- .../prog_guide/switch_representation.rst | 6 +- doc/guides/rel_notes/release_20_11.rst| 5 ++ doc/guides/sample_app_ug/bbdev_app.rst| 14 ++-- .../sample_app_ug/eventdev_pipeline.rst | 4 +- doc/guides/sample_app_ug/ipsec_secgw.rst | 12 ++-- doc/guides/sample_app_ug/l3_forward.rst | 7 +- .../sample_app_ug/l3_forward_access_ctrl.rst | 2 +- .../sample_app_ug/l3_forward_power_man.rst| 3 +- doc/guides/sample_app_ug/vdpa.rst | 2 +- doc/guides/tools/cryptoperf.rst | 6 +- doc/guides/tools/flow-perf.rst| 2 +- doc/guides/tools/testregex.rst| 2 +- drivers/bus/dpaa/dpaa_bus.c | 7 +- drivers/bus/fslmc/fslmc_bus.c | 9 ++- drivers/bus/fslmc/fslmc_vfio.c| 12 ++-- drivers/bus/pci/pci_common.c | 24 +++ drivers/bus/vmbus/vmbus_common.c | 4 +- drivers/crypto/virtio/virtio_pci.c| 2 +- drivers/net/virtio/virtio_pci.c | 2 +- lib/librte_eal/common/eal_common_devargs.c| 14 ++-- lib/librte_eal/common/eal_common_options.c| 70 --- lib/librte_eal/common/eal_options.h | 9 ++- lib/librte_eal/include/rte_bus.h | 10 ++- lib/librte_eal/include/rte_dev.h | 10 ++- lib/librte_eal/include/rte_devargs.h | 10 ++- 58 files changed, 328 insertions(+), 273 deletions(-) -- 2.27.0
[dpdk-dev] [PATCH v5 2/5] drivers: replace references to blacklist
Use the new terminology blocked to describe when devices are excluded from being used. Signed-off-by: Stephen Hemminger Acked-by: Luca Boccassi Acked-by: Hemant Agrawal --- drivers/bus/dpaa/dpaa_bus.c| 7 +++ drivers/bus/fslmc/fslmc_bus.c | 9 - drivers/bus/fslmc/fslmc_vfio.c | 12 ++-- drivers/bus/pci/pci_common.c | 24 ++-- drivers/bus/vmbus/vmbus_common.c | 4 ++-- drivers/crypto/virtio/virtio_pci.c | 2 +- drivers/net/virtio/virtio_pci.c| 2 +- 7 files changed, 27 insertions(+), 33 deletions(-) diff --git a/drivers/bus/dpaa/dpaa_bus.c b/drivers/bus/dpaa/dpaa_bus.c index c94c72106f2c..a1f6a60f0760 100644 --- a/drivers/bus/dpaa/dpaa_bus.c +++ b/drivers/bus/dpaa/dpaa_bus.c @@ -568,7 +568,7 @@ rte_dpaa_bus_probe(void) struct rte_dpaa_driver *drv; FILE *svr_file = NULL; unsigned int svr_ver; - int probe_all = rte_dpaa_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST; + int probe_all = rte_dpaa_bus.bus.conf.scan_mode != RTE_BUS_SCAN_ALLOWLIST; static int process_once; /* If DPAA bus is not present nothing needs to be done */ @@ -630,13 +630,12 @@ rte_dpaa_bus_probe(void) if (!drv->probe || (dev->device.devargs && - dev->device.devargs->policy == RTE_DEV_BLACKLISTED)) +dev->device.devargs->policy == RTE_DEV_BLOCKED)) continue; if (probe_all || (dev->device.devargs && - dev->device.devargs->policy == - RTE_DEV_WHITELISTED)) { +dev->device.devargs->policy == RTE_DEV_ALLOWED)) { ret = drv->probe(drv, dev); if (ret) { DPAA_BUS_ERR("unable to probe:%s", diff --git a/drivers/bus/fslmc/fslmc_bus.c b/drivers/bus/fslmc/fslmc_bus.c index beb3dd008fbc..be4ab4ff3f64 100644 --- a/drivers/bus/fslmc/fslmc_bus.c +++ b/drivers/bus/fslmc/fslmc_bus.c @@ -403,7 +403,7 @@ rte_fslmc_probe(void) return 0; } - probe_all = rte_fslmc_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST; + probe_all = rte_fslmc_bus.bus.conf.scan_mode != RTE_BUS_SCAN_ALLOWLIST; /* In case of PA, the FD addresses returned by qbman APIs are physical * addresses, which need conversion into equivalent VA address for @@ -434,16 +434,15 @@ rte_fslmc_probe(void) continue; if (dev->device.devargs && - dev->device.devargs->policy == RTE_DEV_BLACKLISTED) { - DPAA2_BUS_LOG(DEBUG, "%s Blacklisted, skipping", + dev->device.devargs->policy == RTE_DEV_BLOCKED) { + DPAA2_BUS_LOG(DEBUG, "%s Blocklisted, skipping", dev->device.name); continue; } if (probe_all || (dev->device.devargs && - dev->device.devargs->policy == - RTE_DEV_WHITELISTED)) { + dev->device.devargs->policy == RTE_DEV_ALLOWED)) { ret = drv->probe(drv, dev); if (ret) { DPAA2_BUS_ERR("Unable to probe"); diff --git a/drivers/bus/fslmc/fslmc_vfio.c b/drivers/bus/fslmc/fslmc_vfio.c index aba55b46d8da..ebcc8e1edae7 100644 --- a/drivers/bus/fslmc/fslmc_vfio.c +++ b/drivers/bus/fslmc/fslmc_vfio.c @@ -812,13 +812,13 @@ fslmc_vfio_process_group(void) if (dev->dev_type == DPAA2_MPORTAL) { dpmcp_count++; if (dev->device.devargs && - dev->device.devargs->policy == RTE_DEV_BLACKLISTED) + dev->device.devargs->policy == RTE_DEV_BLOCKED) is_dpmcp_in_blocklist = true; } if (dev->dev_type == DPAA2_IO) { dpio_count++; if (dev->device.devargs && - dev->device.devargs->policy == RTE_DEV_BLACKLISTED) + dev->device.devargs->policy == RTE_DEV_BLOCKED) is_dpio_in_blocklist = true; } } @@ -829,8 +829,8 @@ fslmc_vfio_process_group(void) if (dev->dev_type == DPAA2_MPORTAL) { current_device++; if (dev->device.devargs && - dev->device.devargs->policy == RTE_DEV_BLACKLISTED) { - DPAA2_BUS_LOG(DEBUG, "%s Blacklisted, skipping", +
[dpdk-dev] [PATCH v5 1/5] eal: replace usage of blacklist/whitelist in enum
This patch renames the enum values in the EAL include files. As a backward compatible temporary migration tool, define a replacement mapping for old values. The old names relating to blacklist and whitelist are replaced by block list and allow list, but applications may be using the older compatibility macros. To help with conversion to new names cause a message when the compatibility names are used. Signed-off-by: Stephen Hemminger Acked-by: Luca Boccassi Acked-by: Gaetan Rivet --- lib/librte_eal/common/eal_common_devargs.c | 14 +++--- lib/librte_eal/include/rte_bus.h | 10 -- lib/librte_eal/include/rte_dev.h | 10 -- lib/librte_eal/include/rte_devargs.h | 10 -- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index 2123773ef840..fcf3d9a3ccb2 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -296,7 +296,7 @@ rte_devargs_insert(struct rte_devargs **da) return 0; } -/* store a whitelist parameter for later parsing */ +/* store in allowed list parameter for later parsing */ int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str) { @@ -313,13 +313,13 @@ rte_devargs_add(enum rte_devtype devtype, const char *devargs_str) goto fail; devargs->type = devtype; bus = devargs->bus; - if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) - devargs->policy = RTE_DEV_BLACKLISTED; + if (devargs->type == RTE_DEVTYPE_BLOCKED) + devargs->policy = RTE_DEV_BLOCKED; if (bus->conf.scan_mode == RTE_BUS_SCAN_UNDEFINED) { - if (devargs->policy == RTE_DEV_WHITELISTED) - bus->conf.scan_mode = RTE_BUS_SCAN_WHITELIST; - else if (devargs->policy == RTE_DEV_BLACKLISTED) - bus->conf.scan_mode = RTE_BUS_SCAN_BLACKLIST; + if (devargs->policy == RTE_DEV_ALLOWED) + bus->conf.scan_mode = RTE_BUS_SCAN_ALLOWLIST; + else if (devargs->policy == RTE_DEV_BLOCKED) + bus->conf.scan_mode = RTE_BUS_SCAN_BLOCKLIST; } TAILQ_INSERT_TAIL(&devargs_list, devargs, next); return 0; diff --git a/lib/librte_eal/include/rte_bus.h b/lib/librte_eal/include/rte_bus.h index d3034d0edf77..80b154fb982c 100644 --- a/lib/librte_eal/include/rte_bus.h +++ b/lib/librte_eal/include/rte_bus.h @@ -215,10 +215,16 @@ typedef int (*rte_bus_sigbus_handler_t)(const void *failure_addr); */ enum rte_bus_scan_mode { RTE_BUS_SCAN_UNDEFINED, - RTE_BUS_SCAN_WHITELIST, - RTE_BUS_SCAN_BLACKLIST, + RTE_BUS_SCAN_ALLOWLIST, + RTE_BUS_SCAN_BLOCKLIST, }; +/* Backwards compatibility will be removed */ +#define RTE_BUS_SCAN_WHITELIST \ + RTE_DEPRECATED(RTE_BUS_SCAN_WHITELIST) RTE_BUS_SCAN_ALLOWLIST +#define RTE_BUS_SCAN_BLACKLIST \ + RTE_DEPRECATED(RTE_BUS_SCAN_BLACKLIST) RTE_BUS_SCAN_BLOCKLIST + /** * A structure used to configure bus operations. */ diff --git a/lib/librte_eal/include/rte_dev.h b/lib/librte_eal/include/rte_dev.h index 81905b3ae35f..6dd72c11a14a 100644 --- a/lib/librte_eal/include/rte_dev.h +++ b/lib/librte_eal/include/rte_dev.h @@ -52,10 +52,16 @@ typedef void (*rte_dev_event_cb_fn)(const char *device_name, * Device policies. */ enum rte_dev_policy { - RTE_DEV_WHITELISTED, - RTE_DEV_BLACKLISTED, + RTE_DEV_ALLOWED, + RTE_DEV_BLOCKED, }; +/* Backwards compatibility will be removed */ +#define RTE_DEV_WHITELISTED \ + RTE_DEPRECATED(RTE_DEV_WHITELISTED) RTE_DEV_ALLOWED +#define RTE_DEV_BLACKLISTED \ + RTE_DEPRECATED(RTE_DEV_BLACKLISTED) RTE_DEV_BLOCKED + /** * A generic memory resource representation. 
*/ diff --git a/lib/librte_eal/include/rte_devargs.h b/lib/librte_eal/include/rte_devargs.h index 898efa0d667b..296f19324fae 100644 --- a/lib/librte_eal/include/rte_devargs.h +++ b/lib/librte_eal/include/rte_devargs.h @@ -29,11 +29,17 @@ extern "C" { * Type of generic device */ enum rte_devtype { - RTE_DEVTYPE_WHITELISTED_PCI, - RTE_DEVTYPE_BLACKLISTED_PCI, + RTE_DEVTYPE_ALLOWED, + RTE_DEVTYPE_BLOCKED, RTE_DEVTYPE_VIRTUAL, }; +/* Backwards compatibility will be removed later */ +#define RTE_DEVTYPE_WHITELISTED_PCI \ + RTE_DEPRECATED(RTE_DEVTYPE_WHITELISTED_PCI) RTE_DEVTYPE_ALLOWED +#define RTE_DEVTYPE_BLACKLISTED_PCI \ + RTE_DEPRECATED(RTE_DEVTYPE_BLACKLISTED_PCI) RTE_DEVTYPE_BLOCKED + /** * Structure that stores a device given by the user with its arguments * -- 2.27.0
[dpdk-dev] [PATCH v5 4/5] app/test: use new allowlist and blocklist
Test the renamed blocklist and allowlist arguments. Use new terms in test variable names as well. Signed-off-by: Stephen Hemminger Acked-by: Luca Boccassi --- app/test/autotest.py| 16 ++-- app/test/autotest_runner.py | 18 ++--- app/test/test.c | 2 +- app/test/test_eal_flags.c | 52 ++--- 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/app/test/autotest.py b/app/test/autotest.py index 9eef1efbe565..988d054ba6c8 100644 --- a/app/test/autotest.py +++ b/app/test/autotest.py @@ -10,7 +10,7 @@ def usage(): print("Usage: autotest.py [test app|test iso image] ", - "[target] [whitelist|-blacklist]") + "[target] [allowlist|-blocklist]") if len(sys.argv) < 3: usage() @@ -18,18 +18,18 @@ def usage(): target = sys.argv[2] -test_whitelist = None -test_blacklist = None +test_allowlist = None +test_blocklist = None -# get blacklist/whitelist +# get blocklist/allowlist if len(sys.argv) > 3: testlist = sys.argv[3].split(',') testlist = [test.lower() for test in testlist] if testlist[0].startswith('-'): testlist[0] = testlist[0].lstrip('-') -test_blacklist = testlist +test_blocklist = testlist else: -test_whitelist = testlist +test_allowlist = testlist cmdline = "%s -c f" % (sys.argv[1]) @@ -39,8 +39,8 @@ def usage(): # processes, so make it 1, otherwise make it 4. ignored for non-parallel tests n_processes = 1 if "bsd" in target else 4 -runner = autotest_runner.AutotestRunner(cmdline, target, test_blacklist, -test_whitelist, n_processes) +runner = autotest_runner.AutotestRunner(cmdline, target, test_blocklist, +test_allowlist, n_processes) runner.parallel_tests = autotest_data.parallel_test_list[:] runner.non_parallel_tests = autotest_data.non_parallel_test_list[:] diff --git a/app/test/autotest_runner.py b/app/test/autotest_runner.py index 998fe57a55d1..9fb94ae27352 100644 --- a/app/test/autotest_runner.py +++ b/app/test/autotest_runner.py @@ -188,14 +188,14 @@ class AutotestRunner: n_tests = 0 fails = 0 log_buffers = [] -blacklist = [] -whitelist = [] +blocklist = [] +allowlist = [] -def __init__(self, cmdline, target, blacklist, whitelist, n_processes): +def __init__(self, cmdline, target, blocklist, allowlist, n_processes): self.cmdline = cmdline self.target = target -self.blacklist = blacklist -self.whitelist = whitelist +self.blocklist = blocklist +self.allowlist = allowlist self.skipped = [] self.parallel_tests = [] self.non_parallel_tests = [] @@ -269,7 +269,7 @@ def __process_result(self, result): self.csvwriter.writerow([test_name, test_result, result_str]) # this function checks individual test and decides if this test should be in -# the group by comparing it against whitelist/blacklist. it also checks if +# the group by comparing it against allowlist/blocklist. 
it also checks if # the test is compiled into the binary, and marks it as skipped if necessary def __filter_test(self, test): test_cmd = test["Command"] @@ -279,10 +279,10 @@ def __filter_test(self, test): if "_autotest" in test_id: test_id = test_id[:-len("_autotest")] -# filter out blacklisted/whitelisted tests -if self.blacklist and test_id in self.blacklist: +# filter out blocklisted/allowlisted tests +if self.blocklist and test_id in self.blocklist: return False -if self.whitelist and test_id not in self.whitelist: +if self.allowlist and test_id not in self.allowlist: return False # if test wasn't compiled in, remove it as well diff --git a/app/test/test.c b/app/test/test.c index f27a56e03390..d78eb04a25e4 100644 --- a/app/test/test.c +++ b/app/test/test.c @@ -61,7 +61,7 @@ do_recursive_call(void) { "test_main_lcore_flag", no_action }, { "test_invalid_n_flag", no_action }, { "test_no_hpet_flag", no_action }, - { "test_whitelist_flag", no_action }, + { "test_allowlist_flag", no_action }, { "test_invalid_b_flag", no_action }, { "test_invalid_vdev_flag", no_action }, { "test_invalid_r_flag", no_action }, diff --git a/app/test/test_eal_flags.c b/app/test/test_eal_flags.c index da119d4e4a92..3d3032bb4577 100644 --- a/app/test/test_eal_flags.c +++ b/app/test/test_eal_flags.c @@ -30,7 +30,7 @@ #define no_hpet "--no-hpet" #define no_huge "--no-huge" #define no_shconf "--no-shconf" -#define pci_whitelist "--pci-whitelist" +#define allow "--allow" #define vdev "--vdev" #define memtest "memtest" #define memtest1 "me
[dpdk-dev] [PATCH v5 3/5] eal: replace pci-whitelist/pci-blacklist options
Replace -w / --pci-whitelist with -a / --allow options and --pci-blacklist with --block. The -b short option remains unchanged. Allow the old options for now, but print a nag warning since old options are deprecated. Signed-off-by: Stephen Hemminger Acked-by: Luca Boccassi --- lib/librte_eal/common/eal_common_options.c | 70 +- lib/librte_eal/common/eal_options.h| 9 ++- 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index 7b3341683662..cf6c3770730a 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -52,7 +52,8 @@ const char eal_short_options[] = - "b:" /* pci-blacklist */ + "a:" /* allow */ + "b:" /* block */ "c:" /* coremask */ "s:" /* service coremask */ "d:" /* driver */ @@ -63,7 +64,8 @@ eal_short_options[] = "n:" /* memory channels */ "r:" /* memory ranks */ "v" /* version */ - "w:" /* pci-whitelist */ + "w:" /* whitelist (deprecated) */ + "B:" /* blacklist (deprecated) */ ; const struct option @@ -89,8 +91,8 @@ eal_long_options[] = { {OPT_NO_PCI,0, NULL, OPT_NO_PCI_NUM }, {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM}, {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM}, - {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM}, - {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM}, + {OPT_DEV_BLOCK, 1, NULL, OPT_DEV_BLOCK_NUM}, + {OPT_DEV_ALLOW, 1, NULL, OPT_DEV_ALLOW_NUM}, {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM}, {OPT_SOCKET_MEM,1, NULL, OPT_SOCKET_MEM_NUM }, {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM }, @@ -105,6 +107,11 @@ eal_long_options[] = { {OPT_TELEMETRY, 0, NULL, OPT_TELEMETRY_NUM}, {OPT_NO_TELEMETRY, 0, NULL, OPT_NO_TELEMETRY_NUM }, {OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM}, + + /* legacy options that will be removed in next LTS */ + {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM}, + {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM}, + {0, 0, NULL, 0} }; @@ -1447,29 +1454,37 @@ int eal_parse_common_option(int opt, const char *optarg, struct internal_config *conf) { - static int b_used; - static int w_used; + static bool x_used, i_used; switch (opt) { - /* blacklist */ + case 'B': + fprintf(stderr, + "Option --pci-blacklist is deprecated, use -b, --block instead\n"); + /* fallthrough */ case 'b': - if (w_used) - goto bw_used; - if (eal_option_device_add(RTE_DEVTYPE_BLACKLISTED_PCI, + /* excluded list */ + if (i_used) + goto include_exclude; + if (eal_option_device_add(RTE_DEVTYPE_BLOCKED, optarg) < 0) { return -1; } - b_used = 1; + x_used = true; break; - /* whitelist */ + case 'w': - if (b_used) - goto bw_used; - if (eal_option_device_add(RTE_DEVTYPE_WHITELISTED_PCI, + fprintf(stderr, + "Option -w, --pci-whitelist is deprecated, use -a, --allow option instead\n"); + /* fallthrough */ + case 'i': + /* include device list */ + if (x_used) + goto include_exclude; + if (eal_option_device_add(RTE_DEVTYPE_ALLOWED, optarg) < 0) { return -1; } - w_used = 1; + i_used = true; break; /* coremask */ case 'c': { @@ -1760,9 +1775,10 @@ eal_parse_common_option(int opt, const char *optarg, } return 0; -bw_used: - RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) " - "cannot be used at the same time\n"); + +include_exclude: + RTE_LOG(ERR, EAL, + "Options include (-i) and exclude (-x) can't be used at the same time\n"); return -1; } @@ -1997,14 +2013,14 @@ eal_common_usage(void) " -n CHANNELS Number of memory channels\n" " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" " -r 
RANKSForce number of memory ranks (don't detect)\n" - " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n" -
[dpdk-dev] [PATCH v5 5/5] doc: change references to blacklist and whitelist
There are two areas where documentation needed update. The first was use of whitelist when describing address filtering. The other is the legacy -w whitelist option for PCI which is used in many examples Signed-off-by: Stephen Hemminger Acked-by: Luca Boccassi --- doc/guides/cryptodevs/dpaa2_sec.rst | 6 ++-- doc/guides/cryptodevs/dpaa_sec.rst| 6 ++-- doc/guides/cryptodevs/qat.rst | 12 doc/guides/eventdevs/octeontx2.rst| 20 ++--- doc/guides/freebsd_gsg/build_sample_apps.rst | 2 +- doc/guides/linux_gsg/build_sample_apps.rst| 2 +- doc/guides/linux_gsg/eal_args.include.rst | 14 +- doc/guides/linux_gsg/linux_drivers.rst| 4 +-- doc/guides/mempool/octeontx2.rst | 4 +-- doc/guides/nics/bnxt.rst | 18 ++-- doc/guides/nics/cxgbe.rst | 12 doc/guides/nics/dpaa.rst | 6 ++-- doc/guides/nics/dpaa2.rst | 6 ++-- doc/guides/nics/enic.rst | 6 ++-- doc/guides/nics/fail_safe.rst | 16 +-- doc/guides/nics/features.rst | 2 +- doc/guides/nics/i40e.rst | 16 +-- doc/guides/nics/ice.rst | 28 +-- doc/guides/nics/ixgbe.rst | 4 +-- doc/guides/nics/mlx4.rst | 18 ++-- doc/guides/nics/mlx5.rst | 14 +- doc/guides/nics/nfb.rst | 2 +- doc/guides/nics/octeontx2.rst | 23 +++ doc/guides/nics/sfc_efx.rst | 2 +- doc/guides/nics/tap.rst | 2 +- doc/guides/nics/thunderx.rst | 4 +-- .../prog_guide/env_abstraction_layer.rst | 6 ++-- doc/guides/prog_guide/multi_proc_support.rst | 4 +-- doc/guides/prog_guide/poll_mode_drv.rst | 6 ++-- .../prog_guide/switch_representation.rst | 6 ++-- doc/guides/rel_notes/release_20_11.rst| 5 doc/guides/sample_app_ug/bbdev_app.rst| 14 +- .../sample_app_ug/eventdev_pipeline.rst | 4 +-- doc/guides/sample_app_ug/ipsec_secgw.rst | 12 doc/guides/sample_app_ug/l3_forward.rst | 7 +++-- .../sample_app_ug/l3_forward_access_ctrl.rst | 2 +- .../sample_app_ug/l3_forward_power_man.rst| 3 +- doc/guides/sample_app_ug/vdpa.rst | 2 +- doc/guides/tools/cryptoperf.rst | 6 ++-- doc/guides/tools/flow-perf.rst| 2 +- doc/guides/tools/testregex.rst| 2 +- 41 files changed, 175 insertions(+), 155 deletions(-) diff --git a/doc/guides/cryptodevs/dpaa2_sec.rst b/doc/guides/cryptodevs/dpaa2_sec.rst index 080768a2e766..83565d71752d 100644 --- a/doc/guides/cryptodevs/dpaa2_sec.rst +++ b/doc/guides/cryptodevs/dpaa2_sec.rst @@ -134,10 +134,10 @@ Supported DPAA2 SoCs * LS2088A/LS2048A * LS1088A/LS1048A -Whitelisting & Blacklisting +Allowing & Blocking +--- -For blacklisting a DPAA2 SEC device, following commands can be used. +The DPAA2 SEC device can be blocked with the following: .. code-block:: console diff --git a/doc/guides/cryptodevs/dpaa_sec.rst b/doc/guides/cryptodevs/dpaa_sec.rst index da14a68d9cff..bac82421bca2 100644 --- a/doc/guides/cryptodevs/dpaa_sec.rst +++ b/doc/guides/cryptodevs/dpaa_sec.rst @@ -82,10 +82,10 @@ Supported DPAA SoCs * LS1046A/LS1026A * LS1043A/LS1023A -Whitelisting & Blacklisting +Allowing & Blocking +--- -For blacklisting a DPAA device, following commands can be used. +For blocking a DPAA device, following commands can be used. .. code-block:: console diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst index f77ce91f76ee..f8d3d77474ff 100644 --- a/doc/guides/cryptodevs/qat.rst +++ b/doc/guides/cryptodevs/qat.rst @@ -127,7 +127,7 @@ Limitations optimisations in the GEN3 device. And if a GCM session is initialised on a GEN3 device, then attached to an op sent to a GEN1/GEN2 device, it will not be enqueued to the device and will be marked as failed. 
The simplest way to - mitigate this is to use the bdf whitelist to avoid mixing devices of different + mitigate this is to use the PCI allowlist to avoid mixing devices of different generations in the same process if planning to use for GCM. * The mixed algo feature on GEN2 is not supported by all kernel drivers. Check the notes under the Available Kernel Drivers table below for specific details. @@ -237,7 +237,7 @@ adjusted to the number of VFs which the QAT common code will need to handle. QAT VF may expose two crypto devices, sym and asym, it may happen that the number of devices will be bigger than MAX_DEVS and the process will show an error during PMD initialisation. To avoid t
Re: [dpdk-dev] [PATCH] net/i40e: fix FDIR issue for ETH + VLAN pattern
> -Original Message- > From: Xing, Beilei > Sent: Friday, October 23, 2020 4:15 PM > To: dev@dpdk.org > Cc: Guo, Jia ; Xing, Beilei ; > sta...@dpdk.org > Subject: [PATCH] net/i40e: fix FDIR issue for ETH + VLAN pattern > > From: Beilei Xing > > Currently, can't create more than one following flow for ETH + VLAN pattern: > > > flow create 0 ingress pattern eth / vlan vid is 350 / end > actions queue index 2 / end > > The root cause is the keys of all such flows are the same. > Do you mean creating more of the same flow, or different flows? Why does the same key relate to the code change below? Please make the commit message clearer and more readable. Thanks. > Fixes: 42044b69c67d ("net/i40e: support input set selection for FDIR") > Cc: sta...@dpdk.org > > Signed-off-by: Beilei Xing > --- > drivers/net/i40e/i40e_flow.c | 23 ++- > 1 file changed, 18 insertions(+), 5 deletions(-) > > diff --git a/drivers/net/i40e/i40e_flow.c b/drivers/net/i40e/i40e_flow.c > index adc5da1c53..60043322a1 100644 --- a/drivers/net/i40e/i40e_flow.c > +++ b/drivers/net/i40e/i40e_flow.c > @@ -28,6 +28,9 @@ > #define I40E_IPV6_FRAG_HEADER44 > #define I40E_TENANT_ARRAY_NUM3 > #define I40E_TCI_MASK0x > +#define I40E_PRI_MASK0xE000 > +#define I40E_CFI_MASK0x1000 > +#define I40E_VID_MASK0x0FFF > Would the defines below be better, to show that the TCI contains the other fields when these masks are used? +#define I40E_TCI_PRI_MASK 0xE000 +#define I40E_TCI_CFI_MASK 0x1000 +#define I40E_TCI_VID_MASK 0x0FFF > static int i40e_flow_validate(struct rte_eth_dev *dev, > const struct rte_flow_attr *attr, @@ -2705,12 > +2708,22 @@ i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, > > RTE_ASSERT(!(input_set & > I40E_INSET_LAST_ETHER_TYPE)); > if (vlan_spec && vlan_mask) { > - if (vlan_mask->tci == > - rte_cpu_to_be_16(I40E_TCI_MASK)) { > - input_set |= > I40E_INSET_VLAN_INNER; > - filter->input.flow_ext.vlan_tci = > - vlan_spec->tci; > + if (vlan_mask->tci != > + rte_cpu_to_be_16(I40E_TCI_MASK) && > + vlan_mask->tci != > + rte_cpu_to_be_16(I40E_PRI_MASK) && > + vlan_mask->tci != > + rte_cpu_to_be_16(I40E_CFI_MASK) && > + vlan_mask->tci != > + rte_cpu_to_be_16(I40E_VID_MASK)) { > + rte_flow_error_set(error, EINVAL, > + > RTE_FLOW_ERROR_TYPE_ITEM, > +item, > +"Unsupported TCI mask."); > } > + input_set |= I40E_INSET_VLAN_INNER; > + filter->input.flow_ext.vlan_tci = > + vlan_spec->tci; > } > if (vlan_spec && vlan_mask && vlan_mask- > >inner_type) { > if (vlan_mask->inner_type != > RTE_BE16(0x)) { > -- > 2.26.2
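For context on the masks discussed above: the 16-bit VLAN TCI is the concatenation of a 3-bit priority (PCP), a 1-bit CFI/DEI and a 12-bit VLAN ID, which is why the patch accepts a mask equal to the full TCI mask or to exactly one of the PRI/CFI/VID masks. A small standalone sketch of the decomposition (the mask names here are illustrative, mirroring the defines in the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define PRI_MASK 0xE000 /* 3-bit priority (PCP) */
    #define CFI_MASK 0x1000 /* 1-bit CFI/DEI */
    #define VID_MASK 0x0FFF /* 12-bit VLAN ID */

    int main(void)
    {
        uint16_t tci = 0x015E; /* VID 350, as in the reported flow rule */

        printf("pri=%u cfi=%u vid=%u\n",
               (unsigned)((tci & PRI_MASK) >> 13),
               (unsigned)((tci & CFI_MASK) >> 12),
               (unsigned)(tci & VID_MASK));
        return 0; /* prints: pri=0 cfi=0 vid=350 */
    }

A mask of exactly the VID mask therefore means the rule matches only the VLAN ID and ignores the priority and CFI bits.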
[dpdk-dev] [PATCH v2 00/25] *net/mlx5: support multiple-thread flow operations
This patch set adds multiple-thread support for flow operations on the flow objects. MT support for the PMD features newly added after RC1 is not done yet. The sh lock removal patch will be moved to the next series. Suanming Mou (11): net/mlx5: use thread safe index pool for flow objects net/mlx5: make meter action thread safe net/mlx5: make VLAN network interface thread safe net/mlx5: create global jump action net/mlx5: create global default miss action net/mlx5: create global drop action net/mlx5: fix redundant Direct Verbs resources allocate net/mlx5: remove unused mreg copy code net/mlx5: make header reformat action thread safe net/mlx5: remove unused hash list operations net/mlx5: make Rx queue thread safe Xueming Li (14): net/mlx5: use thread specific flow workspace net/mlx5: reuse flow Id as hairpin Id net/mlx5: indexed pool supports zero size entry net/mlx5: use indexed pool for RSS flow ID net/mlx5: make rte flow list thread safe net/mlx5: support concurrent access for hash list net/mlx5: make flow table cache thread safe net/mlx5: make flow tag list thread safe net/mlx5: make flow modify action list thread safe net/mlx5: make metadata copy flow list thread safe net/mlx5: introduce thread safe linked list cache net/mlx5: make matcher list thread safe net/mlx5: make port ID action cache thread safe net/mlx5: make push VLAN action cache thread safe --- v2: - fix review comments. - remove unused mreg copy code. - drop the sh lock removal patch as the new features are still not fully updated. --- drivers/common/mlx5/linux/mlx5_nl.h |1 + drivers/net/mlx5/linux/mlx5_os.c | 100 ++- drivers/net/mlx5/linux/mlx5_vlan_os.c |5 + drivers/net/mlx5/mlx5.c | 266 +-- drivers/net/mlx5/mlx5.h | 63 +- drivers/net/mlx5/mlx5_flow.c | 590 ++ drivers/net/mlx5/mlx5_flow.h | 124 ++- drivers/net/mlx5/mlx5_flow_dv.c | 1393 +++-- drivers/net/mlx5/mlx5_flow_meter.c| 72 +- drivers/net/mlx5/mlx5_flow_verbs.c| 84 +- drivers/net/mlx5/mlx5_rxq.c | 235 +++--- drivers/net/mlx5/mlx5_rxtx.h | 20 +- drivers/net/mlx5/mlx5_utils.c | 345 ++-- drivers/net/mlx5/mlx5_utils.h | 320 ++-- 14 files changed, 1856 insertions(+), 1762 deletions(-) -- 1.8.3.1
[dpdk-dev] [PATCH v2 01/25] net/mlx5: use thread safe index pool for flow objects
As mlx5 PMD is changed to be thread safe, all the flow-related sub-objects inside the PMD should be thread safe. This commit changes the index memory pools' lock configuration to be enabled. That makes the index pool be thread safe. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index e4ce9a9..e1df11f 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -191,7 +191,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -202,7 +202,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -213,7 +213,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -224,7 +224,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -235,7 +235,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -269,7 +269,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -280,7 +280,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -291,7 +291,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, @@ -306,7 +306,7 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .trunk_size = 64, .grow_trunk = 3, .grow_shift = 2, - .need_lock = 0, + .need_lock = 1, .release_mem_en = 1, .malloc = mlx5_malloc, .free = mlx5_free, -- 1.8.3.1
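A brief sketch of why enabling the lock matters once flows are inserted from several threads. It assumes the mlx5_ipool_zmalloc(pool, &idx) allocator used elsewhere in this driver; worker() is purely illustrative:

    static void *
    worker(void *arg)
    {
        struct mlx5_indexed_pool *pool = arg;
        uint32_t idx;

        /* With .need_lock = 1, trunk growth and free-bitmap updates
         * inside the pool are serialized, so concurrent callers get
         * distinct, valid indexes; with .need_lock = 0 this would race. */
        return mlx5_ipool_zmalloc(pool, &idx);
    }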
[dpdk-dev] [PATCH v2 02/25] net/mlx5: use thread specific flow workspace
From: Xueming Li As part of multi-thread flow support, this patch moves flow intermediate data to thread specific, makes them a flow workspace. The workspace is allocated per thread, destroyed along with thread life-cycle. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 5 -- drivers/net/mlx5/mlx5.c| 2 - drivers/net/mlx5/mlx5.h| 6 -- drivers/net/mlx5/mlx5_flow.c | 159 +++-- drivers/net/mlx5/mlx5_flow.h | 15 +++- drivers/net/mlx5/mlx5_flow_dv.c| 41 +- drivers/net/mlx5/mlx5_flow_verbs.c | 24 +++--- 7 files changed, 166 insertions(+), 86 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 40f9446..1313dee 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1431,11 +1431,6 @@ err = ENOTSUP; goto error; } - /* -* Allocate the buffer for flow creating, just once. -* The allocation must be done before any flow creating. -*/ - mlx5_flow_alloc_intermediate(eth_dev); /* Query availability of metadata reg_c's. */ err = mlx5_flow_discover_mreg_c(eth_dev); if (err < 0) { diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index e1df11f..faf947f 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1402,8 +1402,6 @@ struct mlx5_dev_ctx_shared * */ mlx5_flow_list_flush(dev, &priv->flows, true); mlx5_flow_meter_flush(dev, NULL); - /* Free the intermediate buffers for flow creation. */ - mlx5_flow_free_intermediate(dev); /* Prevent crashes when queues are still in use. */ dev->rx_pkt_burst = removed_rx_burst; dev->tx_pkt_burst = removed_tx_burst; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index c9d5d71..bfb0c28 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -847,10 +847,6 @@ struct mlx5_priv { struct mlx5_drop drop_queue; /* Flow drop queues. */ uint32_t flows; /* RTE Flow rules. */ uint32_t ctrl_flows; /* Control flow rules. */ - void *inter_flows; /* Intermediate resources for flow creation. */ - void *rss_desc; /* Intermediate rss description resources. */ - int flow_idx; /* Intermediate device flow index. */ - int flow_nested_idx; /* Intermediate device flow index, nested. */ struct mlx5_obj_ops obj_ops; /* HW objects operations. */ LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */ LIST_HEAD(rxqobj, mlx5_rxq_obj) rxqsobj; /* Verbs/DevX Rx queues. */ @@ -1074,8 +1070,6 @@ int mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, void mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list); int mlx5_flow_start_default(struct rte_eth_dev *dev); void mlx5_flow_stop_default(struct rte_eth_dev *dev); -void mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev); -void mlx5_flow_free_intermediate(struct rte_eth_dev *dev); int mlx5_flow_verify(struct rte_eth_dev *dev); int mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev, uint32_t queue); int mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index d7243a8..2f2b97f 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -634,6 +634,13 @@ struct mlx5_flow_tunnel_info { }, }; +/* Key of thread specific flow workspace data. */ +static pthread_key_t key_workspace; + +/* Thread specific flow workspace data once initialization data. */ +static pthread_once_t key_workspace_init; + + /** * Translate tag ID to register. * @@ -5005,6 +5012,38 @@ struct mlx5_flow_tunnel_info { } /** + * Adjust flow RSS workspace if needed. + * + * @param wks + * Pointer to thread flow work space. 
+ * @param rss_desc + * Pointer to RSS descriptor. + * @param[in] nrssq_num + * New RSS queue number. + * + * @return + * 0 on success, -1 otherwise and rte_errno is set. + */ +static int +flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks, + struct mlx5_flow_rss_desc *rss_desc, + uint32_t nrssq_num) +{ + bool fidx = !!wks->flow_idx; + + if (likely(nrssq_num <= wks->rssq_num[fidx])) + return 0; + rss_desc->queue = realloc(rss_desc->queue, + sizeof(rss_desc->queue[0]) * RTE_ALIGN(nrssq_num, 2)); + if (!rss_desc->queue) { + rte_errno = ENOMEM; + return -1; + } + wks->rssq_num[fidx] = RTE_ALIGN(nrssq_num, 2); + return 0; +} + +/** * Create a flow and add it to @p list. * * @param dev @@ -5056,8 +5095,7 @@ struct mlx5_flow_tunnel_info { uint8_t buffer[2048]; } items_tx; struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; - struct mlx5_flow_rss_d
[dpdk-dev] [PATCH v2 05/25] net/mlx5: use indexed pool for RSS flow ID
From: Xueming Li The flow ID generation API used an integer pool to save released ID, The only usage is to generate RSS flow ID. To support multiple flow, it has to be enhanced to be thread safe. Indexed pool could be used to generate unique ID by setting size of pool entry to zero. Since bitmap is used, an extra benefits is saving memory to about one bit per entry. Further more indexed pool could be thread safe by enabling lock. This patch leverages indexed pool to generate RSS flow ID, removes unused flow ID generating API. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 13 drivers/net/mlx5/mlx5.c | 125 ++- drivers/net/mlx5/mlx5.h | 11 +--- drivers/net/mlx5/mlx5_flow.c | 57 -- drivers/net/mlx5/mlx5_flow.h | 5 -- drivers/net/mlx5/mlx5_utils.c| 5 ++ 6 files changed, 35 insertions(+), 181 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 1313dee..7e90de4 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1377,17 +1377,6 @@ err = mlx5_alloc_shared_dr(priv); if (err) goto error; - /* -* RSS id is shared with meter flow id. Meter flow id can only -* use the 24 MSB of the register. -*/ - priv->qrss_id_pool = mlx5_flow_id_pool_alloc(UINT32_MAX >> -MLX5_MTR_COLOR_BITS); - if (!priv->qrss_id_pool) { - DRV_LOG(ERR, "can't create flow id pool"); - err = ENOMEM; - goto error; - } } if (config->devx && config->dv_flow_en && config->dest_tir) { priv->obj_ops = devx_obj_ops; @@ -1474,8 +1463,6 @@ close(priv->nl_socket_rdma); if (priv->vmwa_context) mlx5_vlan_vmwa_exit(priv->vmwa_context); - if (priv->qrss_id_pool) - mlx5_flow_id_pool_release(priv->qrss_id_pool); if (own_domain_id) claim_zero(rte_eth_switch_domain_free(priv->domain_id)); mlx5_free(priv); diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index ce3de82..327e023 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -321,6 +321,11 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = .free = mlx5_free, .type = "rte_flow_ipool", }, + { + .size = 0, + .need_lock = 1, + .type = "mlx5_flow_rss_id_ipool", + }, }; @@ -329,126 +334,6 @@ static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list = #define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 4096 -/** - * Allocate ID pool structure. - * - * @param[in] max_id - * The maximum id can be allocated from the pool. - * - * @return - * Pointer to pool object, NULL value otherwise. - */ -struct mlx5_flow_id_pool * -mlx5_flow_id_pool_alloc(uint32_t max_id) -{ - struct mlx5_flow_id_pool *pool; - void *mem; - - pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), - RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); - if (!pool) { - DRV_LOG(ERR, "can't allocate id pool"); - rte_errno = ENOMEM; - return NULL; - } - mem = mlx5_malloc(MLX5_MEM_ZERO, - MLX5_FLOW_MIN_ID_POOL_SIZE * sizeof(uint32_t), - RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); - if (!mem) { - DRV_LOG(ERR, "can't allocate mem for id pool"); - rte_errno = ENOMEM; - goto error; - } - pool->free_arr = mem; - pool->curr = pool->free_arr; - pool->last = pool->free_arr + MLX5_FLOW_MIN_ID_POOL_SIZE; - pool->base_index = 0; - pool->max_id = max_id; - return pool; -error: - mlx5_free(pool); - return NULL; -} - -/** - * Release ID pool structure. - * - * @param[in] pool - * Pointer to flow id pool object to free. - */ -void -mlx5_flow_id_pool_release(struct mlx5_flow_id_pool *pool) -{ - mlx5_free(pool->free_arr); - mlx5_free(pool); -} - -/** - * Generate ID. - * - * @param[in] pool - * Pointer to flow id pool. 
- * @param[out] id - * The generated ID. - * - * @return - * 0 on success, error value otherwise. - */ -uint32_t -mlx5_flow_id_get(struct mlx5_flow_id_pool *pool, uint32_t *id) -{ - if (pool->curr == pool->free_arr) { - if (pool->base_index == pool->max_id) { - rte_errno = ENOMEM; - DRV_LOG(ERR, "no free id"); - return -rte_errno; - } - *id = ++pool->base_index; - return 0; - } -
[dpdk-dev] [PATCH v2 03/25] net/mlx5: reuse flow Id as hairpin Id
From: Xueming Li Hairpin flow matching required a unique flow ID for matching. This patch reuses flow ID as hairpin flow ID, this will save some code to generate a separate hairpin ID, also saves flow memory by removing hairpin ID. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5.c | 11 --- drivers/net/mlx5/mlx5.h | 1 - drivers/net/mlx5/mlx5_flow.c | 32 ++-- drivers/net/mlx5/mlx5_flow.h | 6 +- 4 files changed, 11 insertions(+), 39 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index faf947f..ce3de82 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -979,13 +979,6 @@ struct mlx5_dev_ctx_shared * MLX5_ASSERT(sh->devx_rx_uar); MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar)); } - sh->flow_id_pool = mlx5_flow_id_pool_alloc - ((1 << HAIRPIN_FLOW_ID_BITS) - 1); - if (!sh->flow_id_pool) { - DRV_LOG(ERR, "can't create flow id pool"); - err = ENOMEM; - goto error; - } #ifndef RTE_ARCH_64 /* Initialize UAR access locks for 32bit implementations. */ rte_spinlock_init(&sh->uar_lock_cq); @@ -1047,8 +1040,6 @@ struct mlx5_dev_ctx_shared * claim_zero(mlx5_glue->dealloc_pd(sh->pd)); if (sh->ctx) claim_zero(mlx5_glue->close_device(sh->ctx)); - if (sh->flow_id_pool) - mlx5_flow_id_pool_release(sh->flow_id_pool); mlx5_free(sh); MLX5_ASSERT(err > 0); rte_errno = err; @@ -1119,8 +1110,6 @@ struct mlx5_dev_ctx_shared * mlx5_glue->devx_free_uar(sh->devx_rx_uar); if (sh->ctx) claim_zero(mlx5_glue->close_device(sh->ctx)); - if (sh->flow_id_pool) - mlx5_flow_id_pool_release(sh->flow_id_pool); pthread_mutex_destroy(&sh->txpp.mutex); mlx5_free(sh); return; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index bfb0c28..f6d38d4 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -666,7 +666,6 @@ struct mlx5_dev_ctx_shared { void *devx_comp; /* DEVX async comp obj. */ struct mlx5_devx_obj *tis; /* TIS object. */ struct mlx5_devx_obj *td; /* Transport domain. */ - struct mlx5_flow_id_pool *flow_id_pool; /* Flow ID pool. */ void *tx_uar; /* Tx/packet pacing shared UAR. */ struct mlx5_flex_parser_profiles fp[MLX5_FLEX_PARSER_MAX]; /* Flex parser profiles information. */ diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 2f2b97f..bb6fd74 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -3784,9 +3784,8 @@ struct mlx5_flow_tunnel_info { struct rte_flow_action actions_rx[], struct rte_flow_action actions_tx[], struct rte_flow_item pattern_tx[], - uint32_t *flow_id) + uint32_t flow_id) { - struct mlx5_priv *priv = dev->data->dev_private; const struct rte_flow_action_raw_encap *raw_encap; const struct rte_flow_action_raw_decap *raw_decap; struct mlx5_rte_flow_action_set_tag *set_tag; @@ -3796,7 +3795,6 @@ struct mlx5_flow_tunnel_info { char *addr; int encap = 0; - mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id); for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { switch (actions->type) { case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: @@ -3865,7 +3863,7 @@ struct mlx5_flow_tunnel_info { set_tag = (void *)actions_rx; set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); MLX5_ASSERT(set_tag->id > REG_NON); - set_tag->data = *flow_id; + set_tag->data = flow_id; tag_action->conf = set_tag; /* Create Tx item list. 
*/ rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); @@ -3874,7 +3872,7 @@ struct mlx5_flow_tunnel_info { item->type = (enum rte_flow_item_type) MLX5_RTE_FLOW_ITEM_TYPE_TAG; tag_item = (void *)addr; - tag_item->data = *flow_id; + tag_item->data = flow_id; tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); MLX5_ASSERT(set_tag->id > REG_NON); item->spec = tag_item; @@ -5100,7 +5098,6 @@ struct mlx5_flow_tunnel_info { uint32_t i; uint32_t idx = 0; int hairpin_flow; - uint32_t hairpin_id = 0; struct rte_flow_attr attr_tx = { .priority = 0 }; struct rte_flow_attr attr_factor = {0}; struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace(); @@ -5117,24 +5114,22 @@ struct mlx5_flow_tunnel_info { external, hairpin_flow, error); if (ret < 0) return 0; + flow =
[dpdk-dev] [PATCH v2 04/25] net/mlx5: indexed pool supports zero size entry
From: Xueming Li To allow the indexed pool to be used as an ID generator, this patch allows the entry size to be zero. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c index 9a54fda..3a1f87a 100644 --- a/drivers/net/mlx5/mlx5_utils.c +++ b/drivers/net/mlx5/mlx5_utils.c @@ -230,7 +230,7 @@ struct mlx5_indexed_pool * struct mlx5_indexed_pool *pool; uint32_t i; - if (!cfg || !cfg->size || (!cfg->malloc ^ !cfg->free) || + if (!cfg || (!cfg->malloc ^ !cfg->free) || (cfg->trunk_size && ((cfg->trunk_size & (cfg->trunk_size - 1)) || ((__builtin_ffs(cfg->trunk_size) + TRUNK_IDX_BITS) > 32 return NULL; @@ -391,7 +391,7 @@ struct mlx5_indexed_pool * { void *entry = mlx5_ipool_malloc(pool, idx); - if (entry) + if (entry && pool->cfg.size) memset(entry, 0, pool->cfg.size); return entry; } -- 1.8.3.1
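With the size check relaxed as above, an indexed pool configured with a zero entry size behaves as a compact ID generator: the returned index is the ID, the bitmap tracks which IDs are live, and no per-entry memory is written. A configuration sketch using the mlx5_indexed_pool_config fields seen earlier in this series (the pool name is hypothetical):

    static struct mlx5_indexed_pool_config id_gen_cfg = {
        .size = 0,          /* zero-size entries: only the index is used */
        .trunk_size = 64,
        .need_lock = 1,     /* safe under concurrent flow insertion */
        .release_mem_en = 1,
        .malloc = mlx5_malloc,
        .free = mlx5_free,
        .type = "example_id_ipool", /* hypothetical pool name */
    };

    /* Each successful allocation then hands back a unique index to use
     * as an ID: mlx5_ipool_zmalloc(pool, &id); ... mlx5_ipool_free(pool, id); */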
[dpdk-dev] [PATCH v2 07/25] net/mlx5: make meter action thread safe
This commit adds the spinlock for the meter action to make it be thread safe. Atomic reference counter in all is not enough as the meter action should be created synchronized with reference counter increment. With only atomic reference counter, even the counter is increased, the action may still not be created. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5_flow.h | 2 ++ drivers/net/mlx5/mlx5_flow_meter.c | 72 -- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 52680ad..637922e 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -837,6 +837,8 @@ struct mlx5_flow_meter { struct mlx5_flow_meter_profile *profile; /**< Meter profile parameters. */ + rte_spinlock_t sl; /**< Meter action spinlock. */ + /** Policer actions (per meter output color). */ enum rte_mtr_policer_action action[RTE_COLORS]; diff --git a/drivers/net/mlx5/mlx5_flow_meter.c b/drivers/net/mlx5/mlx5_flow_meter.c index b36bc7b..03a5e79 100644 --- a/drivers/net/mlx5/mlx5_flow_meter.c +++ b/drivers/net/mlx5/mlx5_flow_meter.c @@ -679,6 +679,7 @@ fm->shared = !!shared; fm->policer_stats.stats_mask = params->stats_mask; fm->profile->ref_cnt++; + rte_spinlock_init(&fm->sl); return 0; error: mlx5_flow_destroy_policer_rules(dev, fm, &attr); @@ -1167,49 +1168,49 @@ struct mlx5_flow_meter * struct rte_flow_error *error) { struct mlx5_flow_meter *fm; + int ret = 0; fm = mlx5_flow_meter_find(priv, meter_id); if (fm == NULL) { rte_flow_error_set(error, ENOENT, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "Meter object id not valid"); - goto error; - } - if (!fm->shared && fm->ref_cnt) { - DRV_LOG(ERR, "Cannot share a non-shared meter."); - rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, - "Meter can't be shared"); - goto error; + return fm; } - if (!fm->ref_cnt++) { - MLX5_ASSERT(!fm->mfts->meter_action); + rte_spinlock_lock(&fm->sl); + if (fm->mfts->meter_action) { + if (fm->shared && + attr->transfer == fm->transfer && + attr->ingress == fm->ingress && + attr->egress == fm->egress) + fm->ref_cnt++; + else + ret = -1; + } else { fm->ingress = attr->ingress; fm->egress = attr->egress; fm->transfer = attr->transfer; +fm->ref_cnt = 1; /* This also creates the meter object. */ fm->mfts->meter_action = mlx5_flow_meter_action_create(priv, fm); - if (!fm->mfts->meter_action) - goto error_detach; - } else { - MLX5_ASSERT(fm->mfts->meter_action); - if (attr->transfer != fm->transfer || - attr->ingress != fm->ingress || - attr->egress != fm->egress) { - DRV_LOG(ERR, "meter I/O attributes do not " - "match flow I/O attributes."); - goto error_detach; + if (!fm->mfts->meter_action) { + fm->ref_cnt = 0; + fm->ingress = 0; + fm->egress = 0; + fm->transfer = 0; + ret = -1; + DRV_LOG(ERR, "Meter action create failed."); } } - return fm; -error_detach: - mlx5_flow_meter_detach(fm); - rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, - fm->mfts->meter_action ? "Meter attr not match" : - "Meter action create failed"); -error: - return NULL; + rte_spinlock_unlock(&fm->sl); + if (ret) + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + fm->mfts->meter_action ? + "Meter attr not match" : + "Meter action create failed"); + return ret ? 
NULL : fm; } /** @@ -1222,15 +1223,16 @@ struct mlx5_flow_meter * mlx5_flow_meter_detach(struct mlx5_flow_meter *fm) { #ifdef HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER + rte_spinlock_lock(&fm->sl); MLX5_ASSERT(fm->ref_cnt); - if (--fm->ref_cnt) - return; - if (fm->mfts->met
[dpdk-dev] [PATCH v2 08/25] net/mlx5: make VLAN network interface thread safe
This commit protects the VLAN VM workaround area using a spinlock in multiple-thread flow insertion to make it thread safe. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/common/mlx5/linux/mlx5_nl.h | 1 + drivers/net/mlx5/linux/mlx5_vlan_os.c | 5 + 2 files changed, 6 insertions(+) diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h index 53021e1..15129ff 100644 --- a/drivers/common/mlx5/linux/mlx5_nl.h +++ b/drivers/common/mlx5/linux/mlx5_nl.h @@ -25,6 +25,7 @@ struct mlx5_nl_vlan_dev { struct mlx5_nl_vlan_vmwa_context { int nl_socket; uint32_t vf_ifindex; + rte_spinlock_t sl; struct mlx5_nl_vlan_dev vlan_dev[4096]; }; diff --git a/drivers/net/mlx5/linux/mlx5_vlan_os.c b/drivers/net/mlx5/linux/mlx5_vlan_os.c index 92fc17d..40e895e 100644 --- a/drivers/net/mlx5/linux/mlx5_vlan_os.c +++ b/drivers/net/mlx5/linux/mlx5_vlan_os.c @@ -44,12 +44,14 @@ if (!vlan->created || !vmwa) return; vlan->created = 0; + rte_spinlock_lock(&vmwa->sl); MLX5_ASSERT(vlan_dev[vlan->tag].refcnt); if (--vlan_dev[vlan->tag].refcnt == 0 && vlan_dev[vlan->tag].ifindex) { mlx5_nl_vlan_vmwa_delete(vmwa, vlan_dev[vlan->tag].ifindex); vlan_dev[vlan->tag].ifindex = 0; } + rte_spinlock_unlock(&vmwa->sl); } /** @@ -72,6 +74,7 @@ MLX5_ASSERT(priv->vmwa_context); if (vlan->created || !vmwa) return; + rte_spinlock_lock(&vmwa->sl); if (vlan_dev[vlan->tag].refcnt == 0) { MLX5_ASSERT(!vlan_dev[vlan->tag].ifindex); vlan_dev[vlan->tag].ifindex = @@ -82,6 +85,7 @@ vlan_dev[vlan->tag].refcnt++; vlan->created = 1; } + rte_spinlock_unlock(&vmwa->sl); } /* @@ -131,6 +135,7 @@ " for VLAN workaround context"); return NULL; } + rte_spinlock_init(&vmwa->sl); vmwa->nl_socket = mlx5_nl_init(NETLINK_ROUTE); if (vmwa->nl_socket < 0) { DRV_LOG(WARNING, -- 1.8.3.1
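The meter (07/25) and VLAN (08/25) changes apply the same pattern: the reference count and the creation or destruction of the underlying object are updated under a single spinlock, so no thread can ever observe a non-zero count while the object is still missing, a guarantee a bare atomic counter cannot give. A generic sketch of that acquire/release pattern (all names are illustrative; create_obj()/destroy_obj() are hypothetical):

    #include <rte_spinlock.h>

    struct shared_res {
        rte_spinlock_t sl;
        uint32_t refcnt;
        void *obj;
    };

    static void *
    shared_res_acquire(struct shared_res *res)
    {
        void *obj = NULL;

        rte_spinlock_lock(&res->sl);
        if (res->obj == NULL)
            res->obj = create_obj(); /* hypothetical creator, may fail */
        if (res->obj != NULL) {
            res->refcnt++;
            obj = res->obj;
        }
        rte_spinlock_unlock(&res->sl);
        return obj;
    }

    static void
    shared_res_release(struct shared_res *res)
    {
        rte_spinlock_lock(&res->sl);
        if (--res->refcnt == 0) {
            destroy_obj(res->obj); /* hypothetical destructor */
            res->obj = NULL;
        }
        rte_spinlock_unlock(&res->sl);
    }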
[dpdk-dev] [PATCH v2 06/25] net/mlx5: make rte flow list thread safe
From: Xueming Li To support multi-thread flow operations, this patch introduces a list lock for the rte_flow list that manages all the rte_flow handlers. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 1 + drivers/net/mlx5/mlx5.h | 1 + drivers/net/mlx5/mlx5_flow.c | 10 -- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 7e90de4..287ed13 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1340,6 +1340,7 @@ MLX5_MAX_MAC_ADDRESSES); priv->flows = 0; priv->ctrl_flows = 0; + rte_spinlock_init(&priv->flow_list_lock); TAILQ_INIT(&priv->flow_meters); TAILQ_INIT(&priv->flow_meter_profiles); /* Hint libmlx5 to use PMD allocator for data plane resources */ diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index fafbb5c..62b3ee0 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -838,6 +838,7 @@ struct mlx5_priv { struct mlx5_drop drop_queue; /* Flow drop queues. */ uint32_t flows; /* RTE Flow rules. */ uint32_t ctrl_flows; /* Control flow rules. */ + rte_spinlock_t flow_list_lock; struct mlx5_obj_ops obj_ops; /* HW objects operations. */ LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */ LIST_HEAD(rxqobj, mlx5_rxq_obj) rxqsobj; /* Verbs/DevX Rx queues. */ diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index aa748a4..c6d3cc4 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -5217,9 +5217,12 @@ struct mlx5_flow_tunnel_info { if (ret < 0) goto error; } - if (list) + if (list) { + rte_spinlock_lock(&priv->flow_list_lock); ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, flow, next); + rte_spinlock_unlock(&priv->flow_list_lock); + } flow_rxq_flags_set(dev, flow); /* Nested flow creation index recovery. */ wks->flow_idx = wks->flow_nested_idx; @@ -5375,9 +5378,12 @@ struct rte_flow * if (dev->data->dev_started) flow_rxq_flags_trim(dev, flow); flow_drv_destroy(dev, flow); - if (list) + if (list) { + rte_spinlock_lock(&priv->flow_list_lock); ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, flow_idx, flow, next); + rte_spinlock_unlock(&priv->flow_list_lock); + } flow_mreg_del_copy_action(dev, flow); if (flow->fdir) { LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) { -- 1.8.3.1
[dpdk-dev] [PATCH v2 10/25] net/mlx5: create global default miss action
This commit creates the global default miss action instead of maintain it in flow insertion time. This makes the action to be thread safe. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 7 drivers/net/mlx5/mlx5.h | 9 +--- drivers/net/mlx5/mlx5_flow_dv.c | 88 +++- 3 files changed, 13 insertions(+), 91 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 287ed13..ba31238 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -299,6 +299,10 @@ } sh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan(); #endif /* HAVE_MLX5DV_DR */ + sh->default_miss_action = + mlx5_glue->dr_create_flow_action_default_miss(); + if (!sh->default_miss_action) + DRV_LOG(WARNING, "Default miss action is not supported."); return 0; error: /* Rollback the created objects. */ @@ -378,6 +382,9 @@ } pthread_mutex_destroy(&sh->dv_mutex); #endif /* HAVE_MLX5DV_DR */ + if (sh->default_miss_action) + mlx5_glue->destroy_flow_action + (sh->default_miss_action); if (sh->encaps_decaps) { mlx5_hlist_destroy(sh->encaps_decaps, NULL, NULL); sh->encaps_decaps = NULL; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 62b3ee0..ed16b0d 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -458,12 +458,6 @@ struct mlx5_flow_counter_mng { LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws; }; -/* Default miss action resource structure. */ -struct mlx5_flow_default_miss_resource { - void *action; /* Pointer to the rdma-core action. */ - rte_atomic32_t refcnt; /* Default miss action reference counter. */ -}; - #define MLX5_AGE_EVENT_NEW 1 #define MLX5_AGE_TRIGGER 2 #define MLX5_AGE_SET(age_info, BIT) \ @@ -647,8 +641,7 @@ struct mlx5_dev_ctx_shared { uint32_t sample_action_list; /* List of sample actions. */ uint32_t dest_array_list; /* List of destination array actions. */ struct mlx5_flow_counter_mng cmng; /* Counters management structure. */ - struct mlx5_flow_default_miss_resource default_miss; - /* Default miss action resource structure. */ + void *default_miss_action; /* Default miss action. */ struct mlx5_indexed_pool *ipool[MLX5_IPOOL_MAX]; /* Memory Pool for mlx5 flow resources. */ struct mlx5_l3t_tbl *cnt_id_tbl; /* Shared counter lookup table. */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 3f5a270..6e17594 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -74,9 +74,6 @@ struct mlx5_flow_tbl_resource *tbl); static int -flow_dv_default_miss_resource_release(struct rte_eth_dev *dev); - -static int flow_dv_encap_decap_resource_release(struct rte_eth_dev *dev, uint32_t encap_decap_idx); @@ -2949,42 +2946,6 @@ struct field_modify_info modify_tcp[] = { } /** - * Find existing default miss resource or create and register a new one. - * - * @param[in, out] dev - * Pointer to rte_eth_dev structure. - * @param[out] error - * pointer to error structure. - * - * @return - * 0 on success otherwise -errno and errno is set. 
- */ -static int -flow_dv_default_miss_resource_register(struct rte_eth_dev *dev, - struct rte_flow_error *error) -{ - struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_dev_ctx_shared *sh = priv->sh; - struct mlx5_flow_default_miss_resource *cache_resource = - &sh->default_miss; - int cnt = rte_atomic32_read(&cache_resource->refcnt); - - if (!cnt) { - MLX5_ASSERT(cache_resource->action); - cache_resource->action = - mlx5_glue->dr_create_flow_action_default_miss(); - if (!cache_resource->action) - return rte_flow_error_set(error, ENOMEM, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, - "cannot create default miss action"); - DRV_LOG(DEBUG, "new default miss resource %p: refcnt %d++", - (void *)cache_resource->action, cnt); - } - rte_atomic32_inc(&cache_resource->refcnt); - return 0; -} - -/** * Find existing table port ID resource or create and register a new one. * * @param[in, out] dev @@ -10038,16 +,14 @@ struct field_modify_info modify_tcp[] = { dh->rix_hrxq = hrxq_idx; dv->actions[n++] = hrxq->action; } else if (dh->fate_action == MLX5_FLOW_FATE_DEFAULT_MISS) { - if (flo
[dpdk-dev] [PATCH v2 09/25] net/mlx5: create global jump action
This commit changes the jump action in table to be created with table creation in advanced. In this case, the jump action is safe to be used in multiple thread. The jump action will be destroyed when table is not used anymore and released. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5_flow.h| 2 -- drivers/net/mlx5/mlx5_flow_dv.c | 54 - 2 files changed, 16 insertions(+), 40 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 637922e..3d325b2 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -481,8 +481,6 @@ struct mlx5_flow_dv_modify_hdr_resource { /* Jump action resource structure. */ struct mlx5_flow_dv_jump_tbl_resource { - rte_atomic32_t refcnt; /**< Reference counter. */ - uint8_t ft_type; /**< Flow table type, Rx or Tx. */ void *action; /**< Pointer to the rdma core action. */ }; diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index df36a24..3f5a270 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -2936,31 +2936,13 @@ struct field_modify_info modify_tcp[] = { (struct rte_eth_dev *dev __rte_unused, struct mlx5_flow_tbl_resource *tbl, struct mlx5_flow *dev_flow, -struct rte_flow_error *error) +struct rte_flow_error *error __rte_unused) { struct mlx5_flow_tbl_data_entry *tbl_data = container_of(tbl, struct mlx5_flow_tbl_data_entry, tbl); - int cnt, ret; MLX5_ASSERT(tbl); - cnt = rte_atomic32_read(&tbl_data->jump.refcnt); - if (!cnt) { - ret = mlx5_flow_os_create_flow_action_dest_flow_tbl - (tbl->obj, &tbl_data->jump.action); - if (ret) - return rte_flow_error_set(error, ENOMEM, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, "cannot create jump action"); - DRV_LOG(DEBUG, "new jump table resource %p: refcnt %d++", - (void *)&tbl_data->jump, cnt); - } else { - /* old jump should not make the table ref++. */ - flow_dv_tbl_resource_release(dev, &tbl_data->tbl); - MLX5_ASSERT(tbl_data->jump.action); - DRV_LOG(DEBUG, "existed jump table resource %p: refcnt %d++", - (void *)&tbl_data->jump, cnt); - } - rte_atomic32_inc(&tbl_data->jump.refcnt); + MLX5_ASSERT(tbl_data->jump.action); dev_flow->handle->rix_jump = tbl_data->idx; dev_flow->dv.jump = &tbl_data->jump; return 0; @@ -7935,8 +7917,19 @@ struct field_modify_info modify_tcp[] = { * count before insert it into the hash list. */ rte_atomic32_init(&tbl->refcnt); - /* Jump action reference count is initialized here. 
*/ - rte_atomic32_init(&tbl_data->jump.refcnt); + if (table_id) { + ret = mlx5_flow_os_create_flow_action_dest_flow_tbl + (tbl->obj, &tbl_data->jump.action); + if (ret) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "cannot create flow jump action"); + mlx5_flow_os_destroy_flow_tbl(tbl->obj); + mlx5_ipool_free(sh->ipool[MLX5_IPOOL_JUMP], idx); + return NULL; + } + } pos->key = table_key.v64; ret = mlx5_hlist_insert(sh->flow_tbls, pos); if (ret < 0) { @@ -10193,28 +10186,13 @@ struct field_modify_info modify_tcp[] = { struct mlx5_flow_handle *handle) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_flow_dv_jump_tbl_resource *cache_resource; struct mlx5_flow_tbl_data_entry *tbl_data; tbl_data = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_JUMP], handle->rix_jump); if (!tbl_data) return 0; - cache_resource = &tbl_data->jump; - MLX5_ASSERT(cache_resource->action); - DRV_LOG(DEBUG, "jump table resource %p: refcnt %d--", - (void *)cache_resource, - rte_atomic32_read(&cache_resource->refcnt)); - if (rte_atomic32_dec_and_test(&cache_resource->refcnt)) { - claim_zero(mlx5_flow_os_destroy_flow_action - (cache_resource->action)); - /* jump action memory free is inside the table release. */ - flow_dv_tbl_resource_release(dev, &tbl_data->tbl); - DRV_LOG(DEBUG, "jump
[dpdk-dev] [PATCH v2 12/25] net/mlx5: support concurrent access for hash list
From: Xueming Li In order to support hash list concurrent access, adding next: 1. List level read/write lock. 2. Entry reference counter. 3. Entry create/match/remove callback. 4. Remove insert/lookup/remove function which are not thread safe. 5. Add register/unregister function to support entry reuse. For better performance, lookup function uses read lock to allow concurrent lookup from different thread, all other hash list modification functions uses write lock which blocks concurrent modification and lookups from other thread. The exact objects change will be applied in the next patches. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 27 --- drivers/net/mlx5/mlx5.c | 13 ++-- drivers/net/mlx5/mlx5_flow.c | 7 +- drivers/net/mlx5/mlx5_flow_dv.c | 6 +- drivers/net/mlx5/mlx5_utils.c| 154 --- drivers/net/mlx5/mlx5_utils.h| 149 ++--- 6 files changed, 276 insertions(+), 80 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 0900307..929fed2 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -236,14 +236,16 @@ return err; /* Create tags hash list table. */ snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name); - sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE); + sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE, 0, + 0, NULL, NULL, NULL); if (!sh->tag_table) { DRV_LOG(ERR, "tags with hash creation failed."); err = ENOMEM; goto error; } snprintf(s, sizeof(s), "%s_hdr_modify", sh->ibdev_name); - sh->modify_cmds = mlx5_hlist_create(s, MLX5_FLOW_HDR_MODIFY_HTABLE_SZ); + sh->modify_cmds = mlx5_hlist_create(s, MLX5_FLOW_HDR_MODIFY_HTABLE_SZ, + 0, 0, NULL, NULL, NULL); if (!sh->modify_cmds) { DRV_LOG(ERR, "hdr modify hash creation failed"); err = ENOMEM; @@ -251,7 +253,8 @@ } snprintf(s, sizeof(s), "%s_encaps_decaps", sh->ibdev_name); sh->encaps_decaps = mlx5_hlist_create(s, - MLX5_FLOW_ENCAP_DECAP_HTABLE_SZ); + MLX5_FLOW_ENCAP_DECAP_HTABLE_SZ, + 0, 0, NULL, NULL, NULL); if (!sh->encaps_decaps) { DRV_LOG(ERR, "encap decap hash creation failed"); err = ENOMEM; @@ -327,16 +330,16 @@ sh->pop_vlan_action = NULL; } if (sh->encaps_decaps) { - mlx5_hlist_destroy(sh->encaps_decaps, NULL, NULL); + mlx5_hlist_destroy(sh->encaps_decaps); sh->encaps_decaps = NULL; } if (sh->modify_cmds) { - mlx5_hlist_destroy(sh->modify_cmds, NULL, NULL); + mlx5_hlist_destroy(sh->modify_cmds); sh->modify_cmds = NULL; } if (sh->tag_table) { /* tags should be destroyed with flow before. */ - mlx5_hlist_destroy(sh->tag_table, NULL, NULL); + mlx5_hlist_destroy(sh->tag_table); sh->tag_table = NULL; } mlx5_free_table_hash_list(priv); @@ -386,16 +389,16 @@ mlx5_glue->destroy_flow_action (sh->default_miss_action); if (sh->encaps_decaps) { - mlx5_hlist_destroy(sh->encaps_decaps, NULL, NULL); + mlx5_hlist_destroy(sh->encaps_decaps); sh->encaps_decaps = NULL; } if (sh->modify_cmds) { - mlx5_hlist_destroy(sh->modify_cmds, NULL, NULL); + mlx5_hlist_destroy(sh->modify_cmds); sh->modify_cmds = NULL; } if (sh->tag_table) { /* tags should be destroyed with flow before. 
*/ - mlx5_hlist_destroy(sh->tag_table, NULL, NULL); + mlx5_hlist_destroy(sh->tag_table); sh->tag_table = NULL; } mlx5_free_table_hash_list(priv); @@ -1454,7 +1457,9 @@ mlx5_flow_ext_mreg_supported(eth_dev) && priv->sh->dv_regc0_mask) { priv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME, - MLX5_FLOW_MREG_HTABLE_SZ); + MLX5_FLOW_MREG_HTABLE_SZ, + 0, 0, + NULL, NULL, NULL); if (!priv->mreg_cp_tbl) { err = ENOMEM; goto error; @@ -1465,7 +1470,7 @@ error: if (priv) { if (priv->mreg_cp_tbl) -
[dpdk-dev] [PATCH v2 11/25] net/mlx5: create global drop action
This commit creates the global drop action for flows instead of maintain it in flow insertion time. The uniqueu global drop action makes it thread safe. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 5 + drivers/net/mlx5/mlx5.c| 2 ++ drivers/net/mlx5/mlx5_flow_dv.c| 38 +++ drivers/net/mlx5/mlx5_flow_verbs.c | 41 +++--- 4 files changed, 26 insertions(+), 60 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index ba31238..0900307 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1406,6 +1406,9 @@ } else { priv->obj_ops = ibv_obj_ops; } + priv->drop_queue.hrxq = mlx5_drop_action_create(eth_dev); + if (!priv->drop_queue.hrxq) + goto error; /* Supported Verbs flow priority number detection. */ err = mlx5_flow_discover_priorities(eth_dev); if (err < 0) { @@ -1471,6 +1474,8 @@ close(priv->nl_socket_rdma); if (priv->vmwa_context) mlx5_vlan_vmwa_exit(priv->vmwa_context); + if (eth_dev && priv->drop_queue.hrxq) + mlx5_drop_action_destroy(eth_dev); if (own_domain_id) claim_zero(rte_eth_switch_domain_free(priv->domain_id)); mlx5_free(priv); diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 327e023..5fbb342 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1301,6 +1301,8 @@ struct mlx5_dev_ctx_shared * priv->txqs = NULL; } mlx5_proc_priv_uninit(dev); + if (priv->drop_queue.hrxq) + mlx5_drop_action_destroy(dev); if (priv->mreg_cp_tbl) mlx5_hlist_destroy(priv->mreg_cp_tbl, NULL, NULL); mlx5_mprq_free_mp(dev); diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 6e17594..708ec65 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -9945,24 +9945,9 @@ struct field_modify_info modify_tcp[] = { if (dv->transfer) { dv->actions[n++] = priv->sh->esw_drop_action; } else { - struct mlx5_hrxq *drop_hrxq; - drop_hrxq = mlx5_drop_action_create(dev); - if (!drop_hrxq) { - rte_flow_error_set - (error, errno, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, -NULL, -"cannot get drop hash queue"); - goto error; - } - /* -* Drop queues will be released by the specify -* mlx5_drop_action_destroy() function. Assign -* the special index to hrxq to mark the queue -* has been allocated. -*/ - dh->rix_hrxq = UINT32_MAX; - dv->actions[n++] = drop_hrxq->action; + MLX5_ASSERT(priv->drop_queue.hrxq); + dv->actions[n++] = + priv->drop_queue.hrxq->action; } } else if (dh->fate_action == MLX5_FLOW_FATE_QUEUE && !dv_h->rix_sample && !dv_h->rix_dest_array) { @@ -10035,14 +10020,9 @@ struct field_modify_info modify_tcp[] = { SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles, handle_idx, dh, next) { /* hrxq is union, don't clear it if the flag is not set. */ - if (dh->rix_hrxq) { - if (dh->fate_action == MLX5_FLOW_FATE_DROP) { - mlx5_drop_action_destroy(dev); - dh->rix_hrxq = 0; - } else if (dh->fate_action == MLX5_FLOW_FATE_QUEUE) { - mlx5_hrxq_release(dev, dh->rix_hrxq); - dh->rix_hrxq = 0; - } + if (dh->fate_action == MLX5_FLOW_FATE_QUEUE && dh->rix_hrxq) { + mlx5_hrxq_release(dev, dh->rix_hrxq); + dh->rix_hrxq = 0; } if (dh->vf_vlan.tag && dh->vf_vlan.created) mlx5_vlan_vmwa_release(dev, &dh->vf_vlan); @@ -10284,9 +10264,6 @@ struct field_modify_info modify_tcp[] = { if (!h
[dpdk-dev] [PATCH v2 13/25] net/mlx5: make flow table cache thread safe
From: Xueming Li To support multi-thread flow insertion/removal, this patch uses thread safe hash list API for flow table cache hash list. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5.c | 102 - drivers/net/mlx5/mlx5.h | 2 +- drivers/net/mlx5/mlx5_flow.h| 17 + drivers/net/mlx5/mlx5_flow_dv.c | 164 4 files changed, 116 insertions(+), 169 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index da043e2..fa769cd 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1003,7 +1003,7 @@ struct mlx5_dev_ctx_shared * } /** - * Destroy table hash list and all the root entries per domain. + * Destroy table hash list. * * @param[in] priv * Pointer to the private device data structure. @@ -1012,46 +1012,9 @@ struct mlx5_dev_ctx_shared * mlx5_free_table_hash_list(struct mlx5_priv *priv) { struct mlx5_dev_ctx_shared *sh = priv->sh; - struct mlx5_flow_tbl_data_entry *tbl_data; - union mlx5_flow_tbl_key table_key = { - { - .table_id = 0, - .reserved = 0, - .domain = 0, - .direction = 0, - } - }; - struct mlx5_hlist_entry *pos; if (!sh->flow_tbls) return; - pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL); - if (pos) { - tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry, - entry); - MLX5_ASSERT(tbl_data); - mlx5_hlist_remove(sh->flow_tbls, pos); - mlx5_free(tbl_data); - } - table_key.direction = 1; - pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL); - if (pos) { - tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry, - entry); - MLX5_ASSERT(tbl_data); - mlx5_hlist_remove(sh->flow_tbls, pos); - mlx5_free(tbl_data); - } - table_key.direction = 0; - table_key.domain = 1; - pos = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL); - if (pos) { - tbl_data = container_of(pos, struct mlx5_flow_tbl_data_entry, - entry); - MLX5_ASSERT(tbl_data); - mlx5_hlist_remove(sh->flow_tbls, pos); - mlx5_free(tbl_data); - } mlx5_hlist_destroy(sh->flow_tbls); } @@ -1066,80 +1029,45 @@ struct mlx5_dev_ctx_shared * * Zero on success, positive error code otherwise. */ int -mlx5_alloc_table_hash_list(struct mlx5_priv *priv) +mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused) { + int err = 0; + /* Tables are only used in DV and DR modes. */ +#ifdef HAVE_IBV_FLOW_DV_SUPPORT struct mlx5_dev_ctx_shared *sh = priv->sh; char s[MLX5_HLIST_NAMESIZE]; - int err = 0; MLX5_ASSERT(sh); snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name); sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE, - 0, 0, NULL, NULL, NULL); + 0, 0, flow_dv_tbl_create_cb, NULL, + flow_dv_tbl_remove_cb); if (!sh->flow_tbls) { DRV_LOG(ERR, "flow tables with hash creation failed."); err = ENOMEM; return err; } + sh->flow_tbls->ctx = sh; #ifndef HAVE_MLX5DV_DR + struct rte_flow_error error; + struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id]; + /* * In case we have not DR support, the zero tables should be created * because DV expect to see them even if they cannot be created by * RDMA-CORE. 
*/ - union mlx5_flow_tbl_key table_key = { - { - .table_id = 0, - .reserved = 0, - .domain = 0, - .direction = 0, - } - }; - struct mlx5_flow_tbl_data_entry *tbl_data = mlx5_malloc(MLX5_MEM_ZERO, - sizeof(*tbl_data), 0, - SOCKET_ID_ANY); - - if (!tbl_data) { - err = ENOMEM; - goto error; - } - tbl_data->entry.key = table_key.v64; - err = mlx5_hlist_insert(sh->flow_tbls, &tbl_data->entry); - if (err) - goto error; - rte_atomic32_init(&tbl_data->tbl.refcnt); - rte_atomic32_inc(&tbl_data->tbl.refcnt); - table_key.direction = 1; - tbl_data = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*tbl_data), 0, - SOCKET_ID_ANY); - if (!t
[dpdk-dev] [PATCH v2 16/25] net/mlx5: make flow modify action list thread safe
From: Xueming Li To support multi-thread flow insertion, this patch updates flow modify action list to use thread safe hash list with write-most mode. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 7 +- drivers/net/mlx5/mlx5_flow.h | 14 ++- drivers/net/mlx5/mlx5_flow_dv.c | 194 +-- 3 files changed, 102 insertions(+), 113 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 21a9db6..822edd3 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -250,12 +250,17 @@ sh->tag_table->ctx = sh; snprintf(s, sizeof(s), "%s_hdr_modify", sh->ibdev_name); sh->modify_cmds = mlx5_hlist_create(s, MLX5_FLOW_HDR_MODIFY_HTABLE_SZ, - 0, 0, NULL, NULL, NULL); + 0, MLX5_HLIST_WRITE_MOST | + MLX5_HLIST_DIRECT_KEY, + flow_dv_modify_create_cb, + flow_dv_modify_match_cb, + flow_dv_modify_remove_cb); if (!sh->modify_cmds) { DRV_LOG(ERR, "hdr modify hash creation failed"); err = ENOMEM; goto error; } + sh->modify_cmds->ctx = sh; snprintf(s, sizeof(s), "%s_encaps_decaps", sh->ibdev_name); sh->encaps_decaps = mlx5_hlist_create(s, MLX5_FLOW_ENCAP_DECAP_HTABLE_SZ, diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 87d7fe7..45b9b05 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -464,10 +464,8 @@ struct mlx5_flow_dv_tag_resource { /* Modify resource structure */ struct mlx5_flow_dv_modify_hdr_resource { struct mlx5_hlist_entry entry; - /* Pointer to next element. */ - rte_atomic32_t refcnt; /**< Reference counter. */ - void *action; - /**< Modify header action object. */ + void *action; /**< Modify header action object. */ + /* Key area for hash list matching: */ uint8_t ft_type; /**< Flow table type, Rx or Tx. */ uint32_t actions_num; /**< Number of modification actions. */ uint64_t flags; /**< Flags for RDMA API. */ @@ -1171,4 +1169,12 @@ struct mlx5_hlist_entry *flow_dv_tag_create_cb(struct mlx5_hlist *list, void flow_dv_tag_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry); +int flow_dv_modify_match_cb(struct mlx5_hlist *list, + struct mlx5_hlist_entry *entry, + uint64_t key, void *cb_ctx); +struct mlx5_hlist_entry *flow_dv_modify_create_cb(struct mlx5_hlist *list, + uint64_t key, void *ctx); +void flow_dv_modify_remove_cb(struct mlx5_hlist *list, + struct mlx5_hlist_entry *entry); + #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 2d4c8dd..7e9f9f9 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -4204,35 +4204,75 @@ struct field_modify_info modify_tcp[] = { /** * Match modify-header resource. * + * @param list + * Pointer to the hash list. * @param entry * Pointer to exist resource entry object. + * @param key + * Key of the new entry. * @param ctx * Pointer to new modify-header resource. * * @return - * 0 on matching, -1 otherwise. + * 0 on matching, non-zero otherwise. 
*/ -static int -flow_dv_modify_hdr_resource_match(struct mlx5_hlist_entry *entry, void *ctx) +int +flow_dv_modify_match_cb(struct mlx5_hlist *list __rte_unused, + struct mlx5_hlist_entry *entry, + uint64_t key __rte_unused, void *cb_ctx) { - struct mlx5_flow_dv_modify_hdr_resource *resource; - struct mlx5_flow_dv_modify_hdr_resource *cache_resource; - uint32_t actions_len; + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5_flow_dv_modify_hdr_resource *ref = ctx->data; + struct mlx5_flow_dv_modify_hdr_resource *resource = + container_of(entry, typeof(*resource), entry); + uint32_t key_len = sizeof(*ref) - offsetof(typeof(*ref), ft_type); - resource = (struct mlx5_flow_dv_modify_hdr_resource *)ctx; - cache_resource = container_of(entry, - struct mlx5_flow_dv_modify_hdr_resource, - entry); - actions_len = resource->actions_num * sizeof(resource->actions[0]); - if (resource->entry.key == cache_resource->entry.key && - resource->ft_type == cache_resource->ft_type && - resource->actions_num == cache_resource->actions_num && - res
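The modify-header match callback above compares only the trailing "key area" of the resource, starting at ft_type, instead of checking fields one by one. A short sketch of that offsetof/memcmp trick on a hypothetical structure (not the driver's struct, which lives in mlx5_flow.h):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical resource: everything from 'ft_type' onward is the lookup
 * key, everything before it is bookkeeping that must not influence the
 * comparison. Entries are assumed zero-initialized so padding bytes
 * compare reliably.
 */
struct mod_hdr_resource {
	void *action;		/* not part of the key */
	uint8_t ft_type;	/* key area starts here */
	uint32_t actions_num;
	uint64_t flags;
	uint64_t actions[8];	/* only the first actions_num entries are valid */
};

/* 0 on match, non-zero otherwise: the convention the hash list expects. */
static int
mod_hdr_match(const struct mod_hdr_resource *ref,
	      const struct mod_hdr_resource *res)
{
	size_t key_len = offsetof(struct mod_hdr_resource, actions) -
			 offsetof(struct mod_hdr_resource, ft_type);

	if (ref->actions_num != res->actions_num)
		return 1;
	if (memcmp(&ref->ft_type, &res->ft_type, key_len) != 0)
		return 1;
	return memcmp(ref->actions, res->actions,
		      ref->actions_num * sizeof(ref->actions[0])) != 0;
}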
[dpdk-dev] [PATCH v2 14/25] net/mlx5: fix redundant Direct Verbs resources allocation
All table, tag, header modify, header reformat are supported only on DV mode. For the OFED version doesn't support these, create the related redundant DV resources waste the memory. Add the code section in the HAVE_IBV_FLOW_DV_SUPPORT macro to avoid the redundant resources allocation. Fixes: 2eb4d0107acc ("net/mlx5: refactor PCI probing on Linux") Cc: sta...@dpdk.org Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 929fed2..6b60d56 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -225,7 +225,7 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) { struct mlx5_dev_ctx_shared *sh = priv->sh; - char s[MLX5_HLIST_NAMESIZE]; + char s[MLX5_HLIST_NAMESIZE] __rte_unused; int err; MLX5_ASSERT(sh && sh->refcnt); @@ -233,7 +233,9 @@ return 0; err = mlx5_alloc_table_hash_list(priv); if (err) - return err; + goto error; + /* The resources below are only valid with DV support. */ +#ifdef HAVE_IBV_FLOW_DV_SUPPORT /* Create tags hash list table. */ snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name); sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE, 0, @@ -260,6 +262,7 @@ err = ENOMEM; goto error; } +#endif #ifdef HAVE_MLX5DV_DR void *domain; -- 1.8.3.1
[dpdk-dev] [PATCH v2 15/25] net/mlx5: make flow tag list thread safe
From: Xueming Li To support multi-thread flow insertion, this patch updates flow tag list to use thread safe hash list with write-most mode. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 5 ++- drivers/net/mlx5/mlx5_flow.h | 5 +++ drivers/net/mlx5/mlx5_flow_dv.c | 97 +++- 3 files changed, 56 insertions(+), 51 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 6b60d56..21a9db6 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -239,12 +239,15 @@ /* Create tags hash list table. */ snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name); sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE, 0, - 0, NULL, NULL, NULL); + MLX5_HLIST_WRITE_MOST, + flow_dv_tag_create_cb, NULL, + flow_dv_tag_remove_cb); if (!sh->tag_table) { DRV_LOG(ERR, "tags with hash creation failed."); err = ENOMEM; goto error; } + sh->tag_table->ctx = sh; snprintf(s, sizeof(s), "%s_hdr_modify", sh->ibdev_name); sh->modify_cmds = mlx5_hlist_create(s, MLX5_FLOW_HDR_MODIFY_HTABLE_SZ, 0, 0, NULL, NULL, NULL); diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 8d12b5d..87d7fe7 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -1166,4 +1166,9 @@ struct mlx5_flow_tbl_resource *flow_dv_tbl_resource_get(struct rte_eth_dev *dev, uint32_t table_id, uint8_t egress, uint8_t transfer, uint8_t dummy, struct rte_flow_error *error); +struct mlx5_hlist_entry *flow_dv_tag_create_cb(struct mlx5_hlist *list, + uint64_t key, void *cb_ctx); +void flow_dv_tag_remove_cb(struct mlx5_hlist *list, + struct mlx5_hlist_entry *entry); + #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 18a52b0..2d4c8dd 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -8044,6 +8044,35 @@ struct mlx5_flow_tbl_resource * return 0; } +struct mlx5_hlist_entry * +flow_dv_tag_create_cb(struct mlx5_hlist *list, uint64_t key, void *ctx) +{ + struct mlx5_dev_ctx_shared *sh = list->ctx; + struct rte_flow_error *error = ctx; + struct mlx5_flow_dv_tag_resource *entry; + uint32_t idx = 0; + int ret; + + entry = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_TAG], &idx); + if (!entry) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "cannot allocate resource memory"); + return NULL; + } + entry->idx = idx; + ret = mlx5_flow_os_create_flow_action_tag(key, + &entry->action); + if (ret) { + mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TAG], idx); + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "cannot create action"); + return NULL; + } + return &entry->entry; +} + /** * Find existing tag resource or create and register a new one. * @@ -8067,54 +8096,32 @@ struct mlx5_flow_tbl_resource * struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_dv_tag_resource *cache_resource; struct mlx5_hlist_entry *entry; - int ret; - /* Lookup a matching resource from cache. 
*/ - entry = mlx5_hlist_lookup(sh->tag_table, (uint64_t)tag_be24, NULL); + entry = mlx5_hlist_register(priv->sh->tag_table, tag_be24, error); if (entry) { cache_resource = container_of (entry, struct mlx5_flow_dv_tag_resource, entry); - rte_atomic32_inc(&cache_resource->refcnt); dev_flow->handle->dvh.rix_tag = cache_resource->idx; dev_flow->dv.tag_resource = cache_resource; - DRV_LOG(DEBUG, "cached tag resource %p: refcnt now %d++", - (void *)cache_resource, - rte_atomic32_read(&cache_resource->refcnt)); return 0; } - /* Register new resource. */ - cache_resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_TAG], - &dev_flow->handle->dvh.rix_tag); - if (!cache_resource) - return rte_flow_error_set(error, ENOMEM,
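On the caller side, the tag conversion above shows the shape every later patch in this series follows: one register() call replaces the lookup, the refcount increment and the explicit insert, and container_of() recovers the resource that wraps the list entry. A hedged sketch against the illustrative list from the earlier note (declarations only, hypothetical names):

#include <stddef.h>
#include <stdint.h>

/* Minimal stand-ins for the illustrative hash list sketched earlier;
 * hlist_register() returns an existing entry with a reference taken, or
 * a new one built by the list's create callback, or NULL on failure.
 */
struct hlist;
struct hlist_entry {
	struct hlist_entry *next;
	uint64_t key;
	uint32_t ref_cnt;
};
struct hlist_entry *hlist_register(struct hlist *h, uint64_t key, void *cb_ctx);

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Hypothetical tag resource wrapping the list entry. */
struct tag_resource {
	struct hlist_entry entry;
	void *action;	/* hardware tag action, opaque here */
	uint32_t idx;	/* pool slot, freed by the remove callback */
};

static struct tag_resource *
tag_resource_get(struct hlist *tag_table, uint32_t tag_be24, void *error)
{
	struct hlist_entry *entry = hlist_register(tag_table, tag_be24, error);

	if (entry == NULL)
		return NULL;	/* failure already reported via the create callback */
	return container_of(entry, struct tag_resource, entry);
}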
[dpdk-dev] [PATCH v2 18/25] net/mlx5: make metadata copy flow list thread safe
From: Xueming Li To support multi-thread flow insertion, this patch updates metadata copy flow list to use thread safe hash list. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 5 +- drivers/net/mlx5/mlx5_flow.c | 162 ++- drivers/net/mlx5/mlx5_flow.h | 6 +- 3 files changed, 99 insertions(+), 74 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 822edd3..6ed4abc 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1470,11 +1470,14 @@ priv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME, MLX5_FLOW_MREG_HTABLE_SZ, 0, 0, - NULL, NULL, NULL); + flow_dv_mreg_create_cb, + NULL, + flow_dv_mreg_remove_cb); if (!priv->mreg_cp_tbl) { err = ENOMEM; goto error; } + priv->mreg_cp_tbl->ctx = eth_dev; } mlx5_flow_counter_mode_config(eth_dev); return eth_dev; diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index b2e694d..ba2e200 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -3287,36 +3287,18 @@ struct mlx5_flow_tunnel_info { flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, uint32_t flow_idx); -/** - * Add a flow of copying flow metadata registers in RX_CP_TBL. - * - * As mark_id is unique, if there's already a registered flow for the mark_id, - * return by increasing the reference counter of the resource. Otherwise, create - * the resource (mcp_res) and flow. - * - * Flow looks like, - * - If ingress port is ANY and reg_c[1] is mark_id, - * flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL. - * - * For default flow (zero mark_id), flow is like, - * - If ingress port is ANY, - * reg_b := reg_c[0] and jump to RX_ACT_TBL. - * - * @param dev - * Pointer to Ethernet device. - * @param mark_id - * ID of MARK action, zero means default flow for META. - * @param[out] error - * Perform verbose error reporting if not NULL. - * - * @return - * Associated resource on success, NULL otherwise and rte_errno is set. - */ -static struct mlx5_flow_mreg_copy_resource * -flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, - struct rte_flow_error *error) +struct mlx5_hlist_entry * +flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key, + void *cb_ctx) { + struct rte_eth_dev *dev = list->ctx; struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5_flow_mreg_copy_resource *mcp_res; + struct rte_flow_error *error = ctx->error; + uint32_t idx = 0; + int ret; + uint32_t mark_id = key; struct rte_flow_attr attr = { .group = MLX5_FLOW_MREG_CP_TABLE_GROUP, .ingress = 1, @@ -3340,9 +3322,6 @@ struct mlx5_flow_tunnel_info { struct rte_flow_action actions[] = { [3] = { .type = RTE_FLOW_ACTION_TYPE_END, }, }; - struct mlx5_flow_mreg_copy_resource *mcp_res; - uint32_t idx = 0; - int ret; /* Fill the register fileds in the flow. */ ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error); @@ -3353,17 +3332,6 @@ struct mlx5_flow_tunnel_info { if (ret < 0) return NULL; cp_mreg.src = ret; - /* Check if already registered. */ - MLX5_ASSERT(priv->mreg_cp_tbl); - mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, NULL); - if (mcp_res) { - /* For non-default rule. */ - if (mark_id != MLX5_DEFAULT_COPY_ID) - mcp_res->refcnt++; - MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID || - mcp_res->refcnt == 1); - return mcp_res; - } /* Provide the full width of FLAG specific value. 
*/ if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT)) tag_spec.data = MLX5_FLOW_MARK_DEFAULT; @@ -3428,20 +3396,69 @@ struct mlx5_flow_tunnel_info { */ mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items, actions, false, error); - if (!mcp_res->rix_flow) - goto error; - mcp_res->refcnt++; - mcp_res->hlist_ent.key = mark_id; - ret = !mlx5_hlist_insert(priv->mreg_cp_tbl, &mcp_res->hlist_ent); - MLX5_ASSERT(!ret); - if (ret) - goto e
[dpdk-dev] [PATCH v2 17/25] net/mlx5: remove unused mreg copy code
After non-cache mode feature was implemented, the flows can only be created when port started. No need to check if the mreg flows are created in port stopped status, and apply the mreg flows after port start will also never happen. This commit removed the relevant not used mreg copy code. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5.h | 2 - drivers/net/mlx5/mlx5_flow.c | 185 --- drivers/net/mlx5/mlx5_flow.h | 2 - 3 files changed, 189 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index ae6d37d..0e4917a 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1050,8 +1050,6 @@ int mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_type filter_type, enum rte_filter_op filter_op, void *arg); -int mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list); -void mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list); int mlx5_flow_start_default(struct rte_eth_dev *dev); void mlx5_flow_stop_default(struct rte_eth_dev *dev); int mlx5_flow_verify(struct rte_eth_dev *dev); diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 80b4980..b2e694d 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -3002,28 +3002,6 @@ struct mlx5_flow_tunnel_info { } /** - * Flow driver remove API. This abstracts calling driver specific functions. - * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow - * on device. All the resources of the flow should be freed by calling - * flow_drv_destroy(). - * - * @param[in] dev - * Pointer to Ethernet device. - * @param[in, out] flow - * Pointer to flow structure. - */ -static inline void -flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow) -{ - const struct mlx5_flow_driver_ops *fops; - enum mlx5_flow_drv_type type = flow->drv_type; - - MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX); - fops = flow_get_drv_ops(type); - fops->remove(dev, flow); -} - -/** * Flow driver destroy API. This abstracts calling driver specific functions. * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow * on device and releases resources of the flow. @@ -3487,19 +3465,6 @@ struct mlx5_flow_tunnel_info { flow->rix_mreg_copy); if (!mcp_res || !priv->mreg_cp_tbl) return; - if (flow->copy_applied) { - MLX5_ASSERT(mcp_res->appcnt); - flow->copy_applied = 0; - --mcp_res->appcnt; - if (!mcp_res->appcnt) { - struct rte_flow *mcp_flow = mlx5_ipool_get - (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], - mcp_res->rix_flow); - - if (mcp_flow) - flow_drv_remove(dev, mcp_flow); - } - } /* * We do not check availability of metadata registers here, * because copy resources are not allocated in this case. @@ -3514,81 +3479,6 @@ struct mlx5_flow_tunnel_info { } /** - * Start flow in RX_CP_TBL. - * - * @param dev - * Pointer to Ethernet device. - * @flow - * Parent flow for wich copying is provided. - * - * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. 
- */ -static int -flow_mreg_start_copy_action(struct rte_eth_dev *dev, - struct rte_flow *flow) -{ - struct mlx5_flow_mreg_copy_resource *mcp_res; - struct mlx5_priv *priv = dev->data->dev_private; - int ret; - - if (!flow->rix_mreg_copy || flow->copy_applied) - return 0; - mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP], -flow->rix_mreg_copy); - if (!mcp_res) - return 0; - if (!mcp_res->appcnt) { - struct rte_flow *mcp_flow = mlx5_ipool_get - (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], - mcp_res->rix_flow); - - if (mcp_flow) { - ret = flow_drv_apply(dev, mcp_flow, NULL); - if (ret) - return ret; - } - } - ++mcp_res->appcnt; - flow->copy_applied = 1; - return 0; -} - -/** - * Stop flow in RX_CP_TBL. - * - * @param dev - * Pointer to Ethernet device. - * @flow - * Parent flow for wich copying is provided. - */ -static void -flow_mreg_stop_copy_action(struct rte_eth_dev *dev, - struct rte_flow *flow) -{ - struct mlx5_flow_mreg_copy_resource *mcp_res; - struct mlx5_priv *priv = dev->data->dev_private; - - if (!flow->rix_mreg_copy || !flow->copy_applied) - return; -
[dpdk-dev] [PATCH v2 20/25] net/mlx5: remove unused hash list operations
In previous commits the hash list objects have been converted to new thread safe hash list. The legacy hash list code can be removed now. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5_utils.c | 38 - drivers/net/mlx5/mlx5_utils.h | 66 --- 2 files changed, 104 deletions(-) diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c index d041b07..1867fde 100644 --- a/drivers/net/mlx5/mlx5_utils.c +++ b/drivers/net/mlx5/mlx5_utils.c @@ -170,44 +170,6 @@ struct mlx5_hlist_entry* return entry; } -struct mlx5_hlist_entry * -mlx5_hlist_lookup_ex(struct mlx5_hlist *h, uint64_t key, -mlx5_hlist_match_callback_fn cb, void *ctx) -{ - uint32_t idx; - struct mlx5_hlist_head *first; - struct mlx5_hlist_entry *node; - - MLX5_ASSERT(h && cb && ctx); - idx = rte_hash_crc_8byte(key, 0) & h->mask; - first = &h->heads[idx]; - LIST_FOREACH(node, first, next) { - if (!cb(node, ctx)) - return node; - } - return NULL; -} - -int -mlx5_hlist_insert_ex(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry, -mlx5_hlist_match_callback_fn cb, void *ctx) -{ - uint32_t idx; - struct mlx5_hlist_head *first; - struct mlx5_hlist_entry *node; - - MLX5_ASSERT(h && entry && cb && ctx); - idx = rte_hash_crc_8byte(entry->key, 0) & h->mask; - first = &h->heads[idx]; - /* No need to reuse the lookup function. */ - LIST_FOREACH(node, first, next) { - if (!cb(node, ctx)) - return -EEXIST; - } - LIST_INSERT_HEAD(first, entry, next); - return 0; -} - int mlx5_hlist_unregister(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry) { diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index c665558..6968d94 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -21,12 +21,6 @@ #include "mlx5_defs.h" -#define mlx5_hlist_remove(h, e) \ - mlx5_hlist_unregister(h, e) - -#define mlx5_hlist_insert(h, e) \ - mlx5_hlist_register(h, 0, e) - /* Convert a bit number to the corresponding 64-bit mask */ #define MLX5_BITSHIFT(v) (UINT64_C(1) << (v)) @@ -287,23 +281,6 @@ struct mlx5_hlist_entry { /** Structure for hash head. */ LIST_HEAD(mlx5_hlist_head, mlx5_hlist_entry); -/** Type of function that is used to handle the data before freeing. */ -typedef void (*mlx5_hlist_destroy_callback_fn)(void *p, void *ctx); - -/** - * Type of function for user defined matching. - * - * @param entry - * The entry in the list. - * @param ctx - * The pointer to new entry context. - * - * @return - * 0 if matching, -1 otherwise. - */ -typedef int (*mlx5_hlist_match_callback_fn)(struct mlx5_hlist_entry *entry, -void *ctx); - /** * Type of callback function for entry removal. * @@ -429,49 +406,6 @@ struct mlx5_hlist_entry *mlx5_hlist_lookup(struct mlx5_hlist *h, uint64_t key, void *ctx); /** - * Extended routine to search an entry matching the context with - * user defined match function. - * - * @param h - * Pointer to the hast list table. - * @param key - * Key for the searching entry. - * @param cb - * Callback function to match the node with context. - * @param ctx - * Common context parameter used by callback function. - * - * @return - * Pointer of the hlist entry if found, NULL otherwise. - */ -struct mlx5_hlist_entry *mlx5_hlist_lookup_ex(struct mlx5_hlist *h, - uint64_t key, - mlx5_hlist_match_callback_fn cb, - void *ctx); - -/** - * Extended routine to insert an entry to the list with key collisions. - * - * For the list have key collision, the extra user defined match function - * allows node with same key will be inserted. 
- * - * @param h - * Pointer to the hast list table. - * @param entry - * Entry to be inserted into the hash list table. - * @param cb - * Callback function to match the node with context. - * @param ctx - * Common context parameter used by callback function. - * - * @return - * - zero for success. - * - -EEXIST if the entry is already inserted. - */ -int mlx5_hlist_insert_ex(struct mlx5_hlist *h, struct mlx5_hlist_entry *entry, -mlx5_hlist_match_callback_fn cb, void *ctx); - -/** * Insert an entry to the hash list table, the entry is only part of whole data * element and a 64B key is used for matching. User should construct the key or * give a calculated hash signature and guarantee there is no collision. -- 1.8.3.1
[dpdk-dev] [PATCH v2 21/25] net/mlx5: introduce thread safe linked list cache
From: Xueming Li New API of linked list for cache: - Optimized for small amount cache list. - Optimized for read-most list. - Thread safe. - Since number of entries are limited, entries allocated by API. - For dynamic entry size, pass 0 as entry size, then the creation callback allocate the entry. - Since number of entries are limited, no need to use indexed pool to allocate memory. API will remove entry and free with mlx5_free. - Search API is not supposed to be used in multi-thread. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5_utils.c | 160 drivers/net/mlx5/mlx5_utils.h | 183 ++ 2 files changed, 343 insertions(+) diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c index 1867fde..13590dd 100644 --- a/drivers/net/mlx5/mlx5_utils.c +++ b/drivers/net/mlx5/mlx5_utils.c @@ -217,6 +217,166 @@ struct mlx5_hlist_entry* mlx5_free(h); } +/* Cache list / + +static struct mlx5_cache_entry * +mlx5_clist_default_create_cb(struct mlx5_cache_list *list, +struct mlx5_cache_entry *entry __rte_unused, +void *ctx __rte_unused) +{ + return mlx5_malloc(MLX5_MEM_ZERO, list->entry_sz, 0, SOCKET_ID_ANY); +} + +static void +mlx5_clist_default_remove_cb(struct mlx5_cache_list *list __rte_unused, +struct mlx5_cache_entry *entry) +{ + mlx5_free(entry); +} + +int +mlx5_cache_list_init(struct mlx5_cache_list *list, const char *name, +uint32_t entry_size, void *ctx, +mlx5_cache_create_cb cb_create, +mlx5_cache_match_cb cb_match, +mlx5_cache_remove_cb cb_remove) +{ + MLX5_ASSERT(list); + if (!cb_match || (!cb_create ^ !cb_remove)) + return -1; + if (name) + snprintf(list->name, sizeof(list->name), "%s", name); + list->entry_sz = entry_size; + list->ctx = ctx; + list->cb_create = cb_create ? cb_create : mlx5_clist_default_create_cb; + list->cb_match = cb_match; + list->cb_remove = cb_remove ? cb_remove : mlx5_clist_default_remove_cb; + rte_rwlock_init(&list->lock); + DRV_LOG(DEBUG, "Cache list %s initialized.", list->name); + LIST_INIT(&list->head); + return 0; +} + +static struct mlx5_cache_entry * +__cache_lookup(struct mlx5_cache_list *list, void *ctx, bool reuse) +{ + struct mlx5_cache_entry *entry; + + LIST_FOREACH(entry, &list->head, next) { + if (list->cb_match(list, entry, ctx)) + continue; + if (reuse) { + __atomic_add_fetch(&entry->ref_cnt, 1, + __ATOMIC_RELAXED); + DRV_LOG(DEBUG, "Cache list %s entry %p ref++: %u.", + list->name, (void *)entry, entry->ref_cnt); + } + break; + } + return entry; +} + +static struct mlx5_cache_entry * +cache_lookup(struct mlx5_cache_list *list, void *ctx, bool reuse) +{ + struct mlx5_cache_entry *entry; + + rte_rwlock_read_lock(&list->lock); + entry = __cache_lookup(list, ctx, reuse); + rte_rwlock_read_unlock(&list->lock); + return entry; +} + +struct mlx5_cache_entry * +mlx5_cache_lookup(struct mlx5_cache_list *list, void *ctx) +{ + return cache_lookup(list, ctx, false); +} + +struct mlx5_cache_entry * +mlx5_cache_register(struct mlx5_cache_list *list, void *ctx) +{ + struct mlx5_cache_entry *entry; + uint32_t prev_gen_cnt = 0; + + MLX5_ASSERT(list); + prev_gen_cnt = __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE); + /* Lookup with read lock, reuse if found. */ + entry = cache_lookup(list, ctx, true); + if (entry) + return entry; + /* Not found, append with write lock - block read from other threads. */ + rte_rwlock_write_lock(&list->lock); + /* If list changed by other threads before lock, search again. */ + if (prev_gen_cnt != __atomic_load_n(&list->gen_cnt, __ATOMIC_ACQUIRE)) { + /* Lookup and reuse w/o read lock. 
*/ + entry = __cache_lookup(list, ctx, true); + if (entry) + goto done; + } + entry = list->cb_create(list, entry, ctx); + if (!entry) { + DRV_LOG(ERR, "Failed to init cache list %s entry %p.", + list->name, (void *)entry); + goto done; + } + entry->ref_cnt = 1; + LIST_INSERT_HEAD(&list->head, entry, next); + __atomic_add_fetch(&list->gen_cnt, 1, __ATOMIC_RELEASE); + __atomic_add_fetch(&list->count, 1, __ATOMIC_ACQUIRE); + DRV_LOG(DEBUG, "Cache list %s entry %p new: %u.", + list->name, (void *)entry, entry->ref_cnt);
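The interesting part of the new cache list is mlx5_cache_register() above: hits are served under the read lock only, and a generation counter decides whether the write-locked path has to search again before creating a new entry. Below is a compact, self-contained sketch of that scheme with plain pthreads and GCC atomic builtins. It illustrates the locking idea only and is not the driver code; all names are made up.

#include <pthread.h>
#include <stddef.h>
#include <stdint.h>

struct centry {
	struct centry *next;
	uint32_t ref_cnt;
};

struct clist {
	struct centry *head;
	pthread_rwlock_t lock;
	uint32_t gen_cnt;	/* bumped on every insertion */
	/* 0 on match, non-zero otherwise. */
	int (*cb_match)(struct clist *l, struct centry *e, void *ctx);
	struct centry *(*cb_create)(struct clist *l, void *ctx);
};

static struct centry *
clist_lookup(struct clist *l, void *ctx, int reuse)
{
	struct centry *e;

	for (e = l->head; e != NULL; e = e->next) {
		if (l->cb_match(l, e, ctx))
			continue;
		if (reuse)
			__atomic_add_fetch(&e->ref_cnt, 1, __ATOMIC_RELAXED);
		break;
	}
	return e;
}

struct centry *
clist_register(struct clist *l, void *ctx)
{
	struct centry *e;
	uint32_t gen;

	/* Snapshot the generation, then search under the read lock only. */
	gen = __atomic_load_n(&l->gen_cnt, __ATOMIC_ACQUIRE);
	pthread_rwlock_rdlock(&l->lock);
	e = clist_lookup(l, ctx, 1);
	pthread_rwlock_unlock(&l->lock);
	if (e != NULL)
		return e;
	/* Miss: take the write lock to insert. */
	pthread_rwlock_wrlock(&l->lock);
	/* Another writer may have added the entry while we waited. */
	if (gen != __atomic_load_n(&l->gen_cnt, __ATOMIC_ACQUIRE)) {
		e = clist_lookup(l, ctx, 1);
		if (e != NULL)
			goto out;
	}
	e = l->cb_create(l, ctx);
	if (e != NULL) {
		e->ref_cnt = 1;
		e->next = l->head;
		l->head = e;
		__atomic_add_fetch(&l->gen_cnt, 1, __ATOMIC_RELEASE);
	}
out:
	pthread_rwlock_unlock(&l->lock);
	return e;
}

The common case, where the entry already exists, never takes the write lock; the generation check only forces a second search when some other thread actually inserted something between the two lock acquisitions.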
[dpdk-dev] [PATCH v2 19/25] net/mlx5: make header reformat action thread safe
To support multi-thread flow insertion, this patch updates flow header reformat action list to use thread safe hash list with write-most mode. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 7 +- drivers/net/mlx5/mlx5_flow.h | 7 ++ drivers/net/mlx5/mlx5_flow_dv.c | 184 ++- 3 files changed, 116 insertions(+), 82 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 6ed4abc..9d1a5d7 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -264,12 +264,17 @@ snprintf(s, sizeof(s), "%s_encaps_decaps", sh->ibdev_name); sh->encaps_decaps = mlx5_hlist_create(s, MLX5_FLOW_ENCAP_DECAP_HTABLE_SZ, - 0, 0, NULL, NULL, NULL); + 0, MLX5_HLIST_DIRECT_KEY | + MLX5_HLIST_WRITE_MOST, + flow_dv_encap_decap_create_cb, + flow_dv_encap_decap_match_cb, + flow_dv_encap_decap_remove_cb); if (!sh->encaps_decaps) { DRV_LOG(ERR, "encap decap hash creation failed"); err = ENOMEM; goto error; } + sh->encaps_decaps->ctx = sh; #endif #ifdef HAVE_MLX5DV_DR void *domain; diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index f35a7ce..fd53c4d 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -1179,4 +1179,11 @@ struct mlx5_hlist_entry *flow_dv_mreg_create_cb(struct mlx5_hlist *list, void flow_dv_mreg_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry); +int flow_dv_encap_decap_match_cb(struct mlx5_hlist *list, +struct mlx5_hlist_entry *entry, +uint64_t key, void *cb_ctx); +struct mlx5_hlist_entry *flow_dv_encap_decap_create_cb(struct mlx5_hlist *list, + uint64_t key, void *cb_ctx); +void flow_dv_encap_decap_remove_cb(struct mlx5_hlist *list, + struct mlx5_hlist_entry *entry); #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 7e9f9f9..92eb91f 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -2782,21 +2782,27 @@ struct field_modify_info modify_tcp[] = { /** * Match encap_decap resource. * + * @param list + * Pointer to the hash list. * @param entry * Pointer to exist resource entry object. - * @param ctx + * @param key + * Key of the new entry. + * @param ctx_cb * Pointer to new encap_decap resource. * * @return - * 0 on matching, -1 otherwise. + * 0 on matching, none-zero otherwise. */ -static int -flow_dv_encap_decap_resource_match(struct mlx5_hlist_entry *entry, void *ctx) +int +flow_dv_encap_decap_match_cb(struct mlx5_hlist *list __rte_unused, +struct mlx5_hlist_entry *entry, +uint64_t key __rte_unused, void *cb_ctx) { - struct mlx5_flow_dv_encap_decap_resource *resource; + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5_flow_dv_encap_decap_resource *resource = ctx->data; struct mlx5_flow_dv_encap_decap_resource *cache_resource; - resource = (struct mlx5_flow_dv_encap_decap_resource *)ctx; cache_resource = container_of(entry, struct mlx5_flow_dv_encap_decap_resource, entry); @@ -2813,6 +2819,63 @@ struct field_modify_info modify_tcp[] = { } /** + * Allocate encap_decap resource. + * + * @param list + * Pointer to the hash list. + * @param entry + * Pointer to exist resource entry object. + * @param ctx_cb + * Pointer to new encap_decap resource. + * + * @return + * 0 on matching, none-zero otherwise. 
+ */ +struct mlx5_hlist_entry * +flow_dv_encap_decap_create_cb(struct mlx5_hlist *list, + uint64_t key __rte_unused, + void *cb_ctx) +{ + struct mlx5_dev_ctx_shared *sh = list->ctx; + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5dv_dr_domain *domain; + struct mlx5_flow_dv_encap_decap_resource *resource = ctx->data; + struct mlx5_flow_dv_encap_decap_resource *cache_resource; + uint32_t idx; + int ret; + + if (resource->ft_type == MLX5DV_FLOW_TABLE_TYPE_FDB) + domain = sh->fdb_domain; + else if (resource->ft_type == MLX5DV_FLOW_TABLE_TYPE_NIC_RX) + domain = sh->rx_domain; + else + domain = sh->tx_domain; + /* Register new encap/decap resource. */ + cache_resou
[dpdk-dev] [PATCH v2 23/25] net/mlx5: make matcher list thread safe
From: Xueming Li To support multi-thread flow insertion, this path converts matcher list to use thread safe cache list API. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5.h | 3 + drivers/net/mlx5/mlx5_flow.h| 15 ++- drivers/net/mlx5/mlx5_flow_dv.c | 214 +--- 3 files changed, 129 insertions(+), 103 deletions(-) diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 7157dbf..30ab09b 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -32,6 +32,9 @@ #include "mlx5_os.h" #include "mlx5_autoconf.h" + +#define MLX5_SH(dev) (((struct mlx5_priv *)(dev)->data->dev_private)->sh) + enum mlx5_ipool_index { #ifdef HAVE_IBV_FLOW_DV_SUPPORT MLX5_IPOOL_DECAP_ENCAP = 0, /* Pool for encap/decap resource. */ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index c332308..eac8916 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -391,11 +391,9 @@ struct mlx5_flow_dv_match_params { /* Matcher structure. */ struct mlx5_flow_dv_matcher { - LIST_ENTRY(mlx5_flow_dv_matcher) next; - /**< Pointer to the next element. */ + struct mlx5_cache_entry entry; /**< Pointer to the next element. */ struct mlx5_flow_tbl_resource *tbl; /**< Pointer to the table(group) the matcher associated with. */ - rte_atomic32_t refcnt; /**< Reference counter. */ void *matcher_object; /**< Pointer to DV matcher */ uint16_t crc; /**< CRC of key. */ uint16_t priority; /**< Priority of matcher. */ @@ -522,11 +520,12 @@ struct mlx5_flow_tbl_data_entry { /**< hash list entry, 64-bits key inside. */ struct mlx5_flow_tbl_resource tbl; /**< flow table resource. */ - LIST_HEAD(matchers, mlx5_flow_dv_matcher) matchers; + struct mlx5_cache_list matchers; /**< matchers' header associated with the flow table. */ struct mlx5_flow_dv_jump_tbl_resource jump; /**< jump resource, at most one for each table created. */ uint32_t idx; /**< index for the indexed mempool. */ + bool is_egress; /**< Egress table. */ }; /* Sub rdma-core actions list. */ @@ -1170,4 +1169,12 @@ struct mlx5_hlist_entry *flow_dv_encap_decap_create_cb(struct mlx5_hlist *list, uint64_t key, void *cb_ctx); void flow_dv_encap_decap_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry); + +int flow_dv_matcher_match_cb(struct mlx5_cache_list *list, +struct mlx5_cache_entry *entry, void *ctx); +struct mlx5_cache_entry *flow_dv_matcher_create_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry, void *ctx); +void flow_dv_matcher_remove_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry); + #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 2ddaf75..2d0ef3a 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -70,7 +70,7 @@ }; static int -flow_dv_tbl_resource_release(struct rte_eth_dev *dev, +flow_dv_tbl_resource_release(struct mlx5_dev_ctx_shared *sh, struct mlx5_flow_tbl_resource *tbl); static int @@ -7817,6 +7817,7 @@ struct mlx5_hlist_entry * return NULL; } tbl_data->idx = idx; + tbl_data->is_egress = !!key.direction; tbl = &tbl_data->tbl; if (key.dummy) return &tbl_data->entry; @@ -7847,6 +7848,13 @@ struct mlx5_hlist_entry * return NULL; } } + MKSTR(matcher_name, "%s_%s_%u_matcher_cache", + key.domain ? "FDB" : "NIC", key.direction ? 
"egress" : "ingress", + key.table_id); + mlx5_cache_list_init(&tbl_data->matchers, matcher_name, 0, sh, +flow_dv_matcher_create_cb, +flow_dv_matcher_match_cb, +flow_dv_matcher_remove_cb); return &tbl_data->entry; } @@ -7909,14 +7917,15 @@ struct mlx5_flow_tbl_resource * MLX5_ASSERT(entry && sh); if (tbl_data->tbl.obj) mlx5_flow_os_destroy_flow_tbl(tbl_data->tbl.obj); + mlx5_cache_list_destroy(&tbl_data->matchers); mlx5_ipool_free(sh->ipool[MLX5_IPOOL_JUMP], tbl_data->idx); } /** * Release a flow table. * - * @param[in] dev - * Pointer to rte_eth_dev structure. + * @param[in] sh + * Pointer to device shared structure. * @param[in] tbl * Table resource to be released. * @@ -7924,11 +7933,9 @@ struct mlx5_flow_tbl_resource * * Returns 0 if table was released, else return 1; */ static int -flow_dv_tbl_resource_release(struct rte_eth_dev *dev, +flow_dv_tbl_resource_release(struct mlx5_dev_ctx_shared *sh,
[dpdk-dev] [PATCH v2 22/25] net/mlx5: make Rx queue thread safe
This commit applies the cache linked list to Rx queue to make it thread safe. Signed-off-by: Suanming Mou Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 5 + drivers/net/mlx5/mlx5.c| 1 + drivers/net/mlx5/mlx5.h| 24 +++- drivers/net/mlx5/mlx5_flow.h | 16 --- drivers/net/mlx5/mlx5_flow_dv.c| 61 -- drivers/net/mlx5/mlx5_flow_verbs.c | 19 +-- drivers/net/mlx5/mlx5_rxq.c| 235 +++-- drivers/net/mlx5/mlx5_rxtx.h | 20 ++-- 8 files changed, 182 insertions(+), 199 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 9d1a5d7..b0dcb40 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1450,6 +1450,10 @@ err = ENOTSUP; goto error; } + mlx5_cache_list_init(&priv->hrxqs, "hrxq", 0, eth_dev, +mlx5_hrxq_create_cb, +mlx5_hrxq_match_cb, +mlx5_hrxq_remove_cb); /* Query availability of metadata reg_c's. */ err = mlx5_flow_discover_mreg_c(eth_dev); if (err < 0) { @@ -1502,6 +1506,7 @@ mlx5_drop_action_destroy(eth_dev); if (own_domain_id) claim_zero(rte_eth_switch_domain_free(priv->domain_id)); + mlx5_cache_list_destroy(&priv->hrxqs); mlx5_free(priv); if (eth_dev != NULL) eth_dev->data->dev_private = NULL; diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index fa769cd..cacc799 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1276,6 +1276,7 @@ struct mlx5_dev_ctx_shared * if (ret) DRV_LOG(WARNING, "port %u some flows still remain", dev->data->port_id); + mlx5_cache_list_destroy(&priv->hrxqs); /* * Free the shared context in last turn, because the cleanup * routines above may use some shared fields, like diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 0e4917a..7157dbf 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -63,6 +63,13 @@ enum mlx5_reclaim_mem_mode { MLX5_RCM_AGGR, /* Reclaim PMD and rdma-core level. */ }; +/* Hash and cache list callback context. */ +struct mlx5_flow_cb_ctx { + struct rte_eth_dev *dev; + struct rte_flow_error *error; + void *data; +}; + /* Device attributes used in mlx5 PMD */ struct mlx5_dev_attr { uint64_tdevice_cap_flags_ex; @@ -671,6 +678,18 @@ struct mlx5_proc_priv { /* MTR list. */ TAILQ_HEAD(mlx5_flow_meters, mlx5_flow_meter); +/* RSS description. */ +struct mlx5_flow_rss_desc { + uint32_t level; + uint32_t queue_num; /**< Number of entries in @p queue. */ + uint64_t types; /**< Specific RSS hash types (see ETH_RSS_*). */ + uint64_t hash_fields; /* Verbs Hash fields. */ + uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */ + uint32_t key_len; /**< RSS hash key len. */ + uint32_t tunnel; /**< Queue in tunnel. */ + uint16_t *queue; /**< Destination queues. */ +}; + #define MLX5_PROC_PRIV(port_id) \ ((struct mlx5_proc_priv *)rte_eth_devices[port_id].process_private) @@ -709,7 +728,7 @@ struct mlx5_ind_table_obj { /* Hash Rx queue. */ struct mlx5_hrxq { - ILIST_ENTRY(uint32_t)next; /* Index to the next element. */ + struct mlx5_cache_entry entry; /* Cache entry. */ rte_atomic32_t refcnt; /* Reference counter. */ struct mlx5_ind_table_obj *ind_table; /* Indirection table. */ RTE_STD_C11 @@ -722,6 +741,7 @@ struct mlx5_hrxq { #endif uint64_t hash_fields; /* Verbs Hash fields. */ uint32_t rss_key_len; /* Hash key length in bytes. */ + uint32_t idx; /* Hash Rx queue index. */ uint8_t rss_key[]; /* Hash key. */ }; @@ -835,7 +855,7 @@ struct mlx5_priv { struct mlx5_obj_ops obj_ops; /* HW objects operations. */ LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. 
*/ LIST_HEAD(rxqobj, mlx5_rxq_obj) rxqsobj; /* Verbs/DevX Rx queues. */ - uint32_t hrxqs; /* Verbs Hash Rx queues. */ + struct mlx5_cache_list hrxqs; /* Hash Rx queues. */ LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */ LIST_HEAD(txqobj, mlx5_txq_obj) txqsobj; /* Verbs/DevX Tx queues. */ /* Indirection tables. */ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index fd53c4d..c332308 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -381,13 +381,6 @@ enum mlx5_flow_fate_type { MLX5_FLOW_FATE_MAX, }; -/* Hash list callback context */ -struct mlx5_flow_cb_ctx { - struct rte_eth_dev *dev; - struct rte_flow_error *error; - void *data; -}; - /* Ma
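For the Rx queues the match context is a whole RSS description rather than a scalar key, so the match callback ends up comparing the hash fields, the key and the queue list. A hedged sketch of such a comparison on a hypothetical structure (loosely modeled on the mlx5_flow_rss_desc shown above, not a copy of it):

#include <stdint.h>
#include <string.h>

#define RSS_KEY_LEN 40

struct rss_desc {
	uint64_t hash_fields;
	uint32_t tunnel;
	uint32_t queue_num;
	uint8_t key[RSS_KEY_LEN];
	uint16_t *queue;	/* destination queues, queue_num entries */
};

/* 0 when the cached hash Rx queue satisfies the requested description. */
static int
hrxq_match(const struct rss_desc *ref, const struct rss_desc *res)
{
	if (ref->hash_fields != res->hash_fields ||
	    ref->tunnel != res->tunnel ||
	    ref->queue_num != res->queue_num)
		return 1;
	if (memcmp(ref->key, res->key, RSS_KEY_LEN) != 0)
		return 1;
	return memcmp(ref->queue, res->queue,
		      ref->queue_num * sizeof(ref->queue[0])) != 0;
}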
[dpdk-dev] [PATCH v2 24/25] net/mlx5: make port ID action cache thread safe
From: Xueming Li To support multi-thread flow insertion, this patch convert port id action cache list to thread safe cache list. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 7 ++ drivers/net/mlx5/mlx5.h | 2 +- drivers/net/mlx5/mlx5_flow.h | 15 +++-- drivers/net/mlx5/mlx5_flow_dv.c | 141 +-- 4 files changed, 94 insertions(+), 71 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index b0dcb40..4e56ded 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -236,6 +236,12 @@ goto error; /* The resources below are only valid with DV support. */ #ifdef HAVE_IBV_FLOW_DV_SUPPORT + /* Init port id action cache list. */ + snprintf(s, sizeof(s), "%s_port_id_action_cache", sh->ibdev_name); + mlx5_cache_list_init(&sh->port_id_action_list, s, 0, sh, +flow_dv_port_id_create_cb, +flow_dv_port_id_match_cb, +flow_dv_port_id_remove_cb); /* Create tags hash list table. */ snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name); sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE, 0, @@ -417,6 +423,7 @@ mlx5_hlist_destroy(sh->tag_table); sh->tag_table = NULL; } + mlx5_cache_list_destroy(&sh->port_id_action_list); mlx5_free_table_hash_list(priv); } diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 30ab09b..60a9ab9 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -646,7 +646,7 @@ struct mlx5_dev_ctx_shared { struct mlx5_hlist *encaps_decaps; /* Encap/decap action hash list. */ struct mlx5_hlist *modify_cmds; struct mlx5_hlist *tag_table; - uint32_t port_id_action_list; /* List of port ID actions. */ + struct mlx5_cache_list port_id_action_list; /* Port ID action cache. */ uint32_t push_vlan_action_list; /* List of push VLAN actions. */ uint32_t sample_action_list; /* List of sample actions. */ uint32_t dest_array_list; /* List of destination array actions. */ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index eac8916..dbbbcd1 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -482,12 +482,10 @@ struct mlx5_flow_dv_jump_tbl_resource { /* Port ID resource structure. */ struct mlx5_flow_dv_port_id_action_resource { - ILIST_ENTRY(uint32_t)next; - /* Pointer to next element. */ - rte_atomic32_t refcnt; /**< Reference counter. */ - void *action; - /**< Action object. */ + struct mlx5_cache_entry entry; + void *action; /**< Action object. */ uint32_t port_id; /**< Port ID value. */ + uint32_t idx; /**< Indexed pool memory index. 
*/ }; /* Push VLAN action resource structure */ @@ -1177,4 +1175,11 @@ struct mlx5_cache_entry *flow_dv_matcher_create_cb(struct mlx5_cache_list *list, void flow_dv_matcher_remove_cb(struct mlx5_cache_list *list, struct mlx5_cache_entry *entry); +int flow_dv_port_id_match_cb(struct mlx5_cache_list *list, +struct mlx5_cache_entry *entry, void *cb_ctx); +struct mlx5_cache_entry *flow_dv_port_id_create_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry, void *cb_ctx); +void flow_dv_port_id_remove_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry); + #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 2d0ef3a..9fa902a 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -2960,6 +2960,52 @@ struct mlx5_hlist_entry * return 0; } +int +flow_dv_port_id_match_cb(struct mlx5_cache_list *list __rte_unused, +struct mlx5_cache_entry *entry, void *cb_ctx) +{ + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5_flow_dv_port_id_action_resource *ref = ctx->data; + struct mlx5_flow_dv_port_id_action_resource *res = + container_of(entry, typeof(*res), entry); + + return ref->port_id != res->port_id; +} + +struct mlx5_cache_entry * +flow_dv_port_id_create_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry __rte_unused, + void *cb_ctx) +{ + struct mlx5_dev_ctx_shared *sh = list->ctx; + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5_flow_dv_port_id_action_resource *ref = ctx->data; + struct mlx5_flow_dv_port_id_action_resource *cache; + uint32_t idx; + int ret; + + /* Register new port id action resource. */ + cache = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_PORT_ID], &idx); + if (!cache)
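A small but recurring detail in these conversions: the resource remembers the indexed-pool slot it was allocated from ('idx'), so the remove callback can free it without any search. A generic sketch of that shape with a toy pool standing in for the driver's ipool; the create callback is assumed to run under the list's write lock, as in the real design.

#include <stdint.h>

struct cache_entry {
	struct cache_entry *next;
	uint32_t ref_cnt;
};

/* Port-ID action resource: keeps the pool slot it came from. */
struct port_id_action {
	struct cache_entry entry;
	void *action;		/* hardware action handle, opaque here */
	uint32_t port_id;	/* match key */
	uint32_t idx;		/* slot in the pool, used on removal */
};

/* Toy indexed pool: a fixed array plus an in-use flag per slot. */
#define POOL_SZ 32
static struct port_id_action pool[POOL_SZ];
static uint8_t pool_used[POOL_SZ];

static struct port_id_action *
pool_zalloc(uint32_t *idx)
{
	uint32_t i;

	for (i = 0; i < POOL_SZ; i++) {
		if (!pool_used[i]) {
			pool_used[i] = 1;
			*idx = i;
			pool[i] = (struct port_id_action){ 0 };
			return &pool[i];
		}
	}
	return NULL;
}

static void
pool_free(uint32_t idx)
{
	pool_used[idx] = 0;
}

/* Create callback: allocate from the pool and record the slot. */
static struct cache_entry *
port_id_create_cb(uint32_t port_id)
{
	uint32_t idx;
	struct port_id_action *res = pool_zalloc(&idx);

	if (res == NULL)
		return NULL;
	res->port_id = port_id;
	res->idx = idx;
	return &res->entry;
}

/* Remove callback: free by the recorded slot, no lookup needed. */
static void
port_id_remove_cb(struct cache_entry *entry)
{
	struct port_id_action *res = (struct port_id_action *)entry;

	pool_free(res->idx);
}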
[dpdk-dev] [PATCH v2 25/25] net/mlx5: make push VLAN action cache thread safe
From: Xueming Li To support multi-thread flow insertion, this patch converts push VLAN action cache list to thread safe cache list. Signed-off-by: Xueming Li Acked-by: Matan Azrad --- drivers/net/mlx5/linux/mlx5_os.c | 7 ++ drivers/net/mlx5/mlx5.h | 2 +- drivers/net/mlx5/mlx5_flow.h | 13 +++- drivers/net/mlx5/mlx5_flow_dv.c | 157 +-- 4 files changed, 102 insertions(+), 77 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 4e56ded..535c644 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -242,6 +242,12 @@ flow_dv_port_id_create_cb, flow_dv_port_id_match_cb, flow_dv_port_id_remove_cb); + /* Init push vlan action cache list. */ + snprintf(s, sizeof(s), "%s_push_vlan_action_cache", sh->ibdev_name); + mlx5_cache_list_init(&sh->push_vlan_action_list, s, 0, sh, +flow_dv_push_vlan_create_cb, +flow_dv_push_vlan_match_cb, +flow_dv_push_vlan_remove_cb); /* Create tags hash list table. */ snprintf(s, sizeof(s), "%s_tags", sh->ibdev_name); sh->tag_table = mlx5_hlist_create(s, MLX5_TAGS_HLIST_ARRAY_SIZE, 0, @@ -424,6 +430,7 @@ sh->tag_table = NULL; } mlx5_cache_list_destroy(&sh->port_id_action_list); + mlx5_cache_list_destroy(&sh->push_vlan_action_list); mlx5_free_table_hash_list(priv); } diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 60a9ab9..908e53a 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -647,7 +647,7 @@ struct mlx5_dev_ctx_shared { struct mlx5_hlist *modify_cmds; struct mlx5_hlist *tag_table; struct mlx5_cache_list port_id_action_list; /* Port ID action cache. */ - uint32_t push_vlan_action_list; /* List of push VLAN actions. */ + struct mlx5_cache_list push_vlan_action_list; /* Push VLAN actions. */ uint32_t sample_action_list; /* List of sample actions. */ uint32_t dest_array_list; /* List of destination array actions. */ struct mlx5_flow_counter_mng cmng; /* Counters management structure. */ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index dbbbcd1..b7b3766 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -490,12 +490,11 @@ struct mlx5_flow_dv_port_id_action_resource { /* Push VLAN action resource structure */ struct mlx5_flow_dv_push_vlan_action_resource { - ILIST_ENTRY(uint32_t)next; - /* Pointer to next element. */ - rte_atomic32_t refcnt; /**< Reference counter. */ + struct mlx5_cache_entry entry; /* Cache entry. */ void *action; /**< Action object. */ uint8_t ft_type; /**< Flow table type, Rx, Tx or FDB. */ rte_be32_t vlan_tag; /**< VLAN tag value. */ + uint32_t idx; /**< Indexed pool memory index. */ }; /* Metadata register copy table entry. 
*/ @@ -1182,4 +1181,12 @@ struct mlx5_cache_entry *flow_dv_port_id_create_cb(struct mlx5_cache_list *list, void flow_dv_port_id_remove_cb(struct mlx5_cache_list *list, struct mlx5_cache_entry *entry); +int flow_dv_push_vlan_match_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry, void *cb_ctx); +struct mlx5_cache_entry *flow_dv_push_vlan_create_cb + (struct mlx5_cache_list *list, +struct mlx5_cache_entry *entry, void *cb_ctx); +void flow_dv_push_vlan_remove_cb(struct mlx5_cache_list *list, +struct mlx5_cache_entry *entry); + #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 9fa902a..2aac66c 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -3045,6 +3045,58 @@ struct mlx5_cache_entry * return 0; } +int +flow_dv_push_vlan_match_cb(struct mlx5_cache_list *list __rte_unused, +struct mlx5_cache_entry *entry, void *cb_ctx) +{ + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5_flow_dv_push_vlan_action_resource *ref = ctx->data; + struct mlx5_flow_dv_push_vlan_action_resource *res = + container_of(entry, typeof(*res), entry); + + return ref->vlan_tag != res->vlan_tag || ref->ft_type != res->ft_type; +} + +struct mlx5_cache_entry * +flow_dv_push_vlan_create_cb(struct mlx5_cache_list *list, + struct mlx5_cache_entry *entry __rte_unused, + void *cb_ctx) +{ + struct mlx5_dev_ctx_shared *sh = list->ctx; + struct mlx5_flow_cb_ctx *ctx = cb_ctx; + struct mlx5_flow_dv_push_vlan_action_resource *ref = ctx->data; + struct
Re: [dpdk-dev] [PATCH v2 2/2] drivers: add headers install helper
> Subject: [PATCH v2 2/2] drivers: add headers install helper > > A lot of drivers export headers, reproduce the same facility than for > libraries. > > Suggested-by: Bruce Richardson > Signed-off-by: David Marchand > --- > doc/guides/contributing/coding_style.rst | 3 +++ > drivers/baseband/acc100/meson.build| 2 +- > drivers/baseband/fpga_5gnr_fec/meson.build | 2 +- > drivers/bus/ifpga/meson.build | 2 +- > drivers/bus/pci/meson.build| 2 +- > drivers/bus/vdev/meson.build | 2 +- > drivers/bus/vmbus/meson.build | 2 +- > drivers/crypto/scheduler/meson.build | 2 +- > drivers/mempool/dpaa2/meson.build | 2 +- > drivers/meson.build| 3 +++ > drivers/net/avp/meson.build| 2 +- > drivers/net/bnxt/meson.build | 2 +- > drivers/net/bonding/meson.build| 2 +- > drivers/net/dpaa/meson.build | 2 +- > drivers/net/dpaa2/meson.build | 2 +- > drivers/net/i40e/meson.build | 2 +- > drivers/net/ice/meson.build| 2 +- > drivers/net/ixgbe/meson.build | 2 +- > drivers/net/ring/meson.build | 2 +- > drivers/net/softnic/meson.build| 2 +- > drivers/net/vhost/meson.build | 2 +- > drivers/raw/dpaa2_cmdif/meson.build| 2 +- > drivers/raw/dpaa2_qdma/meson.build | 2 +- > drivers/raw/ioat/meson.build | 2 +- > drivers/raw/ntb/meson.build| 2 +- > 25 files changed, 29 insertions(+), 23 deletions(-) > > diff --git a/drivers/bus/ifpga/meson.build b/drivers/bus/ifpga/meson.build > index 3ff44d902a..4d0507f553 100644 > --- a/drivers/bus/ifpga/meson.build > +++ b/drivers/bus/ifpga/meson.build > @@ -8,5 +8,5 @@ if is_windows > endif > > deps += ['pci', 'kvargs', 'rawdev'] > -install_headers('rte_bus_ifpga.h') > +headers = files('rte_bus_ifpga.h') > sources = files('ifpga_common.c', 'ifpga_bus.c') diff --git Acked-by: Rosen Xu
Re: [dpdk-dev] [PATCH 0/5] cleanup comments and logs about config options
On Fri, Oct 23, 2020 at 12:05 AM Thomas Monjalon wrote: > > Below patches are cleaning traces of CONFIG_RTE_ after make removal, > except one occurence in app/test/test_cryptodev.c (left as exercise). > PS: In reality I don't know what must be done for this case about QAT. It seems a reintroduction with raw datapath API merge. I would apply the same as Ciara previous: https://git.dpdk.org/dpdk/diff/app/test/test_cryptodev.c?id=c2c92c5d88522bb7f149de8ea6305691d1c65505 I.e. @@ -14022,9 +14022,7 @@ test_cryptodev_qat_raw_api(void /*argv __rte_unused, int argc __rte_unused*/) RTE_STR(CRYPTODEV_NAME_QAT_SYM_PMD)); if (gbl_driver_id == -1) { - RTE_LOG(ERR, USER1, "QAT PMD must be loaded. Check that both " - "CONFIG_RTE_LIBRTE_PMD_QAT and CONFIG_RTE_LIBRTE_PMD_QAT_SYM " - "are enabled in config file to run this testsuite.\n"); + RTE_LOG(ERR, USER1, "QAT PMD must be loaded.\n"); return TEST_SKIPPED; } The rest of the series looks good to me. Acked-by: David Marchand -- David Marchand
[dpdk-dev] [PATCH] crypto/dpaa2_sec: remove dead code
RTE_LIBRTE_SECURITY_TEST never existed, the variable under this check is never used. Fixes: e117c18a1dbe ("crypto/dpaa2_sec: restructure session management") Cc: sta...@dpdk.org Signed-off-by: David Marchand --- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c index afcd6bd063..7577803823 100644 --- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c @@ -2818,12 +2818,6 @@ dpaa2_sec_ipsec_proto_init(struct rte_crypto_cipher_xform *cipher_xform, return 0; } -#ifdef RTE_LIBRTE_SECURITY_TEST -static uint8_t aes_cbc_iv[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }; -#endif - static int dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev, struct rte_security_session_conf *conf, -- 2.23.0
[dpdk-dev] [PATCH] app/procinfo: clean old build macro
When merging this series after Bruce changes on build macros, an old macro usage has been re-introduced. Fixes: d82d6ac64338 ("app/procinfo: add crypto security context info") Signed-off-by: David Marchand --- app/proc-info/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/proc-info/main.c b/app/proc-info/main.c index 6ca14c6639..d743209f0d 100644 --- a/app/proc-info/main.c +++ b/app/proc-info/main.c @@ -1223,7 +1223,7 @@ show_crypto(void) stats.dequeue_err_count); } -#ifdef RTE_LIBRTE_SECURITY +#ifdef RTE_LIB_SECURITY show_security_context(i); #endif } -- 2.23.0
[dpdk-dev] [PATCH 0/3] Rework CTF event description storage
Following recent increase of an internal array that was limiting CTF event descriptions, I had a second look at the code. All of this is slow path, so I see no reason in keeping this limitation and we can go with dynamic allocations. While at it, I tweaked the metadata file output. I consider this -rc2 material. -- David Marchand David Marchand (3): trace: fixup CTF event description at registration trace: remove size limit on CTF event description trace: make CTF metadata prettier lib/librte_eal/common/eal_common_trace.c | 44 ++--- lib/librte_eal/common/eal_common_trace_ctf.c | 164 +-- lib/librte_eal/common/eal_trace.h| 4 +- 3 files changed, 67 insertions(+), 145 deletions(-) -- 2.23.0
[dpdk-dev] [PATCH 1/3] trace: fixup CTF event description at registration
CTF event description is currently built by appending all fields in a single string at trace point registration. When dumping the metadata, this string is split again and inspected to fixup reserved keywords and special tokens like "." or "->". Move this fixup per field at trace point registration time so that there is no need for inspecting / string parsing when dumping metadata. Use dynamic allocations to remove an artificial size limit on the CTF event description manipulations. Signed-off-by: David Marchand --- lib/librte_eal/common/eal_common_trace.c | 5 + lib/librte_eal/common/eal_common_trace_ctf.c | 159 +-- lib/librte_eal/common/eal_trace.h| 1 + 3 files changed, 46 insertions(+), 119 deletions(-) diff --git a/lib/librte_eal/common/eal_common_trace.c b/lib/librte_eal/common/eal_common_trace.c index b6da5537fe..80b458edb6 100644 --- a/lib/librte_eal/common/eal_common_trace.c +++ b/lib/librte_eal/common/eal_common_trace.c @@ -432,11 +432,16 @@ __rte_trace_point_emit_field(size_t sz, const char *in, const char *datatype) char *field = RTE_PER_LCORE(ctf_field); int count = RTE_PER_LCORE(ctf_count); size_t size; + char *fixup; int rc; size = RTE_MAX(0, TRACE_CTF_FIELD_SIZE - 1 - count); RTE_PER_LCORE(trace_point_sz) += sz; + fixup = trace_metadata_fixup_field(in); + if (fixup != NULL) + in = fixup; rc = snprintf(RTE_PTR_ADD(field, count), size, "%s %s;", datatype, in); + free(fixup); if (rc <= 0 || (size_t)rc >= size) { RTE_PER_LCORE(trace_point_sz) = 0; trace_crit("CTF field is too long"); diff --git a/lib/librte_eal/common/eal_common_trace_ctf.c b/lib/librte_eal/common/eal_common_trace_ctf.c index 9dc91df0fb..bc86432902 100644 --- a/lib/librte_eal/common/eal_common_trace_ctf.c +++ b/lib/librte_eal/common/eal_common_trace_ctf.c @@ -220,131 +220,12 @@ meta_stream_emit(char **meta, int *offset) return meta_copy(meta, offset, str, rc); } -static void -string_fixed_replace(char *input, const char *search, const char *replace) -{ - char *found; - size_t len; - - found = strstr(input, search); - if (found == NULL) - return; - - if (strlen(found) != strlen(search)) - return; - - len = strlen(replace); - memcpy(found, replace, len); - found[len] = '\0'; -} - -static void -ctf_fixup_align(char *str) -{ - string_fixed_replace(str, "align", "_align"); -} - -static void -ctf_fixup_arrow_deref(char *str) -{ - const char *replace = "_"; - const char *search = "->"; - char *found; - size_t len; - - found = strstr(str, search); - if (found == NULL) - return; - - do { - memcpy(found, replace, strlen(replace)); - len = strlen(found + 2); - memcpy(found + 1, found + 2, len); - found[len + 1] = '\0'; - found = strstr(str, search); - } while (found != NULL); -} - -static void -ctf_fixup_dot_deref(char *str) -{ - const char *replace = "_"; - const char *search = "."; - char *found; - size_t len; - - found = strstr(str, search); - if (found == NULL) - return; - - len = strlen(replace); - do { - memcpy(found, replace, len); - found = strstr(str, search); - } while (found != NULL); -} - -static void -ctf_fixup_event(char *str) -{ - string_fixed_replace(str, "event", "_event"); -} - -static int -ctf_fixup_keyword(char *str) -{ - char dup_str[TRACE_CTF_FIELD_SIZE]; - char input[TRACE_CTF_FIELD_SIZE]; - const char *delim = ";"; - char *from; - int len; - - if (str == NULL) - return 0; - - len = strlen(str); - if (len >= TRACE_CTF_FIELD_SIZE) { - trace_err("ctf_field reached its maximum limit"); - return -EMSGSIZE; - } - - /* Create duplicate string */ - strcpy(dup_str, str); - - len = 0; - from = strtok(dup_str, 
delim); - while (from != NULL) { - strcpy(input, from); - ctf_fixup_align(input); - ctf_fixup_dot_deref(input); - ctf_fixup_arrow_deref(input); - ctf_fixup_event(input); - - strcpy(&input[strlen(input)], delim); - if ((len + strlen(input)) >= TRACE_CTF_FIELD_SIZE) { - trace_err("ctf_field reached its maximum limit"); - return -EMSGSIZE; - } - - strcpy(str + len, input); - len += strlen(input); - from = strtok(NULL, delim); - } - - return 0; -} - static int meta_event_emit(char **meta, int *offset, struct trace_point *tp) { char *str = NULL; int rc; - /* Fixup ctf fie
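To make the fixup described above concrete, here is a rough standalone sketch of what a per-field fixup can look like: turn "." and "->" dereferences into "_", prefix CTF reserved words with "_", and return NULL when the name needs no change, which is what the caller in the earlier hunk checks for. It is an illustration only, not the function added by this patch.

#include <stdlib.h>
#include <string.h>

static char *
fixup_field_name(const char *in)
{
	static const char * const reserved[] = { "align", "event", NULL };
	size_t len = strlen(in);
	size_t i, j = 0;
	int changed = 0;
	char *out;

	/* Replacements never grow the string; only a reserved-word prefix
	 * can add one character.
	 */
	out = malloc(len + 2);
	if (out == NULL)
		return NULL;
	/* Prefix CTF reserved words. */
	for (i = 0; reserved[i] != NULL; i++) {
		if (strcmp(in, reserved[i]) == 0) {
			out[j++] = '_';
			changed = 1;
			break;
		}
	}
	/* Replace "->" and "." dereferences with "_". */
	for (i = 0; in[i] != '\0'; i++) {
		if (in[i] == '-' && in[i + 1] == '>') {
			out[j++] = '_';
			i++;		/* skip the '>' */
			changed = 1;
		} else if (in[i] == '.') {
			out[j++] = '_';
			changed = 1;
		} else {
			out[j++] = in[i];
		}
	}
	out[j] = '\0';
	if (!changed) {
		free(out);
		return NULL;	/* caller keeps using the original name */
	}
	return out;
}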