> -----Original Message-----
> From: Michael Baum <michae...@nvidia.com>
> Sent: Monday, April 5, 2021 17:01
> To: dev@dpdk.org
> Cc: Matan Azrad <ma...@nvidia.com>; Raslan Darawsheh <rasl...@nvidia.com>; Slava Ovsiienko <viachesl...@nvidia.com>
> Subject: [PATCH 1/6] net/mlx5: separate Rx function declarations to another file
>
> The mlx5_rxtx.c file contains many Tx burst functions, each of them
> performance-optimized for a specific set of requested offloads.
> They are generated from a template function, and compiling them takes
> significant time because a large number of huge functions are emitted
> into the same file, whose compilation cannot be parallelized with
> multithreading.
>
> Therefore we can split the mlx5_rxtx.c file into several separate files
> so that different functions can be compiled simultaneously.
> In this patch, we separate the Rx function declarations into a different
> header file, in preparation for removing them from the source file and
> as an optional preparation step for further consolidation of the Rx
> burst functions.
>
> Signed-off-by: Michael Baum <michae...@nvidia.com>

Acked-by: Viacheslav Ovsiienko <viachesl...@nvidia.com>
> --- > drivers/net/mlx5/linux/mlx5_mp_os.c | 1 + > drivers/net/mlx5/linux/mlx5_os.c | 1 + > drivers/net/mlx5/linux/mlx5_verbs.c | 1 + > drivers/net/mlx5/mlx5.c | 1 + > drivers/net/mlx5/mlx5_devx.c | 1 + > drivers/net/mlx5/mlx5_ethdev.c | 1 + > drivers/net/mlx5/mlx5_flow.c | 1 + > drivers/net/mlx5/mlx5_flow_dv.c | 1 + > drivers/net/mlx5/mlx5_flow_verbs.c | 1 + > drivers/net/mlx5/mlx5_mr.c | 1 + > drivers/net/mlx5/mlx5_rss.c | 1 + > drivers/net/mlx5/mlx5_rx.h | 598 > ++++++++++++++++++++++++++++++++++++ > drivers/net/mlx5/mlx5_rxq.c | 1 + > drivers/net/mlx5/mlx5_rxtx.c | 1 + > drivers/net/mlx5/mlx5_rxtx.h | 569 ---------------------------------- > drivers/net/mlx5/mlx5_rxtx_vec.c | 1 + > drivers/net/mlx5/mlx5_stats.c | 1 + > drivers/net/mlx5/mlx5_trigger.c | 1 + > drivers/net/mlx5/mlx5_txpp.c | 1 + > drivers/net/mlx5/mlx5_vlan.c | 1 + > drivers/net/mlx5/windows/mlx5_os.c | 1 + > 21 files changed, 617 insertions(+), 569 deletions(-) > create mode 100644 drivers/net/mlx5/mlx5_rx.h > > diff --git a/drivers/net/mlx5/linux/mlx5_mp_os.c > b/drivers/net/mlx5/linux/mlx5_mp_os.c > index 8011ca8..63fa278 100644 > --- a/drivers/net/mlx5/linux/mlx5_mp_os.c > +++ b/drivers/net/mlx5/linux/mlx5_mp_os.c > @@ -16,6 +16,7 @@ > > #include "mlx5.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_utils.h" > > int > diff --git a/drivers/net/mlx5/linux/mlx5_os.c > b/drivers/net/mlx5/linux/mlx5_os.c > index 2d5bcab..97a28ec 100644 > --- a/drivers/net/mlx5/linux/mlx5_os.c > +++ b/drivers/net/mlx5/linux/mlx5_os.c > @@ -40,6 +40,7 @@ > #include "mlx5_common_os.h" > #include "mlx5_utils.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_autoconf.h" > #include "mlx5_mr.h" > #include "mlx5_flow.h" > diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c > b/drivers/net/mlx5/linux/mlx5_verbs.c > index c7d4b17..73096af 100644 > --- a/drivers/net/mlx5/linux/mlx5_verbs.c > +++ b/drivers/net/mlx5/linux/mlx5_verbs.c > @@ -22,6 +22,7 @@ > #include <mlx5_common_mr.h> > #include <mlx5_rxtx.h> > #include <mlx5_verbs.h> > +#include <mlx5_rx.h> > #include <mlx5_utils.h> > #include <mlx5_malloc.h> > > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c > index 9557d06..6f77bc2 100644 > --- a/drivers/net/mlx5/mlx5.c > +++ b/drivers/net/mlx5/mlx5.c > @@ -35,6 +35,7 @@ > #include "mlx5.h" > #include "mlx5_utils.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_autoconf.h" > #include "mlx5_mr.h" > #include "mlx5_flow.h" > diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c > index 5c940ed..76935f6 100644 > --- a/drivers/net/mlx5/mlx5_devx.c > +++ b/drivers/net/mlx5/mlx5_devx.c > @@ -21,6 +21,7 @@ > #include "mlx5.h" > #include "mlx5_common_os.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_utils.h" > #include "mlx5_devx.h" > #include "mlx5_flow.h" > diff --git a/drivers/net/mlx5/mlx5_ethdev.c > b/drivers/net/mlx5/mlx5_ethdev.c > index 564d713..708e3a3 100644 > --- a/drivers/net/mlx5/mlx5_ethdev.c > +++ b/drivers/net/mlx5/mlx5_ethdev.c > @@ -23,6 +23,7 @@ > #include <mlx5_malloc.h> > > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_autoconf.h" > > /** > diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c > index c347f81..b3877a1 100644 > --- a/drivers/net/mlx5/mlx5_flow.c > +++ b/drivers/net/mlx5/mlx5_flow.c > @@ -30,6 +30,7 @@ > #include "mlx5_flow.h" > #include "mlx5_flow_os.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_common_os.h" > #include "rte_pmd_mlx5.h" > > diff --git 
a/drivers/net/mlx5/mlx5_flow_dv.c > b/drivers/net/mlx5/mlx5_flow_dv.c > index 533dadf..cac05fb 100644 > --- a/drivers/net/mlx5/mlx5_flow_dv.c > +++ b/drivers/net/mlx5/mlx5_flow_dv.c > @@ -33,6 +33,7 @@ > #include "mlx5_flow.h" > #include "mlx5_flow_os.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "rte_pmd_mlx5.h" > > #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || > !defined(HAVE_INFINIBAND_VERBS_H) > diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c > b/drivers/net/mlx5/mlx5_flow_verbs.c > index b442b9b..c331350 100644 > --- a/drivers/net/mlx5/mlx5_flow_verbs.c > +++ b/drivers/net/mlx5/mlx5_flow_verbs.c > @@ -24,6 +24,7 @@ > #include "mlx5.h" > #include "mlx5_flow.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > > #define VERBS_SPEC_INNER(item_flags) \ > (!!((item_flags) & MLX5_FLOW_LAYER_TUNNEL) ? > IBV_FLOW_SPEC_INNER : 0) > diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c > index 3255393..2014936 100644 > --- a/drivers/net/mlx5/mlx5_mr.c > +++ b/drivers/net/mlx5/mlx5_mr.c > @@ -15,6 +15,7 @@ > #include "mlx5.h" > #include "mlx5_mr.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > > struct mr_find_contig_memsegs_data { > uintptr_t addr; > diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c > index dc0131a..c32129c 100644 > --- a/drivers/net/mlx5/mlx5_rss.c > +++ b/drivers/net/mlx5/mlx5_rss.c > @@ -16,6 +16,7 @@ > #include "mlx5_defs.h" > #include "mlx5.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > > /** > * DPDK callback to update the RSS hash configuration. > diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h > new file mode 100644 > index 0000000..83b1f38 > --- /dev/null > +++ b/drivers/net/mlx5/mlx5_rx.h > @@ -0,0 +1,598 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright 2021 6WIND S.A. > + * Copyright 2021 Mellanox Technologies, Ltd > + */ > + > +#ifndef RTE_PMD_MLX5_RX_H_ > +#define RTE_PMD_MLX5_RX_H_ > + > +#include <stdint.h> > +#include <sys/queue.h> > + > +#include <rte_mbuf.h> > +#include <rte_mempool.h> > +#include <rte_common.h> > +#include <rte_spinlock.h> > + > +#include <mlx5_common_mr.h> > + > +#include "mlx5.h" > +#include "mlx5_autoconf.h" > +#include "mlx5_mr.h" > + > +/* Support tunnel matching. */ > +#define MLX5_FLOW_TUNNEL 10 > + > +struct mlx5_rxq_stats { > +#ifdef MLX5_PMD_SOFT_COUNTERS > + uint64_t ipackets; /**< Total of successfully received packets. */ > + uint64_t ibytes; /**< Total of successfully received bytes. */ > +#endif > + uint64_t idropped; /**< Total of packets dropped when RX ring full. > */ > + uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */ > +}; > + > +/* Compressed CQE context. */ > +struct rxq_zip { > + uint16_t ai; /* Array index. */ > + uint16_t ca; /* Current array index. */ > + uint16_t na; /* Next array index. */ > + uint16_t cq_ci; /* The next CQE. */ > + uint32_t cqe_cnt; /* Number of CQEs. */ > +}; > + > +/* Multi-Packet RQ buffer header. */ > +struct mlx5_mprq_buf { > + struct rte_mempool *mp; > + uint16_t refcnt; /* Atomically accessed refcnt. */ > + uint8_t pad[RTE_PKTMBUF_HEADROOM]; /* Headroom for the first > packet. */ > + struct rte_mbuf_ext_shared_info shinfos[]; > + /* > + * Shared information per stride. > + * More memory will be allocated for the first stride head-room and > for > + * the strides data. > + */ > +} __rte_cache_aligned; > + > +/* Get pointer to the first stride. 
*/ > +#define mlx5_mprq_buf_addr(ptr, strd_n) (RTE_PTR_ADD((ptr), \ > + sizeof(struct mlx5_mprq_buf) + \ > + (strd_n) * \ > + sizeof(struct rte_mbuf_ext_shared_info) + \ > + RTE_PKTMBUF_HEADROOM)) > + > +#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 > +#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 > + > +enum mlx5_rxq_err_state { > + MLX5_RXQ_ERR_STATE_NO_ERROR = 0, > + MLX5_RXQ_ERR_STATE_NEED_RESET, > + MLX5_RXQ_ERR_STATE_NEED_READY, > +}; > + > +enum mlx5_rqx_code { > + MLX5_RXQ_CODE_EXIT = 0, > + MLX5_RXQ_CODE_NOMBUF, > + MLX5_RXQ_CODE_DROPPED, > +}; > + > +struct mlx5_eth_rxseg { > + struct rte_mempool *mp; /**< Memory pool to allocate segment > from. */ > + uint16_t length; /**< Segment data length, configures split point. */ > + uint16_t offset; /**< Data offset from beginning of mbuf data > buffer. */ > + uint32_t reserved; /**< Reserved field. */ > +}; > + > +/* RX queue descriptor. */ > +struct mlx5_rxq_data { > + unsigned int csum:1; /* Enable checksum offloading. */ > + unsigned int hw_timestamp:1; /* Enable HW timestamp. */ > + unsigned int rt_timestamp:1; /* Realtime timestamp format. */ > + unsigned int vlan_strip:1; /* Enable VLAN stripping. */ > + unsigned int crc_present:1; /* CRC must be subtracted. */ > + unsigned int sges_n:3; /* Log 2 of SGEs (max buffers per packet). */ > + unsigned int cqe_n:4; /* Log 2 of CQ elements. */ > + unsigned int elts_n:4; /* Log 2 of Mbufs. */ > + unsigned int rss_hash:1; /* RSS hash result is enabled. */ > + unsigned int mark:1; /* Marked flow available on the queue. */ > + unsigned int strd_num_n:5; /* Log 2 of the number of stride. */ > + unsigned int strd_sz_n:4; /* Log 2 of stride size. */ > + unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */ > + unsigned int err_state:2; /* enum mlx5_rxq_err_state. */ > + unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */ > + unsigned int lro:1; /* Enable LRO. */ > + unsigned int dynf_meta:1; /* Dynamic metadata is configured. */ > + unsigned int mcqe_format:3; /* CQE compression format. */ > + volatile uint32_t *rq_db; > + volatile uint32_t *cq_db; > + uint16_t port_id; > + uint32_t elts_ci; > + uint32_t rq_ci; > + uint16_t consumed_strd; /* Number of consumed strides in WQE. */ > + uint32_t rq_pi; > + uint32_t cq_ci; > + uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */ > + uint32_t byte_mask; > + union { > + struct rxq_zip zip; /* Compressed context. */ > + uint16_t decompressed; > + /* Number of ready mbufs decompressed from the CQ. */ > + }; > + struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ > + uint16_t mprq_max_memcpy_len; /* Maximum size of packet to > memcpy. */ > + volatile void *wqes; > + volatile struct mlx5_cqe(*cqes)[]; > + struct rte_mbuf *(*elts)[]; > + struct mlx5_mprq_buf *(*mprq_bufs)[]; > + struct rte_mempool *mp; > + struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. > */ > + struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. > */ > + struct mlx5_dev_ctx_shared *sh; /* Shared context. */ > + uint16_t idx; /* Queue index. */ > + struct mlx5_rxq_stats stats; > + rte_xmm_t mbuf_initializer; /* Default rearm/flags for vectorized Rx. > */ > + struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */ > + void *cq_uar; /* Verbs CQ user access region. */ > + uint32_t cqn; /* CQ number. */ > + uint8_t cq_arm_sn; /* CQ arm seq number. 
*/ > +#ifndef RTE_ARCH_64 > + rte_spinlock_t *uar_lock_cq; > + /* CQ (UAR) access lock required for 32bit implementations */ > +#endif > + uint32_t tunnel; /* Tunnel information. */ > + int timestamp_offset; /* Dynamic mbuf field for timestamp. */ > + uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */ > + uint64_t flow_meta_mask; > + int32_t flow_meta_offset; > + uint32_t flow_meta_port_mask; > + uint32_t rxseg_n; /* Number of split segment descriptions. */ > + struct mlx5_eth_rxseg rxseg[MLX5_MAX_RXQ_NSEG]; > + /* Buffer split segment descriptions - sizes, offsets, pools. */ > +} __rte_cache_aligned; > + > +enum mlx5_rxq_type { > + MLX5_RXQ_TYPE_STANDARD, /* Standard Rx queue. */ > + MLX5_RXQ_TYPE_HAIRPIN, /* Hairpin Rx queue. */ > + MLX5_RXQ_TYPE_UNDEFINED, > +}; > + > +/* RX queue control descriptor. */ > +struct mlx5_rxq_ctrl { > + struct mlx5_rxq_data rxq; /* Data path structure. */ > + LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */ > + uint32_t refcnt; /* Reference counter. */ > + struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */ > + struct mlx5_priv *priv; /* Back pointer to private data. */ > + enum mlx5_rxq_type type; /* Rxq type. */ > + unsigned int socket; /* CPU socket ID for allocations. */ > + unsigned int irq:1; /* Whether IRQ is enabled. */ > + uint32_t flow_mark_n; /* Number of Mark/Flag flows using this > Queue. */ > + uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels > counters. */ > + uint32_t wqn; /* WQ number. */ > + uint16_t dump_file_n; /* Number of dump files. */ > + struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ > + uint32_t hairpin_status; /* Hairpin binding status. */ > +}; > + > +/* mlx5_rxq.c */ > + > +extern uint8_t rss_hash_default_key[]; > + > +unsigned int mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data); > +int mlx5_mprq_free_mp(struct rte_eth_dev *dev); > +int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev); > +int mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); > +int mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); > +int mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t > queue_id); > +int mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t > queue_id); > +int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t > desc, > + unsigned int socket, const struct rte_eth_rxconf > *conf, > + struct rte_mempool *mp); > +int mlx5_rx_hairpin_queue_setup > + (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, > + const struct rte_eth_hairpin_conf *hairpin_conf); > +void mlx5_rx_queue_release(void *dpdk_rxq); > +int mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev); > +void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev); > +int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id); > +int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id); > +int mlx5_rxq_obj_verify(struct rte_eth_dev *dev); > +struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t > idx, > + uint16_t desc, unsigned int socket, > + const struct rte_eth_rxconf *conf, > + const struct rte_eth_rxseg_split *rx_seg, > + uint16_t n_seg); > +struct mlx5_rxq_ctrl *mlx5_rxq_hairpin_new > + (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, > + const struct rte_eth_hairpin_conf *hairpin_conf); > +struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx); > +int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx); > +int mlx5_rxq_verify(struct rte_eth_dev *dev); > +int rxq_alloc_elts(struct mlx5_rxq_ctrl 
*rxq_ctrl); > +int mlx5_ind_table_obj_verify(struct rte_eth_dev *dev); > +struct mlx5_ind_table_obj *mlx5_ind_table_obj_get(struct rte_eth_dev > *dev, > + const uint16_t *queues, > + uint32_t queues_n); > +int mlx5_ind_table_obj_release(struct rte_eth_dev *dev, > + struct mlx5_ind_table_obj *ind_tbl, > + bool standalone); > +int mlx5_ind_table_obj_setup(struct rte_eth_dev *dev, > + struct mlx5_ind_table_obj *ind_tbl); > +int mlx5_ind_table_obj_modify(struct rte_eth_dev *dev, > + struct mlx5_ind_table_obj *ind_tbl, > + uint16_t *queues, const uint32_t queues_n, > + bool standalone); > +struct mlx5_cache_entry *mlx5_hrxq_create_cb(struct mlx5_cache_list > *list, > + struct mlx5_cache_entry *entry __rte_unused, void > *cb_ctx); > +int mlx5_hrxq_match_cb(struct mlx5_cache_list *list, > + struct mlx5_cache_entry *entry, > + void *cb_ctx); > +void mlx5_hrxq_remove_cb(struct mlx5_cache_list *list, > + struct mlx5_cache_entry *entry); > +uint32_t mlx5_hrxq_get(struct rte_eth_dev *dev, > + struct mlx5_flow_rss_desc *rss_desc); > +int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hxrq_idx); > +uint32_t mlx5_hrxq_verify(struct rte_eth_dev *dev); > +enum mlx5_rxq_type mlx5_rxq_get_type(struct rte_eth_dev *dev, > uint16_t idx); > +const struct rte_eth_hairpin_conf *mlx5_rxq_get_hairpin_conf > + (struct rte_eth_dev *dev, uint16_t idx); > +struct mlx5_hrxq *mlx5_drop_action_create(struct rte_eth_dev *dev); > +void mlx5_drop_action_destroy(struct rte_eth_dev *dev); > +uint64_t mlx5_get_rx_port_offloads(void); > +uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev); > +void mlx5_rxq_timestamp_set(struct rte_eth_dev *dev); > +int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx, > + const uint8_t *rss_key, uint32_t rss_key_len, > + uint64_t hash_fields, > + const uint16_t *queues, uint32_t queues_n); > + > +/* mlx5_rxtx.c */ > + > +uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t > pkts_n); > +void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq); > +__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t > vec); > +void mlx5_mprq_buf_free_cb(void *addr, void *opaque); > +void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf); > +uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, > + uint16_t pkts_n); > +uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, > + uint16_t pkts_n); > +int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset); > +uint32_t mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t > rx_queue_id); > +void mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, > + struct rte_eth_rxq_info *qinfo); > +int mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t > rx_queue_id, > + struct rte_eth_burst_mode *mode); > + > +/* Vectorized version of mlx5_rxtx.c */ > +int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data); > +int mlx5_check_vec_rx_support(struct rte_eth_dev *dev); > +uint16_t mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, > + uint16_t pkts_n); > +uint16_t mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf > **pkts, > + uint16_t pkts_n); > + > +/* mlx5_mr.c */ > + > +uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr); > + > +/** > + * Query LKey from a packet buffer for Rx. No need to flush local caches for > Rx > + * as mempool is pre-configured and static. > + * > + * @param rxq > + * Pointer to Rx queue structure. > + * @param addr > + * Address to search. > + * > + * @return > + * Searched LKey on success, UINT32_MAX on no match. 
> + */ > +static __rte_always_inline uint32_t > +mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr) > +{ > + struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl; > + uint32_t lkey; > + > + /* Linear search on MR cache array. */ > + lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru, > + MLX5_MR_CACHE_N, addr); > + if (likely(lkey != UINT32_MAX)) > + return lkey; > + /* Take slower bottom-half (Binary Search) on miss. */ > + return mlx5_rx_addr2mr_bh(rxq, addr); > +} > + > +#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)- > >buf_addr)) > + > +/** > + * Convert timestamp from HW format to linear counter > + * from Packet Pacing Clock Queue CQE timestamp format. > + * > + * @param sh > + * Pointer to the device shared context. Might be needed > + * to convert according current device configuration. > + * @param ts > + * Timestamp from CQE to convert. > + * @return > + * UTC in nanoseconds > + */ > +static __rte_always_inline uint64_t > +mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts) > +{ > + RTE_SET_USED(sh); > + return (ts & UINT32_MAX) + (ts >> 32) * NS_PER_S; > +} > + > +/** > + * Set timestamp in mbuf dynamic field. > + * > + * @param mbuf > + * Structure to write into. > + * @param offset > + * Dynamic field offset in mbuf structure. > + * @param timestamp > + * Value to write. > + */ > +static __rte_always_inline void > +mlx5_timestamp_set(struct rte_mbuf *mbuf, int offset, > + rte_mbuf_timestamp_t timestamp) > +{ > + *RTE_MBUF_DYNFIELD(mbuf, offset, rte_mbuf_timestamp_t *) = > timestamp; > +} > + > +/** > + * Replace MPRQ buffer. > + * > + * @param rxq > + * Pointer to Rx queue structure. > + * @param rq_idx > + * RQ index to replace. > + */ > +static __rte_always_inline void > +mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx) > +{ > + const uint32_t strd_n = 1 << rxq->strd_num_n; > + struct mlx5_mprq_buf *rep = rxq->mprq_repl; > + volatile struct mlx5_wqe_data_seg *wqe = > + &((volatile struct mlx5_wqe_mprq *)rxq- > >wqes)[rq_idx].dseg; > + struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_idx]; > + void *addr; > + > + if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) > 1) { > + MLX5_ASSERT(rep != NULL); > + /* Replace MPRQ buf. */ > + (*rxq->mprq_bufs)[rq_idx] = rep; > + /* Replace WQE. */ > + addr = mlx5_mprq_buf_addr(rep, strd_n); > + wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); > + /* If there's only one MR, no need to replace LKey in WQE. */ > + if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > > 1)) > + wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); > + /* Stash a mbuf for next replacement. */ > + if (likely(!rte_mempool_get(rxq->mprq_mp, (void > **)&rep))) > + rxq->mprq_repl = rep; > + else > + rxq->mprq_repl = NULL; > + /* Release the old buffer. */ > + mlx5_mprq_buf_free(buf); > + } else if (unlikely(rxq->mprq_repl == NULL)) { > + struct mlx5_mprq_buf *rep; > + > + /* > + * Currently, the MPRQ mempool is out of buffer > + * and doing memcpy regardless of the size of Rx > + * packet. Retry allocation to get back to > + * normal. > + */ > + if (!rte_mempool_get(rxq->mprq_mp, (void **)&rep)) > + rxq->mprq_repl = rep; > + } > +} > + > +/** > + * Attach or copy MPRQ buffer content to a packet. > + * > + * @param rxq > + * Pointer to Rx queue structure. > + * @param pkt > + * Pointer to a packet to fill. > + * @param len > + * Packet length. > + * @param buf > + * Pointer to a MPRQ buffer to take the data from. > + * @param strd_idx > + * Stride index to start from. 
> + * @param strd_cnt > + * Number of strides to consume. > + */ > +static __rte_always_inline enum mlx5_rqx_code > +mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, > uint32_t len, > + struct mlx5_mprq_buf *buf, uint16_t strd_idx, uint16_t > strd_cnt) > +{ > + const uint32_t strd_n = 1 << rxq->strd_num_n; > + const uint16_t strd_sz = 1 << rxq->strd_sz_n; > + const uint16_t strd_shift = > + MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; > + const int32_t hdrm_overlap = > + len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; > + const uint32_t offset = strd_idx * strd_sz + strd_shift; > + void *addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), > offset); > + > + /* > + * Memcpy packets to the target mbuf if: > + * - The size of packet is smaller than mprq_max_memcpy_len. > + * - Out of buffer in the Mempool for Multi-Packet RQ. > + * - The packet's stride overlaps a headroom and scatter is off. > + */ > + if (len <= rxq->mprq_max_memcpy_len || > + rxq->mprq_repl == NULL || > + (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { > + if (likely(len <= > + (uint32_t)(pkt->buf_len - > RTE_PKTMBUF_HEADROOM))) { > + rte_memcpy(rte_pktmbuf_mtod(pkt, void *), > + addr, len); > + DATA_LEN(pkt) = len; > + } else if (rxq->strd_scatter_en) { > + struct rte_mbuf *prev = pkt; > + uint32_t seg_len = RTE_MIN(len, (uint32_t) > + (pkt->buf_len - > RTE_PKTMBUF_HEADROOM)); > + uint32_t rem_len = len - seg_len; > + > + rte_memcpy(rte_pktmbuf_mtod(pkt, void *), > + addr, seg_len); > + DATA_LEN(pkt) = seg_len; > + while (rem_len) { > + struct rte_mbuf *next = > + rte_pktmbuf_alloc(rxq->mp); > + > + if (unlikely(next == NULL)) > + return MLX5_RXQ_CODE_NOMBUF; > + NEXT(prev) = next; > + SET_DATA_OFF(next, 0); > + addr = RTE_PTR_ADD(addr, seg_len); > + seg_len = RTE_MIN(rem_len, (uint32_t) > + (next->buf_len - > RTE_PKTMBUF_HEADROOM)); > + rte_memcpy > + (rte_pktmbuf_mtod(next, void *), > + addr, seg_len); > + DATA_LEN(next) = seg_len; > + rem_len -= seg_len; > + prev = next; > + ++NB_SEGS(pkt); > + } > + } else { > + return MLX5_RXQ_CODE_DROPPED; > + } > + } else { > + rte_iova_t buf_iova; > + struct rte_mbuf_ext_shared_info *shinfo; > + uint16_t buf_len = strd_cnt * strd_sz; > + void *buf_addr; > + > + /* Increment the refcnt of the whole chunk. */ > + __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED); > + MLX5_ASSERT(__atomic_load_n(&buf->refcnt, > + __ATOMIC_RELAXED) <= strd_n + 1); > + buf_addr = RTE_PTR_SUB(addr, > RTE_PKTMBUF_HEADROOM); > + /* > + * MLX5 device doesn't use iova but it is necessary in a > + * case where the Rx packet is transmitted via a > + * different PMD. > + */ > + buf_iova = rte_mempool_virt2iova(buf) + > + RTE_PTR_DIFF(buf_addr, buf); > + shinfo = &buf->shinfos[strd_idx]; > + rte_mbuf_ext_refcnt_set(shinfo, 1); > + /* > + * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when > + * attaching the stride to mbuf and more offload flags > + * will be added below by calling rxq_cq_to_mbuf(). > + * Other fields will be overwritten. > + */ > + rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, > + buf_len, shinfo); > + /* Set mbuf head-room. */ > + SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); > + MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); > + MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= > + len - (hdrm_overlap > 0 ? hdrm_overlap : 0)); > + DATA_LEN(pkt) = len; > + /* > + * Copy the last fragment of a packet (up to headroom > + * size bytes) in case there is a stride overlap with > + * a next packet's headroom. Allocate a separate mbuf > + * to store this fragment and link it. 
Scatter is on. > + */ > + if (hdrm_overlap > 0) { > + MLX5_ASSERT(rxq->strd_scatter_en); > + struct rte_mbuf *seg = > + rte_pktmbuf_alloc(rxq->mp); > + > + if (unlikely(seg == NULL)) > + return MLX5_RXQ_CODE_NOMBUF; > + SET_DATA_OFF(seg, 0); > + rte_memcpy(rte_pktmbuf_mtod(seg, void *), > + RTE_PTR_ADD(addr, len - hdrm_overlap), > + hdrm_overlap); > + DATA_LEN(seg) = hdrm_overlap; > + DATA_LEN(pkt) = len - hdrm_overlap; > + NEXT(pkt) = seg; > + NB_SEGS(pkt) = 2; > + } > + } > + return MLX5_RXQ_CODE_EXIT; > +} > + > +/** > + * Check whether Multi-Packet RQ can be enabled for the device. > + * > + * @param dev > + * Pointer to Ethernet device. > + * > + * @return > + * 1 if supported, negative errno value if not. > + */ > +static __rte_always_inline int > +mlx5_check_mprq_support(struct rte_eth_dev *dev) > +{ > + struct mlx5_priv *priv = dev->data->dev_private; > + > + if (priv->config.mprq.enabled && > + priv->rxqs_n >= priv->config.mprq.min_rxqs_num) > + return 1; > + return -ENOTSUP; > +} > + > +/** > + * Check whether Multi-Packet RQ is enabled for the Rx queue. > + * > + * @param rxq > + * Pointer to receive queue structure. > + * > + * @return > + * 0 if disabled, otherwise enabled. > + */ > +static __rte_always_inline int > +mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) > +{ > + return rxq->strd_num_n > 0; > +} > + > +/** > + * Check whether Multi-Packet RQ is enabled for the device. > + * > + * @param dev > + * Pointer to Ethernet device. > + * > + * @return > + * 0 if disabled, otherwise enabled. > + */ > +static __rte_always_inline int > +mlx5_mprq_enabled(struct rte_eth_dev *dev) > +{ > + struct mlx5_priv *priv = dev->data->dev_private; > + uint32_t i; > + uint16_t n = 0; > + uint16_t n_ibv = 0; > + > + if (mlx5_check_mprq_support(dev) < 0) > + return 0; > + /* All the configured queues should be enabled. */ > + for (i = 0; i < priv->rxqs_n; ++i) { > + struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; > + struct mlx5_rxq_ctrl *rxq_ctrl = container_of > + (rxq, struct mlx5_rxq_ctrl, rxq); > + > + if (rxq == NULL || rxq_ctrl->type != > MLX5_RXQ_TYPE_STANDARD) > + continue; > + n_ibv++; > + if (mlx5_rxq_mprq_enabled(rxq)) > + ++n; > + } > + /* Multi-Packet RQ can't be partially configured. */ > + MLX5_ASSERT(n == 0 || n == n_ibv); > + return n == n_ibv; > +} > + > +#endif /* RTE_PMD_MLX5_RX_H_ */ > diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c > index 9009eb8..19df0fa 100644 > --- a/drivers/net/mlx5/mlx5_rxq.c > +++ b/drivers/net/mlx5/mlx5_rxq.c > @@ -25,6 +25,7 @@ > #include "mlx5_defs.h" > #include "mlx5.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_utils.h" > #include "mlx5_autoconf.h" > > diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c > index c76b995..d004e1e 100644 > --- a/drivers/net/mlx5/mlx5_rxtx.c > +++ b/drivers/net/mlx5/mlx5_rxtx.c > @@ -25,6 +25,7 @@ > #include "mlx5_mr.h" > #include "mlx5_utils.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > > /* TX burst subroutines return codes. */ > enum mlx5_txcmp_code { > diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h > index 4f0fda0..d443db4 100644 > --- a/drivers/net/mlx5/mlx5_rxtx.h > +++ b/drivers/net/mlx5/mlx5_rxtx.h > @@ -31,21 +31,10 @@ > #include "mlx5_autoconf.h" > #include "mlx5_mr.h" > > -/* Support tunnel matching. */ > -#define MLX5_FLOW_TUNNEL 10 > > /* Mbuf dynamic flag offset for inline. 
*/ > extern uint64_t rte_net_mlx5_dynf_inline_mask; > > -struct mlx5_rxq_stats { > -#ifdef MLX5_PMD_SOFT_COUNTERS > - uint64_t ipackets; /**< Total of successfully received packets. */ > - uint64_t ibytes; /**< Total of successfully received bytes. */ > -#endif > - uint64_t idropped; /**< Total of packets dropped when RX ring full. > */ > - uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */ > -}; > - > struct mlx5_txq_stats { > #ifdef MLX5_PMD_SOFT_COUNTERS > uint64_t opackets; /**< Total of successfully sent packets. */ > @@ -56,148 +45,6 @@ struct mlx5_txq_stats { > > struct mlx5_priv; > > -/* Compressed CQE context. */ > -struct rxq_zip { > - uint16_t ai; /* Array index. */ > - uint16_t ca; /* Current array index. */ > - uint16_t na; /* Next array index. */ > - uint16_t cq_ci; /* The next CQE. */ > - uint32_t cqe_cnt; /* Number of CQEs. */ > -}; > - > -/* Multi-Packet RQ buffer header. */ > -struct mlx5_mprq_buf { > - struct rte_mempool *mp; > - uint16_t refcnt; /* Atomically accessed refcnt. */ > - uint8_t pad[RTE_PKTMBUF_HEADROOM]; /* Headroom for the first > packet. */ > - struct rte_mbuf_ext_shared_info shinfos[]; > - /* > - * Shared information per stride. > - * More memory will be allocated for the first stride head-room and > for > - * the strides data. > - */ > -} __rte_cache_aligned; > - > -/* Get pointer to the first stride. */ > -#define mlx5_mprq_buf_addr(ptr, strd_n) (RTE_PTR_ADD((ptr), \ > - sizeof(struct mlx5_mprq_buf) + \ > - (strd_n) * \ > - sizeof(struct rte_mbuf_ext_shared_info) + \ > - RTE_PKTMBUF_HEADROOM)) > - > -#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 > -#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9 > - > -enum mlx5_rxq_err_state { > - MLX5_RXQ_ERR_STATE_NO_ERROR = 0, > - MLX5_RXQ_ERR_STATE_NEED_RESET, > - MLX5_RXQ_ERR_STATE_NEED_READY, > -}; > - > -enum mlx5_rqx_code { > - MLX5_RXQ_CODE_EXIT = 0, > - MLX5_RXQ_CODE_NOMBUF, > - MLX5_RXQ_CODE_DROPPED, > -}; > - > -struct mlx5_eth_rxseg { > - struct rte_mempool *mp; /**< Memory pool to allocate segment > from. */ > - uint16_t length; /**< Segment data length, configures split point. */ > - uint16_t offset; /**< Data offset from beginning of mbuf data > buffer. */ > - uint32_t reserved; /**< Reserved field. */ > -}; > - > -/* RX queue descriptor. */ > -struct mlx5_rxq_data { > - unsigned int csum:1; /* Enable checksum offloading. */ > - unsigned int hw_timestamp:1; /* Enable HW timestamp. */ > - unsigned int rt_timestamp:1; /* Realtime timestamp format. */ > - unsigned int vlan_strip:1; /* Enable VLAN stripping. */ > - unsigned int crc_present:1; /* CRC must be subtracted. */ > - unsigned int sges_n:3; /* Log 2 of SGEs (max buffers per packet). */ > - unsigned int cqe_n:4; /* Log 2 of CQ elements. */ > - unsigned int elts_n:4; /* Log 2 of Mbufs. */ > - unsigned int rss_hash:1; /* RSS hash result is enabled. */ > - unsigned int mark:1; /* Marked flow available on the queue. */ > - unsigned int strd_num_n:5; /* Log 2 of the number of stride. */ > - unsigned int strd_sz_n:4; /* Log 2 of stride size. */ > - unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */ > - unsigned int err_state:2; /* enum mlx5_rxq_err_state. */ > - unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */ > - unsigned int lro:1; /* Enable LRO. */ > - unsigned int dynf_meta:1; /* Dynamic metadata is configured. */ > - unsigned int mcqe_format:3; /* CQE compression format. 
*/ > - volatile uint32_t *rq_db; > - volatile uint32_t *cq_db; > - uint16_t port_id; > - uint32_t elts_ci; > - uint32_t rq_ci; > - uint16_t consumed_strd; /* Number of consumed strides in WQE. */ > - uint32_t rq_pi; > - uint32_t cq_ci; > - uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */ > - uint32_t byte_mask; > - union { > - struct rxq_zip zip; /* Compressed context. */ > - uint16_t decompressed; > - /* Number of ready mbufs decompressed from the CQ. */ > - }; > - struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ > - uint16_t mprq_max_memcpy_len; /* Maximum size of packet to > memcpy. */ > - volatile void *wqes; > - volatile struct mlx5_cqe(*cqes)[]; > - struct rte_mbuf *(*elts)[]; > - struct mlx5_mprq_buf *(*mprq_bufs)[]; > - struct rte_mempool *mp; > - struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. > */ > - struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. > */ > - struct mlx5_dev_ctx_shared *sh; /* Shared context. */ > - uint16_t idx; /* Queue index. */ > - struct mlx5_rxq_stats stats; > - rte_xmm_t mbuf_initializer; /* Default rearm/flags for vectorized Rx. > */ > - struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */ > - void *cq_uar; /* Verbs CQ user access region. */ > - uint32_t cqn; /* CQ number. */ > - uint8_t cq_arm_sn; /* CQ arm seq number. */ > -#ifndef RTE_ARCH_64 > - rte_spinlock_t *uar_lock_cq; > - /* CQ (UAR) access lock required for 32bit implementations */ > -#endif > - uint32_t tunnel; /* Tunnel information. */ > - int timestamp_offset; /* Dynamic mbuf field for timestamp. */ > - uint64_t timestamp_rx_flag; /* Dynamic mbuf flag for timestamp. */ > - uint64_t flow_meta_mask; > - int32_t flow_meta_offset; > - uint32_t flow_meta_port_mask; > - uint32_t rxseg_n; /* Number of split segment descriptions. */ > - struct mlx5_eth_rxseg rxseg[MLX5_MAX_RXQ_NSEG]; > - /* Buffer split segment descriptions - sizes, offsets, pools. */ > -} __rte_cache_aligned; > - > -enum mlx5_rxq_type { > - MLX5_RXQ_TYPE_STANDARD, /* Standard Rx queue. */ > - MLX5_RXQ_TYPE_HAIRPIN, /* Hairpin Rx queue. */ > - MLX5_RXQ_TYPE_UNDEFINED, > -}; > - > -/* RX queue control descriptor. */ > -struct mlx5_rxq_ctrl { > - struct mlx5_rxq_data rxq; /* Data path structure. */ > - LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */ > - uint32_t refcnt; /* Reference counter. */ > - struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */ > - struct mlx5_priv *priv; /* Back pointer to private data. */ > - enum mlx5_rxq_type type; /* Rxq type. */ > - unsigned int socket; /* CPU socket ID for allocations. */ > - unsigned int irq:1; /* Whether IRQ is enabled. */ > - uint32_t flow_mark_n; /* Number of Mark/Flag flows using this > Queue. */ > - uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels > counters. */ > - uint32_t wqn; /* WQ number. */ > - uint16_t dump_file_n; /* Number of dump files. */ > - struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ > - uint32_t hairpin_status; /* Hairpin binding status. */ > -}; > - > /* TX queue send local data. 
*/ > __extension__ > struct mlx5_txq_local { > @@ -302,80 +149,6 @@ struct mlx5_txq_ctrl { > #define MLX5_TX_BFREG(txq) \ > (MLX5_PROC_PRIV((txq)->port_id)->uar_table[(txq)->idx]) > > -/* mlx5_rxq.c */ > - > -extern uint8_t rss_hash_default_key[]; > - > -unsigned int mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data); > -int mlx5_mprq_free_mp(struct rte_eth_dev *dev); > -int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev); > -int mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); > -int mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); > -int mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t > queue_id); > -int mlx5_rx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t > queue_id); > -int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t > desc, > - unsigned int socket, const struct rte_eth_rxconf > *conf, > - struct rte_mempool *mp); > -int mlx5_rx_hairpin_queue_setup > - (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, > - const struct rte_eth_hairpin_conf *hairpin_conf); > -void mlx5_rx_queue_release(void *dpdk_rxq); > -int mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev); > -void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev); > -int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id); > -int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id); > -int mlx5_rxq_obj_verify(struct rte_eth_dev *dev); > -struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, > - uint16_t desc, unsigned int socket, > - const struct rte_eth_rxconf *conf, > - const struct rte_eth_rxseg_split *rx_seg, > - uint16_t n_seg); > -struct mlx5_rxq_ctrl *mlx5_rxq_hairpin_new > - (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, > - const struct rte_eth_hairpin_conf *hairpin_conf); > -struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx); > -int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx); > -int mlx5_rxq_verify(struct rte_eth_dev *dev); > -int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl); > -int mlx5_ind_table_obj_verify(struct rte_eth_dev *dev); > -struct mlx5_ind_table_obj *mlx5_ind_table_obj_get(struct rte_eth_dev > *dev, > - const uint16_t *queues, > - uint32_t queues_n); > -int mlx5_ind_table_obj_release(struct rte_eth_dev *dev, > - struct mlx5_ind_table_obj *ind_tbl, > - bool standalone); > -int mlx5_ind_table_obj_setup(struct rte_eth_dev *dev, > - struct mlx5_ind_table_obj *ind_tbl); > -int mlx5_ind_table_obj_modify(struct rte_eth_dev *dev, > - struct mlx5_ind_table_obj *ind_tbl, > - uint16_t *queues, const uint32_t queues_n, > - bool standalone); > -struct mlx5_cache_entry *mlx5_hrxq_create_cb(struct mlx5_cache_list > *list, > - struct mlx5_cache_entry *entry __rte_unused, void > *cb_ctx); > -int mlx5_hrxq_match_cb(struct mlx5_cache_list *list, > - struct mlx5_cache_entry *entry, > - void *cb_ctx); > -void mlx5_hrxq_remove_cb(struct mlx5_cache_list *list, > - struct mlx5_cache_entry *entry); > -uint32_t mlx5_hrxq_get(struct rte_eth_dev *dev, > - struct mlx5_flow_rss_desc *rss_desc); > -int mlx5_hrxq_release(struct rte_eth_dev *dev, uint32_t hxrq_idx); > -uint32_t mlx5_hrxq_verify(struct rte_eth_dev *dev); > - > - > -enum mlx5_rxq_type mlx5_rxq_get_type(struct rte_eth_dev *dev, > uint16_t idx); > -const struct rte_eth_hairpin_conf *mlx5_rxq_get_hairpin_conf > - (struct rte_eth_dev *dev, uint16_t idx); > -struct mlx5_hrxq *mlx5_drop_action_create(struct rte_eth_dev *dev); > -void mlx5_drop_action_destroy(struct rte_eth_dev *dev); > -uint64_t 
mlx5_get_rx_port_offloads(void); > -uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev); > -void mlx5_rxq_timestamp_set(struct rte_eth_dev *dev); > -int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx, > - const uint8_t *rss_key, uint32_t rss_key_len, > - uint64_t hash_fields, > - const uint16_t *queues, uint32_t queues_n); > - > /* mlx5_txq.c */ > > int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); > @@ -416,45 +189,21 @@ struct mlx5_txq_ctrl *mlx5_txq_hairpin_new > void mlx5_set_ptype_table(void); > void mlx5_set_cksum_table(void); > void mlx5_set_swp_types_table(void); > -uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t > pkts_n); > -void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq); > -__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t > vec); > -void mlx5_mprq_buf_free_cb(void *addr, void *opaque); > -void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf); > -uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, > - uint16_t pkts_n); > uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, > uint16_t pkts_n); > -uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, > - uint16_t pkts_n); > -int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset); > int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); > -uint32_t mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t > rx_queue_id); > void mlx5_dump_debug_information(const char *path, const char *title, > const void *buf, unsigned int len); > int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, > const struct mlx5_mp_arg_queue_state_modify > *sm); > -void mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, > - struct rte_eth_rxq_info *qinfo); > void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, > struct rte_eth_txq_info *qinfo); > -int mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t > rx_queue_id, > - struct rte_eth_burst_mode *mode); > int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t > tx_queue_id, > struct rte_eth_burst_mode *mode); > > -/* Vectorized version of mlx5_rxtx.c */ > -int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data); > -int mlx5_check_vec_rx_support(struct rte_eth_dev *dev); > -uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, > - uint16_t pkts_n); > -uint16_t mlx5_rx_burst_mprq_vec(void *dpdk_txq, struct rte_mbuf > **pkts, > - uint16_t pkts_n); > - > /* mlx5_mr.c */ > > void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl); > -uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr); > uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf > *mb); > uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t > addr, > struct rte_mempool *mp); > @@ -538,35 +287,6 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, > void *addr, uint64_t iova, > } > > /** > - * Query LKey from a packet buffer for Rx. No need to flush local caches for > Rx > - * as mempool is pre-configured and static. > - * > - * @param rxq > - * Pointer to Rx queue structure. > - * @param addr > - * Address to search. > - * > - * @return > - * Searched LKey on success, UINT32_MAX on no match. > - */ > -static __rte_always_inline uint32_t > -mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr) > -{ > - struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl; > - uint32_t lkey; > - > - /* Linear search on MR cache array. 
*/ > - lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru, > - MLX5_MR_CACHE_N, addr); > - if (likely(lkey != UINT32_MAX)) > - return lkey; > - /* Take slower bottom-half (Binary Search) on miss. */ > - return mlx5_rx_addr2mr_bh(rxq, addr); > -} > - > -#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)- > >buf_addr)) > - > -/** > * Query LKey from a packet buffer for Tx. If not found, add the mempool. > * > * @param txq > @@ -637,25 +357,6 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, > void *addr, uint64_t iova, > } > > /** > - * Convert timestamp from HW format to linear counter > - * from Packet Pacing Clock Queue CQE timestamp format. > - * > - * @param sh > - * Pointer to the device shared context. Might be needed > - * to convert according current device configuration. > - * @param ts > - * Timestamp from CQE to convert. > - * @return > - * UTC in nanoseconds > - */ > -static __rte_always_inline uint64_t > -mlx5_txpp_convert_rx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t ts) > -{ > - RTE_SET_USED(sh); > - return (ts & UINT32_MAX) + (ts >> 32) * NS_PER_S; > -} > - > -/** > * Convert timestamp from mbuf format to linear counter > * of Clock Queue completions (24 bits) > * > @@ -712,274 +413,4 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, > void *addr, uint64_t iova, > return ci; > } > > -/** > - * Set timestamp in mbuf dynamic field. > - * > - * @param mbuf > - * Structure to write into. > - * @param offset > - * Dynamic field offset in mbuf structure. > - * @param timestamp > - * Value to write. > - */ > -static __rte_always_inline void > -mlx5_timestamp_set(struct rte_mbuf *mbuf, int offset, > - rte_mbuf_timestamp_t timestamp) > -{ > - *RTE_MBUF_DYNFIELD(mbuf, offset, rte_mbuf_timestamp_t *) = > timestamp; > -} > - > -/** > - * Replace MPRQ buffer. > - * > - * @param rxq > - * Pointer to Rx queue structure. > - * @param rq_idx > - * RQ index to replace. > - */ > -static __rte_always_inline void > -mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx) > -{ > - const uint32_t strd_n = 1 << rxq->strd_num_n; > - struct mlx5_mprq_buf *rep = rxq->mprq_repl; > - volatile struct mlx5_wqe_data_seg *wqe = > - &((volatile struct mlx5_wqe_mprq *)rxq- > >wqes)[rq_idx].dseg; > - struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_idx]; > - void *addr; > - > - if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) > 1) { > - MLX5_ASSERT(rep != NULL); > - /* Replace MPRQ buf. */ > - (*rxq->mprq_bufs)[rq_idx] = rep; > - /* Replace WQE. */ > - addr = mlx5_mprq_buf_addr(rep, strd_n); > - wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); > - /* If there's only one MR, no need to replace LKey in WQE. */ > - if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > > 1)) > - wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); > - /* Stash a mbuf for next replacement. */ > - if (likely(!rte_mempool_get(rxq->mprq_mp, (void > **)&rep))) > - rxq->mprq_repl = rep; > - else > - rxq->mprq_repl = NULL; > - /* Release the old buffer. */ > - mlx5_mprq_buf_free(buf); > - } else if (unlikely(rxq->mprq_repl == NULL)) { > - struct mlx5_mprq_buf *rep; > - > - /* > - * Currently, the MPRQ mempool is out of buffer > - * and doing memcpy regardless of the size of Rx > - * packet. Retry allocation to get back to > - * normal. > - */ > - if (!rte_mempool_get(rxq->mprq_mp, (void **)&rep)) > - rxq->mprq_repl = rep; > - } > -} > - > -/** > - * Attach or copy MPRQ buffer content to a packet. > - * > - * @param rxq > - * Pointer to Rx queue structure. 
> - * @param pkt > - * Pointer to a packet to fill. > - * @param len > - * Packet length. > - * @param buf > - * Pointer to a MPRQ buffer to take the data from. > - * @param strd_idx > - * Stride index to start from. > - * @param strd_cnt > - * Number of strides to consume. > - */ > -static __rte_always_inline enum mlx5_rqx_code > -mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, > uint32_t len, > - struct mlx5_mprq_buf *buf, uint16_t strd_idx, uint16_t > strd_cnt) > -{ > - const uint32_t strd_n = 1 << rxq->strd_num_n; > - const uint16_t strd_sz = 1 << rxq->strd_sz_n; > - const uint16_t strd_shift = > - MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; > - const int32_t hdrm_overlap = > - len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; > - const uint32_t offset = strd_idx * strd_sz + strd_shift; > - void *addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), > offset); > - > - /* > - * Memcpy packets to the target mbuf if: > - * - The size of packet is smaller than mprq_max_memcpy_len. > - * - Out of buffer in the Mempool for Multi-Packet RQ. > - * - The packet's stride overlaps a headroom and scatter is off. > - */ > - if (len <= rxq->mprq_max_memcpy_len || > - rxq->mprq_repl == NULL || > - (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { > - if (likely(len <= > - (uint32_t)(pkt->buf_len - > RTE_PKTMBUF_HEADROOM))) { > - rte_memcpy(rte_pktmbuf_mtod(pkt, void *), > - addr, len); > - DATA_LEN(pkt) = len; > - } else if (rxq->strd_scatter_en) { > - struct rte_mbuf *prev = pkt; > - uint32_t seg_len = RTE_MIN(len, (uint32_t) > - (pkt->buf_len - > RTE_PKTMBUF_HEADROOM)); > - uint32_t rem_len = len - seg_len; > - > - rte_memcpy(rte_pktmbuf_mtod(pkt, void *), > - addr, seg_len); > - DATA_LEN(pkt) = seg_len; > - while (rem_len) { > - struct rte_mbuf *next = > - rte_pktmbuf_alloc(rxq->mp); > - > - if (unlikely(next == NULL)) > - return MLX5_RXQ_CODE_NOMBUF; > - NEXT(prev) = next; > - SET_DATA_OFF(next, 0); > - addr = RTE_PTR_ADD(addr, seg_len); > - seg_len = RTE_MIN(rem_len, (uint32_t) > - (next->buf_len - > RTE_PKTMBUF_HEADROOM)); > - rte_memcpy > - (rte_pktmbuf_mtod(next, void *), > - addr, seg_len); > - DATA_LEN(next) = seg_len; > - rem_len -= seg_len; > - prev = next; > - ++NB_SEGS(pkt); > - } > - } else { > - return MLX5_RXQ_CODE_DROPPED; > - } > - } else { > - rte_iova_t buf_iova; > - struct rte_mbuf_ext_shared_info *shinfo; > - uint16_t buf_len = strd_cnt * strd_sz; > - void *buf_addr; > - > - /* Increment the refcnt of the whole chunk. */ > - __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED); > - MLX5_ASSERT(__atomic_load_n(&buf->refcnt, > - __ATOMIC_RELAXED) <= strd_n + 1); > - buf_addr = RTE_PTR_SUB(addr, > RTE_PKTMBUF_HEADROOM); > - /* > - * MLX5 device doesn't use iova but it is necessary in a > - * case where the Rx packet is transmitted via a > - * different PMD. > - */ > - buf_iova = rte_mempool_virt2iova(buf) + > - RTE_PTR_DIFF(buf_addr, buf); > - shinfo = &buf->shinfos[strd_idx]; > - rte_mbuf_ext_refcnt_set(shinfo, 1); > - /* > - * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when > - * attaching the stride to mbuf and more offload flags > - * will be added below by calling rxq_cq_to_mbuf(). > - * Other fields will be overwritten. > - */ > - rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, > - buf_len, shinfo); > - /* Set mbuf head-room. */ > - SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); > - MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); > - MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= > - len - (hdrm_overlap > 0 ? 
hdrm_overlap : 0)); > - DATA_LEN(pkt) = len; > - /* > - * Copy the last fragment of a packet (up to headroom > - * size bytes) in case there is a stride overlap with > - * a next packet's headroom. Allocate a separate mbuf > - * to store this fragment and link it. Scatter is on. > - */ > - if (hdrm_overlap > 0) { > - MLX5_ASSERT(rxq->strd_scatter_en); > - struct rte_mbuf *seg = > - rte_pktmbuf_alloc(rxq->mp); > - > - if (unlikely(seg == NULL)) > - return MLX5_RXQ_CODE_NOMBUF; > - SET_DATA_OFF(seg, 0); > - rte_memcpy(rte_pktmbuf_mtod(seg, void *), > - RTE_PTR_ADD(addr, len - hdrm_overlap), > - hdrm_overlap); > - DATA_LEN(seg) = hdrm_overlap; > - DATA_LEN(pkt) = len - hdrm_overlap; > - NEXT(pkt) = seg; > - NB_SEGS(pkt) = 2; > - } > - } > - return MLX5_RXQ_CODE_EXIT; > -} > - > -/** > - * Check whether Multi-Packet RQ can be enabled for the device. > - * > - * @param dev > - * Pointer to Ethernet device. > - * > - * @return > - * 1 if supported, negative errno value if not. > - */ > -static __rte_always_inline int > -mlx5_check_mprq_support(struct rte_eth_dev *dev) > -{ > - struct mlx5_priv *priv = dev->data->dev_private; > - > - if (priv->config.mprq.enabled && > - priv->rxqs_n >= priv->config.mprq.min_rxqs_num) > - return 1; > - return -ENOTSUP; > -} > - > -/** > - * Check whether Multi-Packet RQ is enabled for the Rx queue. > - * > - * @param rxq > - * Pointer to receive queue structure. > - * > - * @return > - * 0 if disabled, otherwise enabled. > - */ > -static __rte_always_inline int > -mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq) > -{ > - return rxq->strd_num_n > 0; > -} > - > -/** > - * Check whether Multi-Packet RQ is enabled for the device. > - * > - * @param dev > - * Pointer to Ethernet device. > - * > - * @return > - * 0 if disabled, otherwise enabled. > - */ > -static __rte_always_inline int > -mlx5_mprq_enabled(struct rte_eth_dev *dev) > -{ > - struct mlx5_priv *priv = dev->data->dev_private; > - uint32_t i; > - uint16_t n = 0; > - uint16_t n_ibv = 0; > - > - if (mlx5_check_mprq_support(dev) < 0) > - return 0; > - /* All the configured queues should be enabled. */ > - for (i = 0; i < priv->rxqs_n; ++i) { > - struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; > - struct mlx5_rxq_ctrl *rxq_ctrl = container_of > - (rxq, struct mlx5_rxq_ctrl, rxq); > - > - if (rxq == NULL || rxq_ctrl->type != > MLX5_RXQ_TYPE_STANDARD) > - continue; > - n_ibv++; > - if (mlx5_rxq_mprq_enabled(rxq)) > - ++n; > - } > - /* Multi-Packet RQ can't be partially configured. 
*/ > - MLX5_ASSERT(n == 0 || n == n_ibv); > - return n == n_ibv; > -} > #endif /* RTE_PMD_MLX5_RXTX_H_ */ > diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c > b/drivers/net/mlx5/mlx5_rxtx_vec.c > index 028e0f6..d5af2d9 100644 > --- a/drivers/net/mlx5/mlx5_rxtx_vec.c > +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c > @@ -19,6 +19,7 @@ > #include "mlx5.h" > #include "mlx5_utils.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_rxtx_vec.h" > #include "mlx5_autoconf.h" > > diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c > index a6569b2..4dbd831 100644 > --- a/drivers/net/mlx5/mlx5_stats.c > +++ b/drivers/net/mlx5/mlx5_stats.c > @@ -17,6 +17,7 @@ > #include "mlx5_defs.h" > #include "mlx5.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_malloc.h" > > /** > diff --git a/drivers/net/mlx5/mlx5_trigger.c > b/drivers/net/mlx5/mlx5_trigger.c > index 94dd567..c88cb22 100644 > --- a/drivers/net/mlx5/mlx5_trigger.c > +++ b/drivers/net/mlx5/mlx5_trigger.c > @@ -16,6 +16,7 @@ > #include "mlx5.h" > #include "mlx5_mr.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_utils.h" > #include "rte_pmd_mlx5.h" > > diff --git a/drivers/net/mlx5/mlx5_txpp.c b/drivers/net/mlx5/mlx5_txpp.c > index e8d632a..89e1c5d 100644 > --- a/drivers/net/mlx5/mlx5_txpp.c > +++ b/drivers/net/mlx5/mlx5_txpp.c > @@ -17,6 +17,7 @@ > > #include "mlx5.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_common_os.h" > > static_assert(sizeof(struct mlx5_cqe_ts) == sizeof(rte_int128_t), > diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c > index 64678d3..60f97f2 100644 > --- a/drivers/net/mlx5/mlx5_vlan.c > +++ b/drivers/net/mlx5/mlx5_vlan.c > @@ -16,6 +16,7 @@ > #include "mlx5.h" > #include "mlx5_autoconf.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_utils.h" > #include "mlx5_devx.h" > > diff --git a/drivers/net/mlx5/windows/mlx5_os.c > b/drivers/net/mlx5/windows/mlx5_os.c > index 6f39276..79eac80 100644 > --- a/drivers/net/mlx5/windows/mlx5_os.c > +++ b/drivers/net/mlx5/windows/mlx5_os.c > @@ -23,6 +23,7 @@ > #include "mlx5_common_os.h" > #include "mlx5_utils.h" > #include "mlx5_rxtx.h" > +#include "mlx5_rx.h" > #include "mlx5_autoconf.h" > #include "mlx5_mr.h" > #include "mlx5_flow.h" > -- > 1.8.3.1