From: Pavan Nikhilesh <pbhagavat...@marvell.com>

Add support for event eth Tx adapter fastpath operations.
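The Tx adapter enqueue handler is chosen once at fastpath setup: the tables
expanded from NIX_TX_FASTPATH_MODES hold one specialized function per
combination of Tx offload flags, and the enabled offloads index into them.
A minimal sketch of that selection pattern follows; the flag and function
names here are illustrative only, not the driver's actual symbols.

#include <stdint.h>
#include <stdio.h>

/* Illustrative offload flag bits; not the real NIX_TX_OFFLOAD_* values. */
#define EX_TX_OFFLOAD_CSUM_F (1ULL << 0)
#define EX_TX_OFFLOAD_TSO_F  (1ULL << 1)

typedef uint16_t (*tx_enq_fn)(void *port, void *ev, uint16_t nb_events);

static uint16_t tx_enq_base(void *p, void *e, uint16_t n) { (void)p; (void)e; return n; }
static uint16_t tx_enq_csum(void *p, void *e, uint16_t n) { (void)p; (void)e; return n; }
static uint16_t tx_enq_tso(void *p, void *e, uint16_t n) { (void)p; (void)e; return n; }
static uint16_t tx_enq_tso_csum(void *p, void *e, uint16_t n) { (void)p; (void)e; return n; }

int main(void)
{
	/* One specialized handler per flag combination, analogous to the
	 * [f4][f3][f2][f1][f0] tables built from NIX_TX_FASTPATH_MODES.
	 */
	const tx_enq_fn tbl[2][2] = {
		{ tx_enq_base, tx_enq_csum },
		{ tx_enq_tso, tx_enq_tso_csum },
	};
	uint64_t tx_offloads = EX_TX_OFFLOAD_TSO_F | EX_TX_OFFLOAD_CSUM_F;

	/* !! collapses each flag test to 0 or 1, forming the table index. */
	tx_enq_fn txa_enqueue = tbl[!!(tx_offloads & EX_TX_OFFLOAD_TSO_F)]
				   [!!(tx_offloads & EX_TX_OFFLOAD_CSUM_F)];

	printf("enqueued %u events\n", txa_enqueue(NULL, NULL, 4));
	return 0;
}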
Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 35 ++++++++++++
 drivers/event/cnxk/cn10k_worker.c   | 32 +++++++++++
 drivers/event/cnxk/cn10k_worker.h   | 67 ++++++++++++++++++++++
 drivers/event/cnxk/cn9k_eventdev.c  | 76 +++++++++++++++++++++++++
 drivers/event/cnxk/cn9k_worker.c    | 60 ++++++++++++++++++++
 drivers/event/cnxk/cn9k_worker.h    | 87 +++++++++++++++++++++++++++++
 6 files changed, 357 insertions(+)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 99d2b7a8ba..817cb08480 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -336,6 +336,22 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue sso_hws_tx_adptr_enq[2][2][2][2][2] = {
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	[f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue sso_hws_tx_adptr_enq_seg[2][2][2][2][2] =
+		{
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	[f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
 	event_dev->enqueue = cn10k_sso_hws_enq;
 	event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -395,6 +411,25 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
 		}
 	}
+
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+		/* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	} else {
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	}
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 }
 
 static void
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index 4365aec992..fb26e17034 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -175,3 +175,35 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 
 NIX_RX_FASTPATH_MODES
 #undef R
+
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events) \
+	{ \
+		struct cn10k_sso_hws *ws = port; \
+		uint64_t cmd[sz]; \
+ \
+		RTE_SET_USED(nb_events); \
+		return cn10k_sso_hws_event_tx( \
+			ws, &ev[0], cmd, \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+				ws->tx_adptr_data, \
+			flags); \
+	} \
+ \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events) \
+	{ \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+		struct cn10k_sso_hws *ws = port; \
+ \
+		RTE_SET_USED(nb_events); \
+		return cn10k_sso_hws_event_tx( \
+			ws, &ev[0], cmd, \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+				ws->tx_adptr_data, \
+			(flags) | NIX_TX_MULTI_SEG_F); \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index ad320d2dc0..b3f71202ad 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
 
 #include "cn10k_ethdev.h"
 #include "cn10k_rx.h"
+#include "cn10k_tx.h"
 
 /* SSO Operations */
 
@@ -239,4 +240,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
 NIX_RX_FASTPATH_MODES
 #undef R
 
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+			  const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+	return (const struct cn10k_eth_txq *)
+		txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+		       uint64_t *cmd,
+		       const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+		       const uint32_t flags)
+{
+	const struct cn10k_eth_txq *txq;
+	struct rte_mbuf *m = ev->mbuf;
+	uint16_t ref_cnt = m->refcnt;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uintptr_t pa;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+	cn10k_nix_tx_skeleton(txq, cmd, flags);
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn10k_nix_xmit_prepare_tso(m, flags);
+
+	cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags);
+	if (flags & NIX_TX_MULTI_SEG_F) {
+		const uint16_t segdw =
+			cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+		pa = txq->io_addr | ((segdw - 1) << 4);
+	} else {
+		pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+	}
+	if (!ev->sched_type)
+		cnxk_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+		if (ref_cnt > 1)
+			return 1;
+	}
+
+	cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG,
+				 ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+	return 1;
+}
+
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events); \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events); \
+	uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events); \
+	uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
 #endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 2f071f19ea..a1206dcb61 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -427,6 +427,38 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 #undef R
 	};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue sso_hws_tx_adptr_enq[2][2][2][2][2] = {
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	[f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+		NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue sso_hws_tx_adptr_enq_seg[2][2][2][2][2] =
+		{
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	[f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
+	const event_tx_adapter_enqueue
+		sso_hws_dual_tx_adptr_enq[2][2][2][2][2] = {
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	[f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
+	const event_tx_adapter_enqueue
+		sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2] = {
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	[f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+	};
+
 	event_dev->enqueue = cn9k_sso_hws_enq;
 	event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -487,6 +519,23 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+		/* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	} else {
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	}
+
 	if (dev->dual_ws) {
 		event_dev->enqueue = cn9k_sso_hws_dual_enq;
 		event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -567,8 +616,35 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 					NIX_RX_OFFLOAD_RSS_F)];
 			}
 		}
+
+		if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+			/* [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM]
+			 */
+			event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+		} else {
+			event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+		}
 	}
 
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 	rte_mb();
 }
 
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 0f031a5fa3..0ffeeeb93a 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -376,3 +376,63 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
 
 NIX_RX_FASTPATH_MODES
 #undef R
+
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events) \
+	{ \
+		struct cn9k_sso_hws *ws = port; \
+		uint64_t cmd[sz]; \
+ \
+		RTE_SET_USED(nb_events); \
+		return cn9k_sso_hws_event_tx( \
+			ws->base, &ev[0], cmd, \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+				ws->tx_adptr_data, \
+			flags); \
+	} \
+ \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events) \
+	{ \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+		struct cn9k_sso_hws *ws = port; \
+ \
+		RTE_SET_USED(nb_events); \
+		return cn9k_sso_hws_event_tx( \
+			ws->base, &ev[0], cmd, \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+				ws->tx_adptr_data, \
+			(flags) | NIX_TX_MULTI_SEG_F); \
+	} \
+ \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events) \
+	{ \
+		struct cn9k_sso_hws_dual *ws = port; \
+		uint64_t cmd[sz]; \
+ \
+		RTE_SET_USED(nb_events); \
+		return cn9k_sso_hws_event_tx( \
+			ws->base[!ws->vws], &ev[0], cmd, \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+				ws->tx_adptr_data, \
+			flags); \
+	} \
+ \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events) \
+	{ \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2]; \
+		struct cn9k_sso_hws_dual *ws = port; \
+ \
+		RTE_SET_USED(nb_events); \
+		return cn9k_sso_hws_event_tx( \
+			ws->base[!ws->vws], &ev[0], cmd, \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) & \
+				ws->tx_adptr_data, \
+			(flags) | NIX_TX_MULTI_SEG_F); \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 1fde652ff8..9ffb8df5b8 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@
 
 #include "cn9k_ethdev.h"
 #include "cn9k_rx.h"
+#include "cn9k_tx.h"
 
 /* SSO Operations */
 
@@ -400,4 +401,90 @@ NIX_RX_FASTPATH_MODES
 NIX_RX_FASTPATH_MODES
 #undef R
 
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+			 const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+	return (const struct cn9k_eth_txq *)
+		txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+			 uint64_t *cmd, const uint32_t flags)
+{
+	roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+	cn9k_nix_xmit_prepare(m, cmd, flags);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+		      const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+		      const uint32_t flags)
+{
+	struct rte_mbuf *m = ev->mbuf;
+	const struct cn9k_eth_txq *txq;
+	uint16_t ref_cnt = m->refcnt;
+
+	/* Perform header writes before barrier for TSO */
+	cn9k_nix_xmit_prepare_tso(m, flags);
+	/* Let's commit any changes to the packet here, since no further
+	 * changes will be made to the mbuf when fast free is set. When
+	 * fast free is not set, both cn9k_nix_prepare_mseg() and
+	 * cn9k_nix_xmit_prepare() have a barrier after the refcnt update.
+	 */
+	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+		rte_io_wmb();
+	txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+	cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F) {
+		const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+		if (!ev->sched_type) {
+			cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+			cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+			if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+				cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+						       txq->io_addr, segdw);
+		} else {
+			cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+					       segdw);
+		}
+	} else {
+		if (!ev->sched_type) {
+			cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+			cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+			if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+				cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+						  txq->io_addr, flags);
+		} else {
+			cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+					  flags);
+		}
+	}
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+		if (ref_cnt > 1)
+			return 1;
+	}
+
+	cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+				 base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+	return 1;
+}
+
+#define T(name, f4, f3, f2, f1, f0, sz, flags) \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events); \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events); \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events); \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name( \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
 #endif
-- 
2.17.1