On Wed, Sep 16, 2020 at 12:27 AM Harman Kalra <hka...@marvell.com> wrote: > > From: Pavan Nikhilesh <pbhagavat...@marvell.com> > > Improve single flow performance by moving the point of coherence > to the end of transmit sequence. > > Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com> > --- > drivers/event/octeontx2/otx2_worker.h | 35 +++++++++++++++++---------- > drivers/net/octeontx2/otx2_tx.h | 18 ++++++++++++++ > 2 files changed, 40 insertions(+), 13 deletions(-)
Failed[1] to apply this patch on dpdk-next-eventdev. Could you rebase this patch onto the dpdk-next-eventdev tree and send an updated version? [1] [for-main][dpdk-next-eventdev] $ git am -3 /tmp/r/2-4-event-octeontx2-improve-single-flow-performance Applying: event/octeontx2: improve single flow performance error: sha1 information is lacking or useless (drivers/event/octeontx2/otx2_worker.h). error: could not build fake ancestor Patch failed at 0001 event/octeontx2: improve single flow performance hint: Use 'git am --show-current-patch=diff' to see the failed patch When you have resolved this problem, run "git am --continue". If you prefer to skip this patch, run "git am --skip" instead. To restore the original branch and stop patching, run "git am --abort" > > diff --git a/drivers/event/octeontx2/otx2_worker.h > b/drivers/event/octeontx2/otx2_worker.h > index 1bf8afedf..32d611458 100644 > --- a/drivers/event/octeontx2/otx2_worker.h > +++ b/drivers/event/octeontx2/otx2_worker.h > @@ -247,15 +247,6 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) > #endif > } > > -static __rte_always_inline void > -otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag) > -{ > - if (wait_flag) > - otx2_ssogws_head_wait(ws); > - > - rte_cio_wmb(); > -} > - > static __rte_always_inline const struct otx2_eth_txq * > otx2_ssogws_xtract_meta(struct rte_mbuf *m, > const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT]) > @@ -287,10 +278,9 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct > rte_event ev[], > return otx2_sec_event_tx(ws, ev, m, txq, flags); > } > > - rte_prefetch_non_temporal(&txq_data[m->port][0]); > /* Perform header writes before barrier for TSO */ > otx2_nix_xmit_prepare_tso(m, flags); > - otx2_ssogws_order(ws, !ev->sched_type); > + rte_cio_wmb(); > txq = otx2_ssogws_xtract_meta(m, txq_data); > otx2_ssogws_prepare_pkt(txq, m, cmd, flags); > > @@ -298,12 +288,31 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct > rte_event ev[], > const uint16_t segdw = 
otx2_nix_prepare_mseg(m, cmd, flags); > otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], > m->ol_flags, segdw, flags); > - otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, > segdw); > + if (!ev->sched_type) { > + otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, > segdw); > + otx2_ssogws_head_wait(ws); > + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) > + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, > + txq->io_addr, segdw); > + } else { > + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, > txq->io_addr, > + segdw); > + } > } else { > /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ > otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], > m->ol_flags, 4, flags); > - otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); > + > + if (!ev->sched_type) { > + otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); > + otx2_ssogws_head_wait(ws); > + if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) > + otx2_nix_xmit_one(cmd, txq->lmt_addr, > + txq->io_addr, flags); > + } else { > + otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, > + flags); > + } > } > > otx2_write64(0, ws->swtag_flush_op); > diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h > index 3c4317092..caf170fd1 100644 > --- a/drivers/net/octeontx2/otx2_tx.h > +++ b/drivers/net/octeontx2/otx2_tx.h > @@ -383,6 +383,18 @@ otx2_nix_xmit_one(uint64_t *cmd, void *lmt_addr, > } while (lmt_status == 0); > } > > +static __rte_always_inline void > +otx2_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags) > +{ > + otx2_lmt_mov(lmt_addr, cmd, otx2_nix_tx_ext_subs(flags)); > +} > + > +static __rte_always_inline uint64_t > +otx2_nix_xmit_submit_lmt(const rte_iova_t io_addr) > +{ > + return otx2_lmt_submit(io_addr); > +} > + > static __rte_always_inline uint16_t > otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t > flags) > { > @@ -453,6 +465,12 @@ otx2_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, > const uint16_t flags) > return segdw; > } > > 
+static __rte_always_inline void > +otx2_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw) > +{ > + otx2_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw); > +} > + > static __rte_always_inline void > otx2_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, > rte_iova_t io_addr, uint16_t segdw) > -- > 2.18.0 >