On 2022-10-13 03:32, Naga Harish K, S V wrote:
>
>
>> -----Original Message-----
>> From: Jayatheerthan, Jay <jay.jayatheert...@intel.com>
>> Sent: Tuesday, October 11, 2022 12:40 PM
>> To: mattias.ronnblom <mattias.ronnb...@ericsson.com>; Carrillo, Erik G
>> <erik.g.carri...@intel.com>; Gujjar, Abhinandan S
>> <abhinandan.guj...@intel.com>; Jerin Jacob <jer...@marvell.com>; Naga
>> Harish K, S V <s.v.naga.haris...@intel.com>
>> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haa...@intel.com>;
>> hof...@lysator.liu.se; mattias.ronnblom <mattias.ronnb...@ericsson.com>
>> Subject: RE: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
>> report idle
>>
>> @Harish, Could you review the patch ?
>>
>> -Jay
>>
>>> -----Original Message-----
>>> From: Mattias Rönnblom <mattias.ronnb...@ericsson.com>
>>> Sent: Monday, October 10, 2022 8:24 PM
>>> To: Jayatheerthan, Jay <jay.jayatheert...@intel.com>; Carrillo, Erik G
>>> <erik.g.carri...@intel.com>; Gujjar, Abhinandan S
>>> <abhinandan.guj...@intel.com>; Jerin Jacob <jer...@marvell.com>
>>> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haa...@intel.com>;
>>> hof...@lysator.liu.se; mattias.ronnblom
>>> <mattias.ronnb...@ericsson.com>
>>> Subject: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
>>> report idle
>>>
>>> Update the Event Ethernet Rx Adapter's service function to report as
>>> idle (i.e., return -EAGAIN) in case no Ethernet frames were received
>>> from the ethdev and no events were enqueued to the event device.
>>>
>>> Signed-off-by: Mattias Rönnblom <mattias.ronnb...@ericsson.com>
>>> ---
>>> lib/eventdev/rte_event_eth_rx_adapter.c | 56
>>> ++++++++++++++++++-------
>>> 1 file changed, 41 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/lib/eventdev/rte_event_eth_rx_adapter.c
>>> b/lib/eventdev/rte_event_eth_rx_adapter.c
>>> index 5c3021a184..cf7bbd4d69 100644
>>> --- a/lib/eventdev/rte_event_eth_rx_adapter.c
>>> +++ b/lib/eventdev/rte_event_eth_rx_adapter.c
>>> @@ -1184,7 +1184,7 @@ rxa_intr_thread(void *arg)
>>> /* Dequeue <port, q> from interrupt ring and enqueue received
>>> * mbufs to eventdev
>>> */
>>> -static inline void
>>> +static inline bool
>>> rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter) {
>>> uint32_t n;
>>> @@ -1194,20 +1194,27 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>> struct rte_event_eth_rx_adapter_stats *stats;
>>> rte_spinlock_t *ring_lock;
>>> uint8_t max_done = 0;
>>> + bool work = false;
>>>
>>> if (rx_adapter->num_rx_intr == 0)
>>> - return;
>>> + return work;
>>>
>>> if (rte_ring_count(rx_adapter->intr_ring) == 0
>>> && !rx_adapter->qd_valid)
>>> - return;
>>> + return work;
>>>
>>> buf = &rx_adapter->event_enqueue_buffer;
>>> stats = &rx_adapter->stats;
>>> ring_lock = &rx_adapter->intr_ring_lock;
>>>
>>> - if (buf->count >= BATCH_SIZE)
>>> - rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> + if (buf->count >= BATCH_SIZE) {
>>> + uint16_t n;
>>> +
>>> + n = rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> +
>>> + if (likely(n > 0))
>>> + work = true;
>>> + }
>>>
>>> while (rxa_pkt_buf_available(buf)) {
>>> struct eth_device_info *dev_info;
>>> @@ -1289,7 +1296,12 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>> }
>>>
>>> done:
>>> - rx_adapter->stats.rx_intr_packets += nb_rx;
>>> + if (nb_rx > 0) {
>
> How are the performance numbers before and after this patch?
> Trying to understand the performance impact, as new condition is added to the
> service function Datapath.
>
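For context, the convention the patch relies on is simply that the service
callback returns -EAGAIN whenever an invocation turns out to have done no
useful work, and 0 otherwise. A minimal sketch of a callback following that
convention is below; my_ctx and my_poll_sources() are made-up stand-ins for
the adapter's state and its rxa_intr_ring_dequeue()/rxa_poll() pair, not
actual adapter code.

#include <errno.h>
#include <stdint.h>

#include <rte_service_component.h>

/* Hypothetical context and poll helper, standing in for the adapter's
 * internal state and its rxa_intr_ring_dequeue()/rxa_poll() pair.
 */
struct my_ctx {
	int dummy;
};

static unsigned int
my_poll_sources(struct my_ctx *ctx)
{
	(void)ctx;
	return 0; /* pretend nothing was dequeued this time around */
}

/* Service callback (rte_service_func signature): return 0 when the call
 * did useful work, -EAGAIN when it did not, so the caller can treat the
 * invocation as idle.
 */
static int32_t
my_service_func(void *args)
{
	struct my_ctx *ctx = args;
	unsigned int nb_done = my_poll_sources(ctx);

	return nb_done > 0 ? 0 : -EAGAIN;
}

The only point is that the return value lets useful invocations be told apart
from idle ones; rxa_service_func further down in the diff does the same thing
via the intr_work/poll_work flags.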
I haven't tested the RX and TX adapters separately, but if you run them on the same core, I get the following result:
Without patches, with stats disabled: 16,0 Mpps
Without patches, with stats enabled: 16,1 Mpps
With patches, with stats disabled: 16,1 Mpps
With patches, with stats enabled: 16,2 Mpps

So with this particular hardware, compiler, and test application, these
patches, despite adding a tiny bit of additional logic, actually make the
RX+TX adapter perform better. This is contrary to what you might expect,
and I'm sure YMMV.

Enabling service core statistics (which boils down to a 2x rdtsc and some
cheap arithmetic in rte_service.c) actually makes the RX+TX adapter core
perform better, both before and after this patchset. Also contrary to what
you might expect.

The results are consistent across multiple runs.

GCC 11.2.0 and AMD Zen 3 @ 3,7 GHz. Event device is DSW and I/O is the
ring Ethdev.

>>> + rx_adapter->stats.rx_intr_packets += nb_rx;
>>> + work = true;
>>> + }
>>> +
>>> + return work;
>>> }
>>>
>>> /*
>>> @@ -1305,7 +1317,7 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>> * the hypervisor's switching layer where adjustments can be made to deal
>> with
>>> * it.
>>> */
>>> -static inline void
>>> +static inline bool
>>> rxa_poll(struct event_eth_rx_adapter *rx_adapter) {
>>> uint32_t num_queue;
>>> @@ -1314,6 +1326,7 @@ rxa_poll(struct event_eth_rx_adapter
>> *rx_adapter)
>>> struct rte_event_eth_rx_adapter_stats *stats = NULL;
>>> uint32_t wrr_pos;
>>> uint32_t max_nb_rx;
>>> + bool work = false;
>>>
>>> wrr_pos = rx_adapter->wrr_pos;
>>> max_nb_rx = rx_adapter->max_nb_rx;
>>> @@ -1329,14 +1342,20 @@ rxa_poll(struct event_eth_rx_adapter
>> *rx_adapter)
>>> /* Don't do a batch dequeue from the rx queue if there isn't
>>> * enough space in the enqueue buffer.
>>> */
>>> - if (buf->count >= BATCH_SIZE)
>>> - rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> + if (buf->count >= BATCH_SIZE) {
>>> + uint16_t n;
>>> +
>>> + n = rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> +
>>> + if (likely(n > 0))
>>> + work = true;
>
> Same as above
>
>>> + }
>>> if (!rxa_pkt_buf_available(buf)) {
>>> if (rx_adapter->use_queue_event_buf)
>>> goto poll_next_entry;
>>> else {
>>> rx_adapter->wrr_pos = wrr_pos;
>>> - return;
>>> + break;
>>> }
>>> }
>>>
>>> @@ -1352,6 +1371,11 @@ rxa_poll(struct event_eth_rx_adapter
>> *rx_adapter)
>>> if (++wrr_pos == rx_adapter->wrr_len)
>>> wrr_pos = 0;
>>> }
>>> +
>>> + if (nb_rx > 0)
>>> + work = true;
>>> +
>>> + return work;
>
> Same as above
>
>>> }
>>>
>>> static void
>>> @@ -1384,12 +1408,14 @@ static int
>>> rxa_service_func(void *args)
>>> {
>>> struct event_eth_rx_adapter *rx_adapter = args;
>>> + bool intr_work;
>>> + bool poll_work;
>>>
>>> if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
>>> - return 0;
>>> + return -EAGAIN;
>>> if (!rx_adapter->rxa_started) {
>>> rte_spinlock_unlock(&rx_adapter->rx_lock);
>>> - return 0;
>>> + return -EAGAIN;
>>> }
>>>
>>> if (rx_adapter->ena_vector) {
>>> @@ -1410,12 +1436,12 @@ rxa_service_func(void *args)
>>> }
>>> }
>>>
>>> - rxa_intr_ring_dequeue(rx_adapter);
>>> - rxa_poll(rx_adapter);
>>> + intr_work = rxa_intr_ring_dequeue(rx_adapter);
>>> + poll_work = rxa_poll(rx_adapter);
>>>
>>> rte_spinlock_unlock(&rx_adapter->rx_lock);
>>>
>>> - return 0;
>>> + return intr_work || poll_work ? 0 : -EAGAIN;
>>> }
>>>
>>> static void *
>>> --
>>> 2.34.1
>
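For anyone wanting to reproduce the stats-enabled numbers above: the service
core statistics referred to are the per-service counters you switch on with
rte_service_set_stats_enable(). Below is a rough sketch of how an application
could enable them for the Rx adapter's service and read a couple of counters
back. Error handling is mostly omitted, adapter id 0 is just an assumption for
the example, and the exact set of attributes (and the rte_service_attr_get()
prototype) depends on the DPDK version.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_service.h>

/* Enable per-service statistics for the Rx adapter's service function
 * and print two of the standard counters. Adapter id 0 is assumed.
 */
static void
dump_rxa_service_stats(void)
{
	uint32_t service_id;
	uint64_t calls = 0;
	uint64_t cycles = 0;

	if (rte_event_eth_rx_adapter_service_id_get(0, &service_id) != 0)
		return; /* adapter is not run as a service */

	rte_service_set_stats_enable(service_id, 1);

	/* ... run traffic for a while ... */

	rte_service_attr_get(service_id, RTE_SERVICE_ATTR_CALL_COUNT, &calls);
	rte_service_attr_get(service_id, RTE_SERVICE_ATTR_CYCLES, &cycles);

	printf("rx adapter service: %" PRIu64 " calls, %" PRIu64 " cycles\n",
	       calls, cycles);
}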