> -----Original Message----- > From: Tomasz Dzieciol <t.dziec...@partner.samsung.com> > Sent: Friday, 12 May 2023 17:44 > To: qemu-devel@nongnu.org; akihiko.od...@daynix.com > Cc: Sriram Yagnaraman <sriram.yagnara...@est.tech>; > jasow...@redhat.com; k.kwiec...@samsung.com; > m.socha...@samsung.com > Subject: [PATCH v6 4/7] igb: RX payload guest writting refactoring > > Refactoring is done in preparation for support of multiple advanced > descriptors > RX modes, especially packet-split modes. > > Signed-off-by: Tomasz Dzieciol <t.dziec...@partner.samsung.com> > --- > hw/net/e1000e_core.c | 18 ++-- > hw/net/igb_core.c | 216 +++++++++++++++++++++++++-------------- > tests/qtest/libqos/igb.c | 5 + > 3 files changed, 153 insertions(+), 86 deletions(-) > > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index > b2e54fe802..f9ff31fd70 100644 > --- a/hw/net/e1000e_core.c > +++ b/hw/net/e1000e_core.c > @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore > *core, } > > static void > -e1000e_write_to_rx_buffers(E1000ECore *core, > - hwaddr ba[MAX_PS_BUFFERS], > - e1000e_ba_state *bastate, > - const char *data, > - dma_addr_t data_len) > +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, > + hwaddr ba[MAX_PS_BUFFERS], > + e1000e_ba_state *bastate, > + const char *data, > + dma_addr_t data_len) > { > while (data_len > 0) { > uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; > @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore > *core, struct NetRxPkt *pkt, > while (copy_size) { > iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > > - e1000e_write_to_rx_buffers(core, ba, &bastate, > - iov->iov_base + iov_ofs, > iov_copy); > + e1000e_write_payload_frag_to_rx_buffers(core, ba, > &bastate, > + iov->iov_base + > + iov_ofs, > + iov_copy); > > copy_size -= iov_copy; > iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ > e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > > if (desc_offset + desc_size >= total_size) { > /* Simulate FCS checksum presence in the last descriptor > */ > - e1000e_write_to_rx_buffers(core, ba, &bastate, > + e1000e_write_payload_frag_to_rx_buffers(core, ba, > + &bastate, > (const char *) &fcs_pad, > e1000x_fcs_len(core->mac)); > } > } > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index > 774b34fc92..0eabe7106e 100644 > --- a/hw/net/igb_core.c > +++ b/hw/net/igb_core.c > @@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo > *r, size_t total_size) > bufsize; > } > > +static uint32_t > +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000ERingInfo > +*r) {
Would be nice to have similar names for igb_rxbufsize and igb_get_queue_rx_header_buf_size. If we want to keep igb_rxbufsize due to its similarity with e1000e equivalent, how about igb_rxhdrbufsize() for this new function? > + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; > + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> > + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; } > + > void > igb_start_recv(IGBCore *core) > { > @@ -1231,6 +1239,21 @@ igb_read_adv_rx_descr(IGBCore *core, union > e1000_adv_rx_desc *desc, > *buff_addr = le64_to_cpu(desc->read.pkt_addr); } > > +typedef struct IGBPacketRxDMAState { > + size_t size; > + size_t total_size; > + size_t ps_hdr_len; > + size_t desc_size; > + size_t desc_offset; > + uint32_t rx_desc_packet_buf_size; > + uint32_t rx_desc_header_buf_size; > + struct iovec *iov; > + size_t iov_ofs; > + bool is_first; > + uint16_t written; > + hwaddr ba; > +} IGBPacketRxDMAState; > + > static inline void > igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > hwaddr *buff_addr) > @@ -1512,19 +1535,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, > PCIDevice *dev, dma_addr_t addr, > } > } > > -static void > -igb_write_to_rx_buffers(IGBCore *core, > - PCIDevice *d, > - hwaddr ba, > - uint16_t *written, > - const char *data, > - dma_addr_t data_len) > -{ > - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); > - pci_dma_write(d, ba + *written, data, data_len); > - *written += data_len; > -} > - > static void > igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi, > size_t pkt_size, size_t pkt_fcs_size) @@ -1550,6 > +1560,93 @@ > igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) > ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } > > +static void > +igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t > +*size) { > + if (*size > pdma_st->rx_desc_packet_buf_size) { > + *size = pdma_st->rx_desc_packet_buf_size; > + } > +} > + > +static void > +igb_write_payload_frag_to_rx_buffers(IGBCore *core, > + PCIDevice *d, > + hwaddr ba, > + uint16_t *written, > + uint32_t cur_buf_len, > + const char *data, > + dma_addr_t data_len) { > + trace_igb_rx_desc_buff_write(ba, *written, data, data_len); > + pci_dma_write(d, ba + *written, data, data_len); > + *written += data_len; > +} > + > +static void > +igb_write_payload_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGBPacketRxDMAState *pdma_st, > + size_t *copy_size) { > + static const uint32_t fcs_pad; > + size_t iov_copy; > + > + /* Copy packet payload */ > + while (*copy_size) { > + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); > + igb_write_payload_frag_to_rx_buffers(core, d, > + pdma_st->ba, > + &pdma_st->written, > + > pdma_st->rx_desc_packet_buf_size, > + pdma_st->iov->iov_base + > + pdma_st->iov_ofs, > + iov_copy); > + > + *copy_size -= iov_copy; > + pdma_st->iov_ofs += iov_copy; > + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { > + pdma_st->iov++; > + pdma_st->iov_ofs = 0; > + } > + } > + > + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { > + /* Simulate FCS checksum presence in the last descriptor */ > + igb_write_payload_frag_to_rx_buffers(core, d, > + pdma_st->ba, > + &pdma_st->written, > + > pdma_st->rx_desc_packet_buf_size, > + (const char *) &fcs_pad, > + e1000x_fcs_len(core->mac)); > + } > +} > + > +static void > +igb_write_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGBPacketRxDMAState *pdma_st) { > + size_t copy_size; > + > + if (!pdma_st->ba) { > + /* as per intel docs; skip descriptors with null buf addr */ > + trace_e1000e_rx_null_descriptor(); > + return; > + } > + > + if (pdma_st->desc_offset >= pdma_st->size) { > + return; > + } > + > + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); > + copy_size = pdma_st->size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, ©_size); > + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); > +} > + > static void > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > const E1000E_RxRing *rxr, @@ -1559,91 +1656,54 @@ > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > PCIDevice *d; > dma_addr_t base; > union e1000_rx_desc_union desc; > - size_t desc_size; > - size_t desc_offset = 0; > - size_t iov_ofs = 0; > - > - struct iovec *iov = net_rx_pkt_get_iovec(pkt); > - size_t size = net_rx_pkt_get_total_len(pkt); > - size_t total_size = size + e1000x_fcs_len(core->mac); > - const E1000ERingInfo *rxi = rxr->i; > - size_t bufsize = igb_rxbufsize(core, rxi); > - > + const E1000ERingInfo *rxi; > + size_t rx_desc_len; > + > + IGBPacketRxDMAState pdma_st = {0}; > + pdma_st.is_first = true; > + pdma_st.size = net_rx_pkt_get_total_len(pkt); > + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); > + > + rxi = rxr->i; > + rx_desc_len = core->rx_desc_len; > + pdma_st.rx_desc_packet_buf_size = > + igb_rxbufsize(core, rxi); > + pdma_st.rx_desc_header_buf_size = > + igb_get_queue_rx_header_buf_size(core, rxi); > + pdma_st.iov = net_rx_pkt_get_iovec(pkt); > d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); > if (!d) { > d = core->owner; > } > > do { > - hwaddr ba; > - uint16_t written = 0; > + pdma_st.written = 0; > bool is_last = false; > > - desc_size = total_size - desc_offset; > - > - if (desc_size > bufsize) { > - desc_size = bufsize; > - } > - > if (igb_ring_empty(core, rxi)) { > return; > } > > base = igb_ring_head_descr(core, rxi); > + pci_dma_read(d, base, &desc, rx_desc_len); > + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); > > - pci_dma_read(d, base, &desc, core->rx_desc_len); > - > - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); > - > - igb_read_rx_descr(core, &desc, &ba); > - > - if (ba) { > - if (desc_offset < size) { > - static const uint32_t fcs_pad; > - size_t iov_copy; > - size_t copy_size = size - desc_offset; > - if (copy_size > bufsize) { > - copy_size = bufsize; > - } > - > - /* Copy packet payload */ > - while (copy_size) { > - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > - > - igb_write_to_rx_buffers(core, d, ba, &written, > - iov->iov_base + iov_ofs, > iov_copy); > + igb_read_rx_descr(core, &desc, &pdma_st.ba); > > - copy_size -= iov_copy; > - iov_ofs += iov_copy; > - if (iov_ofs == iov->iov_len) { > - iov++; > - iov_ofs = 0; > - } > - } > - > - if (desc_offset + desc_size >= total_size) { > - /* Simulate FCS checksum presence in the last descriptor > */ > - igb_write_to_rx_buffers(core, d, ba, &written, > - (const char *) &fcs_pad, > e1000x_fcs_len(core->mac)); > - } > - } > - } else { /* as per intel docs; skip descriptors with null buf addr */ > - trace_e1000e_rx_null_descriptor(); > - } > - desc_offset += desc_size; > - if (desc_offset >= total_size) { > + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); > + pdma_st.desc_offset += pdma_st.desc_size; > + if (pdma_st.desc_offset >= pdma_st.total_size) { > is_last = true; > } > > igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, > - rss_info, etqf, ts, written); > - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); > - > - igb_ring_advance(core, rxi, core->rx_desc_len / > E1000_MIN_RX_DESC_LEN); > - > - } while (desc_offset < total_size); > + rss_info, etqf, ts, pdma_st.written); > + pci_dma_write(d, base, &desc, rx_desc_len); > + igb_ring_advance(core, rxi, > + rx_desc_len / E1000_MIN_RX_DESC_LEN); > + } while (pdma_st.desc_offset < pdma_st.total_size); > > - igb_update_rx_stats(core, rxi, size, total_size); > + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); > } > > static bool > diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index > a603468beb..f40c4ec4cd 100644 > --- a/tests/qtest/libqos/igb.c > +++ b/tests/qtest/libqos/igb.c > @@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj) > E1000_RAH_AV | E1000_RAH_POOL_1 | > le16_to_cpu(*(uint16_t *)(address + 4))); > > + /* Set supported receive descriptor mode */ > + e1000e_macreg_write(&d->e1000e, > + E1000_SRRCTL(0), > + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); > + > /* Enable receive */ > e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); > e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); > -- > 2.25.1