From: Mateusz Polchlopek <mateusz.polchlo...@intel.com>
Date: Tue,  4 Jun 2024 09:13:57 -0400

> From: Jacob Keller <jacob.e.kel...@intel.com>
> 
> Using VIRTCHNL_VF_OFFLOAD_FLEX_DESC, the iAVF driver is capable of
> negotiating to enable the advanced flexible descriptor layout. Add the
> flexible NIC layout (RXDID=2) as a member of the Rx descriptor union.
> 
> Also add bit position definitions for the status and error indications
> that are needed.
> 
> The iavf_clean_rx_irq function needs to extract a few fields from the Rx
> descriptor, including the size, rx_ptype, and vlan_tag.
> Move the extraction to a separate function that decodes the fields into
> a structure. This will reduce the burden for handling multiple
> descriptor types by keeping the relevant extraction logic in one place.
> 
> To support handling an additional descriptor format with minimal code
> duplication, refactor Rx checksum handling so that the general logic
> is separated from the bit calculations. Introduce an iavf_rx_csum_decoded
> structure which holds the relevant bits decoded from the Rx descriptor.
> This will enable implementing flexible descriptor handling without
> duplicating the general logic.
> 
> Introduce the iavf_extract_flex_rx_fields, iavf_flex_rx_hash, and
> iavf_flex_rx_csum functions which operate on the flexible NIC descriptor
> format instead of the legacy 32 byte format. Based on the negotiated
> RXDID, select the correct function for processing the Rx descriptors.
> 
> With this change, the Rx hot path should be functional when using either
> the default legacy 32-byte format or when we switch to the flexible NIC
> layout.
> 
> Modify the Rx hot path to add support for the flexible descriptor
> format and request enabling Rx timestamps for all queues.
> 
> As in ice, make sure we bump the checksum level if the hardware detected
> a packet type which could have an outer checksum. This is important
> because hardware only verifies the inner checksum.
> 
> Signed-off-by: Jacob Keller <jacob.e.kel...@intel.com>
> Reviewed-by: Wojciech Drewek <wojciech.dre...@intel.com>
> Co-developed-by: Mateusz Polchlopek <mateusz.polchlo...@intel.com>
> Signed-off-by: Mateusz Polchlopek <mateusz.polchlo...@intel.com>
> ---
>  drivers/net/ethernet/intel/iavf/iavf_txrx.c   | 354 +++++++++++++-----
>  drivers/net/ethernet/intel/iavf/iavf_txrx.h   |   8 +
>  drivers/net/ethernet/intel/iavf/iavf_type.h   | 147 ++++++--
>  .../net/ethernet/intel/iavf/iavf_virtchnl.c   |   5 +
>  4 files changed, 391 insertions(+), 123 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> index 26b424fd6718..97da5af52ad7 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> @@ -895,63 +895,68 @@ bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
>       return true;
>  }
>  
> +/* iavf_rx_csum_decoded
> + *
> + * Checksum offload bits decoded from the receive descriptor.
> + */
> +struct iavf_rx_csum_decoded {
> +     u8 l3l4p : 1;
> +     u8 ipe : 1;
> +     u8 eipe : 1;
> +     u8 eudpe : 1;
> +     u8 ipv6exadd : 1;
> +     u8 l4e : 1;
> +     u8 pprs : 1;
> +     u8 nat : 1;
> +};

I see the same struct in idpf, probably a candidate for libeth.

> +
>  /**
> - * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum
> + * iavf_rx_csum - Indicate in skb if hw indicated a good checksum
>   * @vsi: the VSI we care about
>   * @skb: skb currently being received and modified
> - * @rx_desc: the receive descriptor
> + * @ptype: decoded ptype information
> + * @csum_bits: decoded Rx descriptor information
>   **/
> -static void iavf_rx_checksum(struct iavf_vsi *vsi,
> -                          struct sk_buff *skb,
> -                          union iavf_rx_desc *rx_desc)
> +static void iavf_rx_csum(struct iavf_vsi *vsi, struct sk_buff *skb,

Can't @vsi be const?

> +                      struct libeth_rx_pt *ptype,

struct libeth_rx_pt is smaller than a pointer to it. Pass it directly.

> +                      struct iavf_rx_csum_decoded *csum_bits)

Same for this struct.
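
Something like this perhaps (untested sketch, with the consts added as
suggested above):

	static void iavf_rx_csum(const struct iavf_vsi *vsi, struct sk_buff *skb,
				 struct libeth_rx_pt ptype,
				 struct iavf_rx_csum_decoded csum_bits)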

>  {
> -     struct libeth_rx_pt decoded;
> -     u32 rx_error, rx_status;
>       bool ipv4, ipv6;
> -     u8 ptype;
> -     u64 qword;
>  
>       skb->ip_summed = CHECKSUM_NONE;
>  
> -     qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> -     ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> -
> -     decoded = libie_rx_pt_parse(ptype);
> -     if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
> -             return;
> -
> -     rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword);
> -     rx_status = FIELD_GET(IAVF_RXD_QW1_STATUS_MASK, qword);
> -
>       /* did the hardware decode the packet and checksum? */
> -     if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT)))
> +     if (!csum_bits->l3l4p)
>               return;
>  
> -     ipv4 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV4;
> -     ipv6 = libeth_rx_pt_get_ip_ver(decoded) == LIBETH_RX_PT_OUTER_IPV6;
> +     ipv4 = libeth_rx_pt_get_ip_ver(*ptype) == LIBETH_RX_PT_OUTER_IPV4;
> +     ipv6 = libeth_rx_pt_get_ip_ver(*ptype) == LIBETH_RX_PT_OUTER_IPV6;
>  
> -     if (ipv4 &&
> -         (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) |
> -                      BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT))))
> +     if (ipv4 && (csum_bits->ipe || csum_bits->eipe))
>               goto checksum_fail;
>  
>       /* likely incorrect csum if alternate IP extension headers found */
> -     if (ipv6 &&
> -         rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT))
> -             /* don't increment checksum err here, non-fatal err */
> +     if (ipv6 && csum_bits->ipv6exadd)
>               return;
>  
>       /* there was some L4 error, count error and punt packet to the stack */
> -     if (rx_error & BIT(IAVF_RX_DESC_ERROR_L4E_SHIFT))
> +     if (csum_bits->l4e)
>               goto checksum_fail;
>  
>       /* handle packets that were not able to be checksummed due
>        * to arrival speed, in this case the stack can compute
>        * the csum.
>        */
> -     if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT))
> +     if (csum_bits->pprs)
>               return;
>  
> +     /* If there is an outer header present that might contain a checksum
> +      * we need to bump the checksum level by 1 to reflect the fact that
> +      * we are indicating we validated the inner checksum.
> +      */
> +     if (ptype->tunnel_type >= LIBETH_RX_PT_TUNNEL_IP_GRENAT)
> +             skb->csum_level = 1;
> +
>       skb->ip_summed = CHECKSUM_UNNECESSARY;
>       return;

For the whole function: you need to use unlikely() for checksum errors
to not slow down regular frames.
Also, I would even unlikely() packets with an unverified checksum, as
that's a really rare case.
Optimize the hotpath for the most common workloads.
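
E.g. (sketch, same for the other error branches):

	if (unlikely(ipv4 && (csum_bits->ipe || csum_bits->eipe)))
		goto checksum_fail;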

>  
> @@ -960,22 +965,105 @@ static void iavf_rx_checksum(struct iavf_vsi *vsi,
>  }
>  
>  /**
> - * iavf_rx_hash - set the hash value in the skb
> + * iavf_legacy_rx_csum - Indicate in skb if hw indicated a good cksum
> + * @vsi: the VSI we care about
> + * @skb: skb currently being received and modified
> + * @rx_desc: the receive descriptor
> + *
> + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
> + * descriptor writeback format.
> + **/
> +static void iavf_legacy_rx_csum(struct iavf_vsi *vsi,
> +                             struct sk_buff *skb,
> +                             union iavf_rx_desc *rx_desc)

@vsi and @rx_desc can be const.

> +{
> +     struct iavf_rx_csum_decoded csum_bits;
> +     struct libeth_rx_pt decoded;
> +
> +     u32 rx_error;
> +     u64 qword;
> +     u16 ptype;
> +
> +     qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> +     ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> +     rx_error = FIELD_GET(IAVF_RXD_QW1_ERROR_MASK, qword);

You don't need @rx_error before libeth_rx_pt_has_checksum().

> +     decoded = libie_rx_pt_parse(ptype);
> +
> +     if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
> +             return;
> +
> +     csum_bits.ipe = FIELD_GET(IAVF_RX_DESC_ERROR_IPE_MASK, rx_error);

So, @rx_error is a field of @qword and then there are more subfields?
Why not extract those fields directly from @qword?

> +     csum_bits.eipe = FIELD_GET(IAVF_RX_DESC_ERROR_EIPE_MASK, rx_error);
> +     csum_bits.l4e = FIELD_GET(IAVF_RX_DESC_ERROR_L4E_MASK, rx_error);
> +     csum_bits.pprs = FIELD_GET(IAVF_RX_DESC_ERROR_PPRS_MASK, rx_error);
> +     csum_bits.l3l4p = FIELD_GET(IAVF_RX_DESC_STATUS_L3L4P_MASK, rx_error);
> +     csum_bits.ipv6exadd = FIELD_GET(IAVF_RX_DESC_STATUS_IPV6EXADD_MASK,
> +                                     rx_error);
> +     csum_bits.nat = 0;
> +     csum_bits.eudpe = 0;

Initialize the whole struct with = { } at the declaration site and
remove this.
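
I.e. (sketch):

	struct iavf_rx_csum_decoded csum_bits = { };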

> +
> +     iavf_rx_csum(vsi, skb, &decoded, &csum_bits);

In order to avoid having 2 call sites for this, make
iavf_{flex,legacy}_rx_csum() return @csum_bits and call iavf_rx_csum()
outside of them once.
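
Roughly (untested sketch):

	static struct iavf_rx_csum_decoded
	iavf_legacy_rx_csum(const struct iavf_vsi *vsi,
			    const union iavf_rx_desc *rx_desc)

and then a single call site in iavf_process_skb_fields():

	csum_bits = iavf_legacy_rx_csum(rx_ring->vsi, rx_desc);
	iavf_rx_csum(rx_ring->vsi, skb, ptype, csum_bits);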

> +}
> +
> +/**
> + * iavf_flex_rx_csum - Indicate in skb if hw indicated a good cksum
> + * @vsi: the VSI we care about
> + * @skb: skb currently being received and modified
> + * @rx_desc: the receive descriptor
> + *
> + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
> + * descriptor writeback format.
> + **/
> +static void iavf_flex_rx_csum(struct iavf_vsi *vsi, struct sk_buff *skb,
> +                           union iavf_rx_desc *rx_desc)

Same for const.

> +{
> +     struct iavf_rx_csum_decoded csum_bits;
> +     struct libeth_rx_pt decoded;
> +     u16 rx_status0, ptype;
> +
> +     rx_status0 = le16_to_cpu(rx_desc->flex_wb.status_error0);

This is not needed before libeth_rx_pt_has_checksum().

> +     ptype = FIELD_GET(IAVF_RX_FLEX_DESC_PTYPE_M,
> +                       le16_to_cpu(rx_desc->flex_wb.ptype_flexi_flags0));

You also access this field later when extracting base fields. Shouldn't
this be combined somehow?

> +     decoded = libie_rx_pt_parse(ptype);
> +
> +     if (!libeth_rx_pt_has_checksum(vsi->netdev, decoded))
> +             return;
> +
> +     csum_bits.ipe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_M,
> +                               rx_status0);
> +     csum_bits.eipe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M,
> +                                rx_status0);
> +     csum_bits.l4e = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_M,
> +                               rx_status0);
> +     csum_bits.eudpe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M,
> +                                 rx_status0);
> +     csum_bits.l3l4p = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_L3L4P_M,
> +                                 rx_status0);
> +     csum_bits.ipv6exadd = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_M,
> +                                     rx_status0);
> +     csum_bits.nat = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS1_NAT_M, rx_status0);
> +     csum_bits.pprs = 0;

See above for struct initialization.

> +
> +     iavf_rx_csum(vsi, skb, &decoded, &csum_bits);

See above.

> +}
> +
> +/**
> + * iavf_legacy_rx_hash - set the hash value in the skb
>   * @ring: descriptor ring
>   * @rx_desc: specific descriptor
>   * @skb: skb currently being received and modified
>   * @rx_ptype: Rx packet type
> + *
> + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
> + * descriptor writeback format.
>   **/
> -static void iavf_rx_hash(struct iavf_ring *ring,
> -                      union iavf_rx_desc *rx_desc,
> -                      struct sk_buff *skb,
> -                      u8 rx_ptype)
> +static void iavf_legacy_rx_hash(struct iavf_ring *ring,
> +                             union iavf_rx_desc *rx_desc,

Const for both.

> +                             struct sk_buff *skb, u8 rx_ptype)
>  {
> +     const __le64 rss_mask = cpu_to_le64(IAVF_RX_DESC_STATUS_FLTSTAT_MASK);
>       struct libeth_rx_pt decoded;
>       u32 hash;
> -     const __le64 rss_mask =
> -             cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH <<
> -                         IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT);

Looks unrelated, but a nice change anyway.

>  
>       decoded = libie_rx_pt_parse(rx_ptype);
>       if (!libeth_rx_pt_has_hash(ring->netdev, decoded))
> @@ -987,6 +1075,38 @@ static void iavf_rx_hash(struct iavf_ring *ring,
>       }
>  }
>  
> +/**
> + * iavf_flex_rx_hash - set the hash value in the skb
> + * @ring: descriptor ring
> + * @rx_desc: specific descriptor
> + * @skb: skb currently being received and modified
> + * @rx_ptype: Rx packet type
> + *
> + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
> + * descriptor writeback format.
> + **/
> +static void iavf_flex_rx_hash(struct iavf_ring *ring,
> +                           union iavf_rx_desc *rx_desc,

Const.

> +                           struct sk_buff *skb, u16 rx_ptype)

Why is @rx_ptype u16 here, but u8 above? Just use u32 for both.

> +{
> +     struct libeth_rx_pt decoded;
> +     u16 status0;
> +     u32 hash;
> +
> +     if (!(ring->netdev->features & NETIF_F_RXHASH))

This is checked in libeth_rx_pt_has_hash(), why check 2 times?

> +             return;
> +
> +     decoded = libie_rx_pt_parse(rx_ptype);
> +     if (!libeth_rx_pt_has_hash(ring->netdev, decoded))
> +             return;
> +
> +     status0 = le16_to_cpu(rx_desc->flex_wb.status_error0);
> +     if (status0 & IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_M) {
> +             hash = le32_to_cpu(rx_desc->flex_wb.rss_hash);
> +             libeth_rx_pt_set_hash(skb, hash, decoded);
> +     }
> +}

Also, just parse rx_ptype once in process_skb_fields(), you don't need
to do that in each function.

> +
>  /**
>   * iavf_process_skb_fields - Populate skb header fields from Rx descriptor
>   * @rx_ring: rx descriptor ring packet is being transacted on
> @@ -998,14 +1118,17 @@ static void iavf_rx_hash(struct iavf_ring *ring,
>   * order to populate the hash, checksum, VLAN, protocol, and
>   * other fields within the skb.
>   **/
> -static void
> -iavf_process_skb_fields(struct iavf_ring *rx_ring,
> -                     union iavf_rx_desc *rx_desc, struct sk_buff *skb,
> -                     u8 rx_ptype)
> +static void iavf_process_skb_fields(struct iavf_ring *rx_ring,
> +                                 union iavf_rx_desc *rx_desc,

Const.

> +                                 struct sk_buff *skb, u16 rx_ptype)
>  {
> -     iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
> -
> -     iavf_rx_checksum(rx_ring->vsi, skb, rx_desc);
> +     if (rx_ring->rxdid == VIRTCHNL_RXDID_1_32B_BASE) {

Any likely/unlikely() here? Or is it 50:50?

> +             iavf_legacy_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
> +             iavf_legacy_rx_csum(rx_ring->vsi, skb, rx_desc);
> +     } else {
> +             iavf_flex_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
> +             iavf_flex_rx_csum(rx_ring->vsi, skb, rx_desc);
> +     }
>  
>       skb_record_rx_queue(skb, rx_ring->queue_index);
>  
> @@ -1092,7 +1215,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
>  /**
>   * iavf_is_non_eop - process handling of non-EOP buffers
>   * @rx_ring: Rx ring being processed
> - * @rx_desc: Rx descriptor for current buffer
> + * @fields: Rx descriptor extracted fields
>   * @skb: Current socket buffer containing buffer in progress
>   *
>   * This function updates next to clean.  If the buffer is an EOP buffer
> @@ -1101,7 +1224,7 @@ static struct sk_buff *iavf_build_skb(const struct libeth_fqe *rx_buffer,
>   * that this is in fact a non-EOP buffer.
>   **/
>  static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
> -                         union iavf_rx_desc *rx_desc,
> +                         struct iavf_rx_extracted *fields,

Pass the struct by value instead of by pointer.

>                           struct sk_buff *skb)

Is it still needed?

>  {
>       u32 ntc = rx_ring->next_to_clean + 1;
> @@ -1113,8 +1236,7 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
>       prefetch(IAVF_RX_DESC(rx_ring, ntc));
>  
>       /* if we are the last buffer then there is nothing else to do */
> -#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT)
> -     if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF)))
> +     if (likely(fields->end_of_packet))
>               return false;
>  
>       rx_ring->rx_stats.non_eop_descs++;
> @@ -1122,6 +1244,91 @@ static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
>       return true;
>  }
>  
> +/**
> + * iavf_extract_legacy_rx_fields - Extract fields from the Rx descriptor
> + * @rx_ring: rx descriptor ring
> + * @rx_desc: the descriptor to process
> + * @fields: storage for extracted values
> + *
> + * Decode the Rx descriptor and extract relevant information including the
> + * size, VLAN tag, Rx packet type, end of packet field and RXE field value.
> + *
> + * This function only operates on the VIRTCHNL_RXDID_1_32B_BASE legacy 32byte
> + * descriptor writeback format.
> + */
> +static void iavf_extract_legacy_rx_fields(struct iavf_ring *rx_ring,
> +                                       union iavf_rx_desc *rx_desc,

Consts.

> +                                       struct iavf_rx_extracted *fields)

Return a struct &iavf_rx_extracted instead of passing a pointer to it.
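
E.g. (sketch):

	static struct iavf_rx_extracted
	iavf_extract_legacy_rx_fields(const struct iavf_ring *rx_ring,
				      const union iavf_rx_desc *rx_desc)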

> +{
> +     u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> +
> +     fields->size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword);
> +     fields->rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> +
> +     if (qword & IAVF_RX_DESC_STATUS_L2TAG1P_MASK &&
> +         rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
> +             fields->vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1);
> +
> +     if (rx_desc->wb.qword2.ext_status &
> +         cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) &&

Bitops must have their own pairs of parentheses.
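
I.e. (sketch):

	if ((rx_desc->wb.qword2.ext_status &
	     cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT))) &&
	    (rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2))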

> +         rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
> +             fields->vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2);
> +
> +     fields->end_of_packet = FIELD_GET(IAVF_RX_DESC_STATUS_EOF_MASK, qword);
> +     fields->rxe = FIELD_GET(BIT(IAVF_RXD_QW1_ERROR_SHIFT), qword);
> +}
> +
> +/**
> + * iavf_extract_flex_rx_fields - Extract fields from the Rx descriptor
> + * @rx_ring: rx descriptor ring
> + * @rx_desc: the descriptor to process
> + * @fields: storage for extracted values
> + *
> + * Decode the Rx descriptor and extract relevant information including the
> + * size, VLAN tag, Rx packet type, end of packet field and RXE field value.
> + *
> + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
> + * descriptor writeback format.
> + */
> +static void iavf_extract_flex_rx_fields(struct iavf_ring *rx_ring,
> +                                     union iavf_rx_desc *rx_desc,
> +                                     struct iavf_rx_extracted *fields)

Same for everything.

> +{
> +     u16 status0, status1, flexi_flags0;
> +
> +     fields->size = FIELD_GET(IAVF_RX_FLEX_DESC_PKT_LEN_M,
> +                              le16_to_cpu(rx_desc->flex_wb.pkt_len));

le16_get_bits().

> +
> +     flexi_flags0 = le16_to_cpu(rx_desc->flex_wb.ptype_flexi_flags0);
> +
> +     fields->rx_ptype = FIELD_GET(IAVF_RX_FLEX_DESC_PTYPE_M, flexi_flags0);

le16_get_bits() instead of these two?
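
I.e. (untested):

	fields->size = le16_get_bits(rx_desc->flex_wb.pkt_len,
				     IAVF_RX_FLEX_DESC_PKT_LEN_M);
	fields->rx_ptype = le16_get_bits(rx_desc->flex_wb.ptype_flexi_flags0,
					 IAVF_RX_FLEX_DESC_PTYPE_M);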

> +
> +     status0 = le16_to_cpu(rx_desc->flex_wb.status_error0);
> +     if (status0 & IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_M &&
> +         rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)

Braces for bitops.

> +             fields->vlan_tag = le16_to_cpu(rx_desc->flex_wb.l2tag1);
> +
> +     status1 = le16_to_cpu(rx_desc->flex_wb.status_error1);
> +     if (status1 & IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_M &&
> +         rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
> +             fields->vlan_tag = le16_to_cpu(rx_desc->flex_wb.l2tag2_2nd);

(same)

> +
> +     fields->end_of_packet = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS_ERR0_EOP_BIT,
> +                                       status0);
> +     fields->rxe = FIELD_GET(IAVF_RX_FLEX_DESC_STATUS_ERR0_RXE_BIT,
> +                             status0);
> +}
> +
> +static void iavf_extract_rx_fields(struct iavf_ring *rx_ring,
> +                                union iavf_rx_desc *rx_desc,
> +                                struct iavf_rx_extracted *fields)

Consts + return struct @fields directly.

> +{
> +     if (rx_ring->rxdid == VIRTCHNL_RXDID_1_32B_BASE)

You check this several times; it could be combined and optimized.
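
E.g. read it once at the top of iavf_clean_rx_irq() (sketch):

	bool legacy = rx_ring->rxdid == VIRTCHNL_RXDID_1_32B_BASE;

and branch on the local inside the loop.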

> +             iavf_extract_legacy_rx_fields(rx_ring, rx_desc, fields);
> +     else
> +             iavf_extract_flex_rx_fields(rx_ring, rx_desc, fields);
> +}
> +
>  /**
>   * iavf_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
>   * @rx_ring: rx descriptor ring to transact packets on
> @@ -1142,12 +1349,9 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
>       bool failure = false;
>  
>       while (likely(total_rx_packets < (unsigned int)budget)) {
> +             struct iavf_rx_extracted fields = {};
>               struct libeth_fqe *rx_buffer;
>               union iavf_rx_desc *rx_desc;
> -             unsigned int size;
> -             u16 vlan_tag = 0;
> -             u8 rx_ptype;
> -             u64 qword;
>  
>               /* return some buffers to hardware, one at a time is too slow */
>               if (cleaned_count >= IAVF_RX_BUFFER_WRITE) {
> @@ -1158,35 +1362,27 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
>  
>               rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
>  
> -             /* status_error_len will always be zero for unused descriptors
> -              * because it's cleared in cleanup, and overlaps with hdr_addr
> -              * which is always zero because packet split isn't used, if the
> -              * hardware wrote DD then the length will be non-zero
> -              */
> -             qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> -
>               /* This memory barrier is needed to keep us from reading
>                * any other fields out of the rx_desc until we have
>                * verified the descriptor has been written back.
>                */
>               dma_rmb();
> -#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT)
> -             if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD))
> +             if (!iavf_test_staterr(rx_desc, IAVF_RX_DESC_STATUS_DD_MASK))
>                       break;
>  
> -             size = FIELD_GET(IAVF_RXD_QW1_LENGTH_PBUF_MASK, qword);
> +             iavf_extract_rx_fields(rx_ring, rx_desc, &fields);
>  
>               iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb);
>  
>               rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean];
> -             if (!libeth_rx_sync_for_cpu(rx_buffer, size))
> +             if (!libeth_rx_sync_for_cpu(rx_buffer, fields.size))
>                       goto skip_data;
>  
>               /* retrieve a buffer from the ring */
>               if (skb)
> -                     iavf_add_rx_frag(skb, rx_buffer, size);
> +                     iavf_add_rx_frag(skb, rx_buffer, fields.size);
>               else
> -                     skb = iavf_build_skb(rx_buffer, size);
> +                     skb = iavf_build_skb(rx_buffer, fields.size);
>  
>               /* exit if we failed to retrieve a buffer */
>               if (!skb) {
> @@ -1197,15 +1393,14 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
>  skip_data:
>               cleaned_count++;
>  
> -             if (iavf_is_non_eop(rx_ring, rx_desc, skb) || unlikely(!skb))
> +             if (iavf_is_non_eop(rx_ring, &fields, skb) || unlikely(!skb))
>                       continue;
>  
> -             /* ERR_MASK will only have valid bits if EOP set, and
> -              * what we are doing here is actually checking
> -              * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
> -              * the error field
> +             /* RXE field in descriptor is an indication of the MAC errors
> +              * (like CRC, alignment, oversize etc). If it is set then iavf
> +              * should finish.
>                */
> -             if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) {
> +             if (unlikely(fields.rxe)) {
>                       dev_kfree_skb_any(skb);
>                       skb = NULL;
>                       continue;
> @@ -1219,22 +1414,11 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
>               /* probably a little skewed due to removing CRC */
>               total_rx_bytes += skb->len;
>  
> -             qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
> -             rx_ptype = FIELD_GET(IAVF_RXD_QW1_PTYPE_MASK, qword);
> -
>               /* populate checksum, VLAN, and protocol */
> -             iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
> -
> -             if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) &&
> -                 rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
> -                     vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1);
> -             if (rx_desc->wb.qword2.ext_status &
> -                 cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) &&
> -                 rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
> -                     vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2);

BTW I'm wondering whether filling the whole @fields can be less
optimized than accessing descriptor fields one by one like it was done
here before.
I mean, in some cases you won't need all the fields from the extracted
struct, but they will be read and initialized anyway.

> +             iavf_process_skb_fields(rx_ring, rx_desc, skb, fields.rx_ptype);
>  
>               iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
> -             iavf_receive_skb(rx_ring, skb, vlan_tag);
> +             iavf_receive_skb(rx_ring, skb, fields.vlan_tag);
>               skb = NULL;
>  
>               /* update budget accounting */
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
> index 17309d8625ac..3661cd57a068 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h
> +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
> @@ -99,6 +99,14 @@ static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc,
>                 cpu_to_le64(stat_err_bits));
>  }
>  
> +struct iavf_rx_extracted {
> +     unsigned int size;
> +     u16 vlan_tag;
> +     u16 rx_ptype;
> +     u8 end_of_packet:1;
> +     u8 rxe:1;
> +};

Also something libeth-ish, but not sure about this one.

> +
>  /* How many Rx Buffers do we bundle into one write to the hardware ? */
>  #define IAVF_RX_INCREMENT(r, i) \
>       do {                                    \
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h
> index f6b09e57abce..82c16a720807 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_type.h
> +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h
> @@ -206,6 +206,45 @@ union iavf_16byte_rx_desc {
>       } wb;  /* writeback */
>  };
>  
> +/* Rx Flex Descriptor NIC Profile
> + * RxDID Profile ID 2
> + * Flex-field 0: RSS hash lower 16-bits
> + * Flex-field 1: RSS hash upper 16-bits
> + * Flex-field 2: Flow ID lower 16-bits
> + * Flex-field 3: Flow ID higher 16-bits
> + * Flex-field 4: reserved, VLAN ID taken from L2Tag
> + */
> +struct iavf_32byte_rx_flex_wb {
> +     /* Qword 0 */
> +     u8 rxdid;
> +     u8 mir_id_umb_cast;
> +     __le16 ptype_flexi_flags0;
> +     __le16 pkt_len;
> +     __le16 hdr_len_sph_flex_flags1;
> +
> +     /* Qword 1 */
> +     __le16 status_error0;
> +     __le16 l2tag1;
> +     __le32 rss_hash;
> +
> +     /* Qword 2 */
> +     __le16 status_error1;
> +     u8 flexi_flags2;
> +     u8 ts_low;
> +     __le16 l2tag2_1st;
> +     __le16 l2tag2_2nd;
> +
> +     /* Qword 3 */
> +     __le32 flow_id;
> +     union {
> +             struct {
> +                     __le16 rsvd;
> +                     __le16 flow_id_ipv6;
> +             } flex;
> +             __le32 ts_high;
> +     } flex_ts;
> +};

I'm wondering whether HW descriptors can be defined as just a bunch of
u64 qwords instead of all those u8/__le16 etc. fields. That would be
faster. In case this works differently on BE and LE, add #ifdefs.
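
Very rough sketch (assuming the field layout allows it, would need
checking against the spec):

	struct iavf_32byte_rx_flex_wb {
		__le64 qw0;
		__le64 qw1;
		__le64 qw2;
		__le64 qw3;
	};

with GENMASK_ULL() + le64_get_bits() accessors on top.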

> +
>  union iavf_32byte_rx_desc {
>       struct {
>               __le64  pkt_addr; /* Packet buffer address */
> @@ -253,35 +292,34 @@ union iavf_32byte_rx_desc {
>                       } hi_dword;
>               } qword3;
>       } wb;  /* writeback */
> +     struct iavf_32byte_rx_flex_wb flex_wb;

So, the already existing formats are described here directly, but flex
is declared as a named struct field? Can this be made more consistent?

>  };
>  
> -enum iavf_rx_desc_status_bits {
> -     /* Note: These are predefined bit offsets */
> -     IAVF_RX_DESC_STATUS_DD_SHIFT            = 0,
> -     IAVF_RX_DESC_STATUS_EOF_SHIFT           = 1,
> -     IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT       = 2,
> -     IAVF_RX_DESC_STATUS_L3L4P_SHIFT         = 3,
> -     IAVF_RX_DESC_STATUS_CRCP_SHIFT          = 4,
> -     IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT      = 5, /* 2 BITS */
> -     IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT     = 7,
> -     /* Note: Bit 8 is reserved in X710 and XL710 */
> -     IAVF_RX_DESC_STATUS_EXT_UDP_0_SHIFT     = 8,
> -     IAVF_RX_DESC_STATUS_UMBCAST_SHIFT       = 9, /* 2 BITS */
> -     IAVF_RX_DESC_STATUS_FLM_SHIFT           = 11,
> -     IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT       = 12, /* 2 BITS */
> -     IAVF_RX_DESC_STATUS_LPBK_SHIFT          = 14,
> -     IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT     = 15,
> -     IAVF_RX_DESC_STATUS_RESERVED_SHIFT      = 16, /* 2 BITS */
> -     /* Note: For non-tunnel packets INT_UDP_0 is the right status for
> -      * UDP header
> -      */
> -     IAVF_RX_DESC_STATUS_INT_UDP_0_SHIFT     = 18,
> -     IAVF_RX_DESC_STATUS_LAST /* this entry must be last!!! */
> -};
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_DESC_STATUS_DD_MASK          BIT(0)
> +#define IAVF_RX_DESC_STATUS_EOF_MASK         BIT(1)
> +#define IAVF_RX_DESC_STATUS_L2TAG1P_MASK     BIT(2)
> +#define IAVF_RX_DESC_STATUS_L3L4P_MASK               BIT(3)
> +#define IAVF_RX_DESC_STATUS_CRCP_MASK                BIT(4)
> +#define IAVF_RX_DESC_STATUS_TSYNINDX_MASK    GENMASK_ULL(6, 5)
> +#define IAVF_RX_DESC_STATUS_TSYNVALID_MASK   BIT(7)
> +/* Note: Bit 8 is reserved in X710 and XL710 */
> +#define IAVF_RX_DESC_STATUS_EXT_UDP_0_MASK   BIT(8)
> +#define IAVF_RX_DESC_STATUS_UMBCAST_MASK     GENMASK_ULL(10, 9)
> +#define IAVF_RX_DESC_STATUS_FLM_MASK         BIT(11)
> +#define IAVF_RX_DESC_STATUS_FLTSTAT_MASK     GENMASK_ULL(13, 12)
> +#define IAVF_RX_DESC_STATUS_LPBK_MASK                BIT(14)
> +#define IAVF_RX_DESC_STATUS_IPV6EXADD_MASK   BIT(15)
> +#define IAVF_RX_DESC_STATUS_RESERVED_MASK    GENMASK_ULL(17, 16)
> +/* Note: For non-tunnel packets INT_UDP_0 is the right status for
> + * UDP header
> + */
> +#define IAVF_RX_DESC_STATUS_INT_UDP_0_MASK   BIT(18)
> +
> +#define IAVF_RX_FLEX_DESC_STATUS_ERR0_EOP_BIT        BIT(1)
> +#define IAVF_RX_FLEX_DESC_STATUS_ERR0_RXE_BIT        BIT(10)
>  
> -#define IAVF_RXD_QW1_STATUS_SHIFT    0
> -#define IAVF_RXD_QW1_STATUS_MASK     ((BIT(IAVF_RX_DESC_STATUS_LAST) - 1) \
> -                                      << IAVF_RXD_QW1_STATUS_SHIFT)
> +#define IAVF_RXD_QW1_STATUS_MASK             (BIT(19) - 1)

GENMASK().
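
I.e.:

	#define IAVF_RXD_QW1_STATUS_MASK	GENMASK(18, 0)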

>  
>  #define IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT
>  #define IAVF_RXD_QW1_STATUS_TSYNINDX_MASK  (0x3UL << \
> @@ -301,18 +339,16 @@ enum iavf_rx_desc_fltstat_values {
>  #define IAVF_RXD_QW1_ERROR_SHIFT     19
>  #define IAVF_RXD_QW1_ERROR_MASK              (0xFFUL << IAVF_RXD_QW1_ERROR_SHIFT)
>  
> -enum iavf_rx_desc_error_bits {
> -     /* Note: These are predefined bit offsets */
> -     IAVF_RX_DESC_ERROR_RXE_SHIFT            = 0,
> -     IAVF_RX_DESC_ERROR_RECIPE_SHIFT         = 1,
> -     IAVF_RX_DESC_ERROR_HBO_SHIFT            = 2,
> -     IAVF_RX_DESC_ERROR_L3L4E_SHIFT          = 3, /* 3 BITS */
> -     IAVF_RX_DESC_ERROR_IPE_SHIFT            = 3,
> -     IAVF_RX_DESC_ERROR_L4E_SHIFT            = 4,
> -     IAVF_RX_DESC_ERROR_EIPE_SHIFT           = 5,
> -     IAVF_RX_DESC_ERROR_OVERSIZE_SHIFT       = 6,
> -     IAVF_RX_DESC_ERROR_PPRS_SHIFT           = 7
> -};
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_DESC_ERROR_RXE_MASK          BIT(0)
> +#define IAVF_RX_DESC_ERROR_RECIPE_MASK               BIT(1)
> +#define IAVF_RX_DESC_ERROR_HBO_MASK          BIT(2)
> +#define IAVF_RX_DESC_ERROR_L3L4E_MASK                GENMASK_ULL(5, 3)
> +#define IAVF_RX_DESC_ERROR_IPE_MASK          BIT(3)
> +#define IAVF_RX_DESC_ERROR_L4E_MASK          BIT(4)
> +#define IAVF_RX_DESC_ERROR_EIPE_MASK         BIT(5)
> +#define IAVF_RX_DESC_ERROR_OVERSIZE_MASK     BIT(6)
> +#define IAVF_RX_DESC_ERROR_PPRS_MASK         BIT(7)
>  
>  enum iavf_rx_desc_error_l3l4e_fcoe_masks {
>       IAVF_RX_DESC_ERROR_L3L4E_NONE           = 0,
> @@ -325,6 +361,41 @@ enum iavf_rx_desc_error_l3l4e_fcoe_masks {
>  #define IAVF_RXD_QW1_PTYPE_SHIFT     30
>  #define IAVF_RXD_QW1_PTYPE_MASK              (0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT)
>  
> +/* for iavf_32byte_rx_flex_wb.ptype_flexi_flags0 member */
> +#define IAVF_RX_FLEX_DESC_PTYPE_M      (0x3FF) /* 10-bits */

Redundant parentheses + GENMASK().

> +
> +/* for iavf_32byte_rx_flex_wb.pkt_length member */
> +#define IAVF_RX_FLEX_DESC_PKT_LEN_M    (0x3FFF) /* 14-bits */

Same.
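
I.e. (sketch):

	#define IAVF_RX_FLEX_DESC_PTYPE_M	GENMASK(9, 0)	/* 10 bits */
	#define IAVF_RX_FLEX_DESC_PKT_LEN_M	GENMASK(13, 0)	/* 14 bits */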

> +
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_FLEX_DESC_STATUS0_DD_M                       BIT(0)
> +#define IAVF_RX_FLEX_DESC_STATUS0_EOF_M                      BIT(1)
> +#define IAVF_RX_FLEX_DESC_STATUS0_HBO_M                      BIT(2)
> +#define IAVF_RX_FLEX_DESC_STATUS0_L3L4P_M            BIT(3)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_M         BIT(4)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_M         BIT(5)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M                BIT(6)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M               BIT(7)
> +#define IAVF_RX_FLEX_DESC_STATUS0_LPBK_M             BIT(8)
> +#define IAVF_RX_FLEX_DESC_STATUS0_IPV6EXADD_M                BIT(9)
> +#define IAVF_RX_FLEX_DESC_STATUS0_RXE_M                      BIT(10)
> +#define IAVF_RX_FLEX_DESC_STATUS0_CRCP_                      BIT(11)
> +#define IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_M                BIT(12)
> +#define IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_M          BIT(13)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_M     BIT(14)
> +#define IAVF_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_M     BIT(15)
> +
> +/* Note: These are predefined bit offsets */
> +#define IAVF_RX_FLEX_DESC_STATUS1_CPM_M                      (0xFULL) /* 4 bits */

Redundant parentheses.
+ GENMASK_ULL(3, 0)?

> +#define IAVF_RX_FLEX_DESC_STATUS1_NAT_M                      BIT(4)
> +#define IAVF_RX_FLEX_DESC_STATUS1_CRYPTO_M           BIT(5)
> +/* [10:6] reserved */
> +#define IAVF_RX_FLEX_DESC_STATUS1_L2TAG2P_M          BIT(11)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD2_VALID_M     BIT(12)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD3_VALID_M     BIT(13)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_M     BIT(14)
> +#define IAVF_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_M     BIT(15)
> +
>  #define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT       38
>  #define IAVF_RXD_QW1_LENGTH_PBUF_MASK        (0x3FFFULL << \
>                                        IAVF_RXD_QW1_LENGTH_PBUF_SHIFT)
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
> index 2693c3ad0830..5cbb375b7063 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
> @@ -402,6 +402,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
>       int pairs = adapter->num_active_queues;
>       struct virtchnl_queue_pair_info *vqpi;
>       u32 i, max_frame;
> +     u8 rx_flags = 0;
>       size_t len;
>  
>       max_frame = LIBIE_MAX_RX_FRM_LEN(adapter->rx_rings->pp->p.offset);
> @@ -419,6 +420,9 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
>       if (!vqci)
>               return;
>  
> +     if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP))
> +             rx_flags |= VIRTCHNL_PTP_RX_TSTAMP;
> +
>       vqci->vsi_id = adapter->vsi_res->vsi_id;
>       vqci->num_queue_pairs = pairs;
>       vqpi = vqci->qpair;
> @@ -441,6 +445,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter)
>               if (CRC_OFFLOAD_ALLOWED(adapter))
>                       vqpi->rxq.crc_disable = !!(adapter->netdev->features &
>                                                  NETIF_F_RXFCS);
> +             vqpi->rxq.flags = rx_flags;
>               vqpi++;
>       }

Thanks,
Olek