On Fri, Sep 29, 2023 at 01:42:27PM -0700, Haiyang Zhang wrote:
> Handle the case when GSO SKB linear length is too large.
> 
> MANA NIC requires GSO packets to put only the packet header in SGE0;
> otherwise, the TX queue may stall at the HW level.
> 
> So, use 2 SGEs for the skb linear part when it contains more than the
> packet header.
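> 
> As an illustrative sketch (not part of the patch): e.g., for a TSO skb
> with gso_hs = 54 (Ethernet + IPv4 + TCP headers) and skb_headlen(skb) =
> 1514, mana_map_skb() now splits the linear part into two SGEs:
> 
>     sge0_len = gso_hs;            /* 54 bytes: headers only, in SGE0 */
>     sge1_len = skb_hlen - gso_hs; /* 1460 bytes: linear payload, in SGE1 */
> 
> Non-GSO skbs, and GSO skbs whose linear part holds only the headers,
> keep using a single SGE as before.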
> 
> Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
> Signed-off-by: Haiyang Zhang <haiya...@microsoft.com>
> ---
> v2: coding style updates suggested by Simon Horman
> 
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 191 +++++++++++++-----
>  include/net/mana/mana.h                       |   5 +-
>  2 files changed, 138 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 86e724c3eb89..48ea4aeeea5d 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -91,63 +91,137 @@ static unsigned int mana_checksum_info(struct sk_buff *skb)
>       return 0;
>  }
>  
> +static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash,
> +                      int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey)
> +{
> +     ash->dma_handle[sg_i] = da;
> +     ash->size[sg_i] = sge_len;
> +
> +     tp->wqe_req.sgl[sg_i].address = da;
> +     tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey;
> +     tp->wqe_req.sgl[sg_i].size = sge_len;
> +}
> +
>  static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
> -                     struct mana_tx_package *tp)
> +                     struct mana_tx_package *tp, int gso_hs)
>  {
>       struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
> +     int hsg = 1; /* num of SGEs of linear part */
>       struct gdma_dev *gd = apc->ac->gdma_dev;
> +     int skb_hlen = skb_headlen(skb);
> +     int sge0_len, sge1_len = 0;
>       struct gdma_context *gc;
>       struct device *dev;
>       skb_frag_t *frag;
>       dma_addr_t da;
> +     int sg_i;
>       int i;
>  
>       gc = gd->gdma_context;
>       dev = gc->dev;
> -     da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
>  
> +     if (gso_hs && gso_hs < skb_hlen) {
> +             sge0_len = gso_hs;
> +             sge1_len = skb_hlen - gso_hs;
> +     } else {
> +             sge0_len = skb_hlen;
> +     }
> +
> +     da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE);
>       if (dma_mapping_error(dev, da))
>               return -ENOMEM;
>  
> -     ash->dma_handle[0] = da;
> -     ash->size[0] = skb_headlen(skb);
> +     mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey);
>  
> -     tp->wqe_req.sgl[0].address = ash->dma_handle[0];
> -     tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey;
> -     tp->wqe_req.sgl[0].size = ash->size[0];
> +     if (sge1_len) {
> +             sg_i = 1;
> +             da = dma_map_single(dev, skb->data + sge0_len, sge1_len,
> +                                 DMA_TO_DEVICE);
> +             if (dma_mapping_error(dev, da))
> +                     goto frag_err;
> +
> +             mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey);
> +             hsg = 2;
> +     }
>  
>       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
> +             sg_i = hsg + i;
> +
>               frag = &skb_shinfo(skb)->frags[i];
>               da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
>                                     DMA_TO_DEVICE);
> -
>               if (dma_mapping_error(dev, da))
>                       goto frag_err;
>  
> -             ash->dma_handle[i + 1] = da;
> -             ash->size[i + 1] = skb_frag_size(frag);
> -
> -             tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
> -             tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
> -             tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
> +             mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag),
> +                          gd->gpa_mkey);
>       }
>  
>       return 0;
>  
>  frag_err:
> -     for (i = i - 1; i >= 0; i--)
> -             dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1],
> +     for (i = sg_i - 1; i >= hsg; i--)
> +             dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
>                              DMA_TO_DEVICE);
>  
> -     dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
> +     for (i = hsg - 1; i >= 0; i--)
> +             dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
> +                              DMA_TO_DEVICE);
>  
>       return -ENOMEM;
>  }
>  
> +/* Handle the case when GSO SKB linear length is too large.
> + * MANA NIC requires GSO packets to put only the packet header in SGE0.
> + * So, we need 2 SGEs for the skb linear part when it contains more than
> + * the header.
> + * Return a positive value for the number of SGEs, or a negative value
> + * for an error.
> + */
> +static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb,
> +                          int gso_hs)
> +{
> +     int num_sge = 1 + skb_shinfo(skb)->nr_frags;
> +     int skb_hlen = skb_headlen(skb);
> +
> +     if (gso_hs < skb_hlen) {
> +             num_sge++;
> +     } else if (gso_hs > skb_hlen) {
> +             if (net_ratelimit())
> +                     netdev_err(ndev,
> +                                "TX nonlinear head: hs:%d, skb_hlen:%d\n",
> +                                gso_hs, skb_hlen);
> +
> +             return -EINVAL;
> +     }
> +
> +     return num_sge;
> +}
> +
> +/* Get the GSO packet's header size */
> +static int mana_get_gso_hs(struct sk_buff *skb)
> +{
> +     int gso_hs;
> +
> +     if (skb->encapsulation) {
> +             gso_hs = skb_inner_tcp_all_headers(skb);
> +     } else {
> +             if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
> +                     gso_hs = skb_transport_offset(skb) +
> +                              sizeof(struct udphdr);
> +             } else {
> +                     gso_hs = skb_tcp_all_headers(skb);
> +             }
> +     }
> +
> +     return gso_hs;
> +}
> +
>  netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  {
>       enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
>       struct mana_port_context *apc = netdev_priv(ndev);
> +     int gso_hs = 0; /* zero for non-GSO pkts */
>       u16 txq_idx = skb_get_queue_mapping(skb);
>       struct gdma_dev *gd = apc->ac->gdma_dev;
>       bool ipv4 = false, ipv6 = false;
> @@ -159,7 +233,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>       struct mana_txq *txq;
>       struct mana_cq *cq;
>       int err, len;
> -     u16 ihs;
>  
>       if (unlikely(!apc->port_is_up))
>               goto tx_drop;
> @@ -209,19 +282,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>       pkg.wqe_req.client_data_unit = 0;
>  
>       pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
> -     WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
> -
> -     if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
> -             pkg.wqe_req.sgl = pkg.sgl_array;
> -     } else {
> -             pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
> -                                         sizeof(struct gdma_sge),
> -                                         GFP_ATOMIC);
> -             if (!pkg.sgl_ptr)
> -                     goto tx_drop_count;
> -
> -             pkg.wqe_req.sgl = pkg.sgl_ptr;
> -     }
>  
>       if (skb->protocol == htons(ETH_P_IP))
>               ipv4 = true;
> @@ -229,6 +289,26 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>               ipv6 = true;
>  
>       if (skb_is_gso(skb)) {
> +             int num_sge;
> +
> +             gso_hs = mana_get_gso_hs(skb);
> +
> +             num_sge = mana_fix_skb_head(ndev, skb, gso_hs);
> +             if (num_sge > 0)
> +                     pkg.wqe_req.num_sge = num_sge;
> +             else
> +                     goto tx_drop_count;
> +
> +             u64_stats_update_begin(&tx_stats->syncp);
> +             if (skb->encapsulation) {
> +                     tx_stats->tso_inner_packets++;
> +                     tx_stats->tso_inner_bytes += skb->len - gso_hs;
> +             } else {
> +                     tx_stats->tso_packets++;
> +                     tx_stats->tso_bytes += skb->len - gso_hs;
> +             }
> +             u64_stats_update_end(&tx_stats->syncp);
> +
>               pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
>               pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
>  
> @@ -252,26 +332,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>                                                &ipv6_hdr(skb)->daddr, 0,
>                                                IPPROTO_TCP, 0);
>               }
> -
> -             if (skb->encapsulation) {
> -                     ihs = skb_inner_tcp_all_headers(skb);
> -                     u64_stats_update_begin(&tx_stats->syncp);
> -                     tx_stats->tso_inner_packets++;
> -                     tx_stats->tso_inner_bytes += skb->len - ihs;
> -                     u64_stats_update_end(&tx_stats->syncp);
> -             } else {
> -                     if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
> -                             ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
> -                     } else {
> -                             ihs = skb_tcp_all_headers(skb);
> -                     }
> -
> -                     u64_stats_update_begin(&tx_stats->syncp);
> -                     tx_stats->tso_packets++;
> -                     tx_stats->tso_bytes += skb->len - ihs;
> -                     u64_stats_update_end(&tx_stats->syncp);
> -             }
> -
>       } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
>               csum_type = mana_checksum_info(skb);
>  
> @@ -294,11 +354,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>               } else {
>                       /* Can't do offload of this type of checksum */
>                       if (skb_checksum_help(skb))
> -                             goto free_sgl_ptr;
> +                             goto tx_drop_count;
>               }
>       }
>  
> -     if (mana_map_skb(skb, apc, &pkg)) {
> +     WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
> +
> +     if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
> +             pkg.wqe_req.sgl = pkg.sgl_array;
> +     } else {
> +             pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
> +                                         sizeof(struct gdma_sge),
> +                                         GFP_ATOMIC);
> +             if (!pkg.sgl_ptr)
> +                     goto tx_drop_count;
> +
> +             pkg.wqe_req.sgl = pkg.sgl_ptr;
> +     }
> +
> +     if (mana_map_skb(skb, apc, &pkg, gso_hs)) {
>               u64_stats_update_begin(&tx_stats->syncp);
>               tx_stats->mana_map_err++;
>               u64_stats_update_end(&tx_stats->syncp);
> @@ -1256,11 +1330,16 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
>       struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
>       struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
>       struct device *dev = gc->dev;
> -     int i;
> +     int hsg, i;
> +
> +     /* Number of SGEs of linear part */
> +     hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 2 : 1;
>  
> -     dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
> +     for (i = 0; i < hsg; i++)
> +             dma_unmap_single(dev, ash->dma_handle[i], ash->size[i],
> +                              DMA_TO_DEVICE);
>  
> -     for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
> +     for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++)
>               dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
>                              DMA_TO_DEVICE);
>  }
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 9f70b4332238..4d43adf18606 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -103,9 +103,10 @@ struct mana_txq {
>  
>  /* skb data and frags dma mappings */
>  struct mana_skb_head {
> -     dma_addr_t dma_handle[MAX_SKB_FRAGS + 1];
> +     /* GSO pkts may have 2 SGEs for the linear part */
> +     dma_addr_t dma_handle[MAX_SKB_FRAGS + 2];
>  
> -     u32 size[MAX_SKB_FRAGS + 1];
> +     u32 size[MAX_SKB_FRAGS + 2];
>  };
>  
>  #define MANA_HEADROOM sizeof(struct mana_skb_head)
> -- 
> 2.25.1
Reviewed-by: Shradha Gupta <shradhagu...@linux.microsoft.com>
