On 11/08/2018 10:21 PM, Li RongQing wrote:
> GRO for UDP needs to look up the socket twice: first in gro receive,
> and again in gro complete. Storing the sock in the skb avoids the
> second lookup, which gives a small performance boost.
> 
> netperf -t UDP_RR -l 10
> 
> Before:
>       Rate per sec: 28746.01
> After:
>       Rate per sec: 29401.67
> 
> Signed-off-by: Li RongQing <lirongq...@baidu.com>
> ---
>  net/ipv4/udp_offload.c | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index 0646d61f4fa8..429570112a33 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -408,6 +408,11 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
> struct sk_buff *skb,
>  
>       if (udp_sk(sk)->gro_enabled) {
>               pp = call_gro_receive(udp_gro_receive_segment, head, skb);
> +
> +             if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
> +                     sock_hold(sk);
> +                     pp->sk = sk;


You also have to set pp->destructor to sock_edemux

flush_gro_hash -> kfree_skb()

If there is no destructor, the reference on pp->sk will never be released.




> +             }
>               rcu_read_unlock();
>               return pp;
>       }
> @@ -444,6 +449,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, 
> struct sk_buff *skb,
>       skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
>       pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
>  
> +     if (!IS_ERR(pp) && NAPI_GRO_CB(pp)->count > 1) {
> +             sock_hold(sk);
> +             pp->sk = sk;
> +     }
>  out_unlock:
>       rcu_read_unlock();
>       skb_gro_flush_final(skb, pp, flush);
> @@ -502,7 +511,9 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
>       uh->len = newlen;
>  
>       rcu_read_lock();
> -     sk = (*lookup)(skb, uh->source, uh->dest);
> +     sk = skb->sk;
> +     if (!sk)
> +             sk = (*lookup)(skb, uh->source, uh->dest);
>       if (sk && udp_sk(sk)->gro_enabled) {
>               err = udp_gro_complete_segment(skb);
>       } else if (sk && udp_sk(sk)->gro_complete) {
> @@ -516,6 +527,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
>               err = udp_sk(sk)->gro_complete(sk, skb,
>                               nhoff + sizeof(struct udphdr));
>       }
> +
> +     if (skb->sk) {
> +             sock_put(skb->sk);
> +             skb->sk = NULL;
> +     }
>       rcu_read_unlock();
>  
>       if (skb->remcsum_offload)
> 

Reply via email to