On Thu, Nov 30, 2017 at 9:39 AM, Paolo Abeni <pab...@redhat.com> wrote: > Since commit e32ea7e74727 ("soreuseport: fast reuseport UDP socket > selection") and commit c125e80b8868 ("soreuseport: fast reuseport > TCP socket selection") the relevant reuseport socket matching the current > packet is selected by the reuseport_select_sock() call. The only > exceptions are invalid BPF filters/filters returning out-of-range > indices. > In the latter case the code implicitly falls back to using the hash > demultiplexing, but instead of selecting the socket inside the > reuseport_select_sock() function, it relies on the hash selection > logic introduced with the early soreuseport implementation. > > With this patch, in case of a BPF filter returning a bad socket > index value, we fall back to hash-based selection inside the > reuseport_select_sock() body, so that we can drop some duplicate > code in the ipv4 and ipv6 stack. > > This also allows faster lookup in the above scenario and will allow > us to avoid computing the hash value for successful, BPF based > demultiplexing - in a later patch. > > Signed-off-by: Paolo Abeni <pab...@redhat.com>
Great optimization! Acked-by: Craig Gallek <kr...@google.com> > --- > net/core/sock_reuseport.c | 4 +++- > net/ipv4/inet_hashtables.c | 11 ++--------- > net/ipv4/udp.c | 22 ++++------------------ > net/ipv6/inet6_hashtables.c | 11 ++--------- > net/ipv6/udp.c | 22 ++++------------------ > 5 files changed, 15 insertions(+), 55 deletions(-) > > diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c > index 5eeb1d20cc38..c5bb52bc73a1 100644 > --- a/net/core/sock_reuseport.c > +++ b/net/core/sock_reuseport.c > @@ -235,7 +235,9 @@ struct sock *reuseport_select_sock(struct sock *sk, > > if (prog && skb) > sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); > - else > + > + /* no bpf or invalid bpf result: fall back to hash usage */ > + if (!sk2) > sk2 = reuse->socks[reciprocal_scale(hash, socks)]; > } > > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > index e7d15fb0d94d..427b705d7c64 100644 > --- a/net/ipv4/inet_hashtables.c > +++ b/net/ipv4/inet_hashtables.c > @@ -216,32 +216,25 @@ struct sock *__inet_lookup_listener(struct net *net, > { > unsigned int hash = inet_lhashfn(net, hnum); > struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; > - int score, hiscore = 0, matches = 0, reuseport = 0; > bool exact_dif = inet_exact_dif_match(net, skb); > struct sock *sk, *result = NULL; > + int score, hiscore = 0; > u32 phash = 0; > > sk_for_each_rcu(sk, &ilb->head) { > score = compute_score(sk, net, hnum, daddr, > dif, sdif, exact_dif); > if (score > hiscore) { > - reuseport = sk->sk_reuseport; > - if (reuseport) { > + if (sk->sk_reuseport) { > phash = inet_ehashfn(net, daddr, hnum, > saddr, sport); > result = reuseport_select_sock(sk, phash, > skb, doff); > if (result) > return result; > - matches = 1; > } > result = sk; > hiscore = score; > - } else if (score == hiscore && reuseport) { > - matches++; > - if (reciprocal_scale(phash, matches) == 0) > - result = sk; > - phash = next_pseudo_random32(phash); > } > } > return result; > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index e4ff25c947c5..36f857c87fe2 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -445,7 +445,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, > struct sk_buff *skb) > { > struct sock *sk, *result; > - int score, badness, matches = 0, reuseport = 0; > + int score, badness; > u32 hash = 0; > > result = NULL; > @@ -454,23 +454,16 @@ static struct sock *udp4_lib_lookup2(struct net *net, > score = compute_score(sk, net, saddr, sport, > daddr, hnum, dif, sdif, exact_dif); > if (score > badness) { > - reuseport = sk->sk_reuseport; > - if (reuseport) { > + if (sk->sk_reuseport) { > hash = udp_ehashfn(net, daddr, hnum, > saddr, sport); > result = reuseport_select_sock(sk, hash, skb, > sizeof(struct > udphdr)); > if (result) > return result; > - matches = 1; > } > badness = score; > result = sk; > - } else if (score == badness && reuseport) { > - matches++; > - if (reciprocal_scale(hash, matches) == 0) > - result = sk; > - hash = next_pseudo_random32(hash); > } > } > return result; > @@ -488,7 +481,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 > saddr, > unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, > udptable->mask); > struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; > bool exact_dif = udp_lib_exact_dif_match(net, skb); > - int score, badness, matches = 0, reuseport = 0; > + int score, badness; > u32 hash = 0; > > if (hslot->count > 10) { > @@ -526,23 +519,16 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 > saddr, > score = compute_score(sk, net, saddr, sport, > daddr, hnum, dif, sdif, exact_dif); > if (score > badness) { > - reuseport = sk->sk_reuseport; > - if (reuseport) { > + if (sk->sk_reuseport) { > hash = udp_ehashfn(net, daddr, hnum, > saddr, sport); > result = reuseport_select_sock(sk, hash, skb, > sizeof(struct > udphdr)); > if (result) > return result; > - matches = 1; > } > result = sk; > badness = score; > - } else if (score == badness && reuseport) { > - matches++; > - if (reciprocal_scale(hash, matches) == 0) > - result = sk; > - hash = next_pseudo_random32(hash); > } > } > return result; > diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c > index b01858f5deb1..0d1451381f5c 100644 > --- a/net/ipv6/inet6_hashtables.c > +++ b/net/ipv6/inet6_hashtables.c > @@ -134,31 +134,24 @@ struct sock *inet6_lookup_listener(struct net *net, > { > unsigned int hash = inet_lhashfn(net, hnum); > struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; > - int score, hiscore = 0, matches = 0, reuseport = 0; > bool exact_dif = inet6_exact_dif_match(net, skb); > struct sock *sk, *result = NULL; > + int score, hiscore = 0; > u32 phash = 0; > > sk_for_each(sk, &ilb->head) { > score = compute_score(sk, net, hnum, daddr, dif, sdif, > exact_dif); > if (score > hiscore) { > - reuseport = sk->sk_reuseport; > - if (reuseport) { > + if (sk->sk_reuseport) { > phash = inet6_ehashfn(net, daddr, hnum, > saddr, sport); > result = reuseport_select_sock(sk, phash, > skb, doff); > if (result) > return result; > - matches = 1; > } > result = sk; > hiscore = score; > - } else if (score == hiscore && reuseport) { > - matches++; > - if (reciprocal_scale(phash, matches) == 0) > - result = sk; > - phash = next_pseudo_random32(phash); > } > } > return result; > diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c > index 3f30fa313bf2..c9f91c28b81d 100644 > --- a/net/ipv6/udp.c > +++ b/net/ipv6/udp.c > @@ -184,7 +184,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, > struct udp_hslot *hslot2, struct sk_buff *skb) > { > struct sock *sk, *result; > - int score, badness, matches = 0, reuseport = 0; > + int score, badness; > u32 hash = 0; > > result = NULL; > @@ -193,8 +193,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, > score = compute_score(sk, net, saddr, sport, > daddr, hnum, dif, sdif, exact_dif); > if (score > badness) { > - reuseport = sk->sk_reuseport; > - if (reuseport) { > + if (sk->sk_reuseport) { > hash = udp6_ehashfn(net, daddr, hnum, > saddr, sport); > > @@ -202,15 +201,9 @@ static struct sock *udp6_lib_lookup2(struct net *net, > sizeof(struct > udphdr)); > if (result) > return result; > - matches = 1; > } > result = sk; > badness = score; > - } else if (score == badness && reuseport) { > - matches++; > - if (reciprocal_scale(hash, matches) == 0) > - result = sk; > - hash = next_pseudo_random32(hash); > } > } > return result; > @@ -228,7 +221,7 @@ struct sock *__udp6_lib_lookup(struct net *net, > unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, > udptable->mask); > struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; > bool exact_dif = udp6_lib_exact_dif_match(net, skb); > - int score, badness, matches = 0, reuseport = 0; > + int score, badness; > u32 hash = 0; > > if (hslot->count > 10) { > @@ -267,23 +260,16 @@ struct sock *__udp6_lib_lookup(struct net *net, > score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, > sdif, exact_dif); > if (score > badness) { > - reuseport = sk->sk_reuseport; > - if (reuseport) { > + if (sk->sk_reuseport) { > hash = udp6_ehashfn(net, daddr, hnum, > saddr, sport); > result = reuseport_select_sock(sk, hash, skb, > sizeof(struct > udphdr)); > if (result) > return result; > - matches = 1; > } > result = sk; > badness = score; > - } else if (score == badness && reuseport) { > - matches++; > - if (reciprocal_scale(hash, matches) == 0) > - result = sk; > - hash = next_pseudo_random32(hash); > } > } > return result; > -- > 2.13.6 >