From: Björn Töpel <bjorn.to...@intel.com>

The XSKMAP update and delete functions called synchronize_net(), which
can sleep. It is not allowed to sleep during an RCU read section.

Instead we need to make sure that the sock sk_destruct (xsk_destruct)
function is asynchronously called after an RCU grace period. Setting
the SOCK_RCU_FREE flag for XDP sockets takes care of this.

Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type 
BPF_MAP_TYPE_XSKMAP")
Reported-by: Eric Dumazet <eric.duma...@gmail.com>
Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
---
 kernel/bpf/xskmap.c | 10 ++--------
 net/xdp/xsk.c       |  2 ++
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 9f8463afda9c..47147c9e184d 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -192,11 +192,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void 
*key, void *value,
        sock_hold(sock->sk);
 
        old_xs = xchg(&m->xsk_map[i], xs);
-       if (old_xs) {
-               /* Make sure we've flushed everything. */
-               synchronize_net();
+       if (old_xs)
                sock_put((struct sock *)old_xs);
-       }
 
        sockfd_put(sock);
        return 0;
@@ -212,11 +209,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void 
*key)
                return -EINVAL;
 
        old_xs = xchg(&m->xsk_map[k], NULL);
-       if (old_xs) {
-               /* Make sure we've flushed everything. */
-               synchronize_net();
+       if (old_xs)
                sock_put((struct sock *)old_xs);
-       }
 
        return 0;
 }
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 0577cd49aa72..07156f43d295 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -754,6 +754,8 @@ static int xsk_create(struct net *net, struct socket *sock, 
int protocol,
        sk->sk_destruct = xsk_destruct;
        sk_refcnt_debug_inc(sk);
 
+       sock_set_flag(sk, SOCK_RCU_FREE);
+
        xs = xdp_sk(sk);
        mutex_init(&xs->mutex);
        spin_lock_init(&xs->tx_completion_lock);
-- 
2.17.1

Reply via email to