On 8 Jul 2019, at 4:03, Ilya Maximets wrote:

There are 2 call chains:

  a) xsk_bind --> xdp_umem_assign_dev
  b) unregister_netdevice_queue --> xsk_notifier

with the following locking order:

  a) xs->mutex --> rtnl_lock
  b) rtnl_lock --> xdp.lock --> xs->mutex

Taking 'xs->mutex' and 'rtnl_lock' in different orders can produce a
deadlock here. Fix that by moving the 'rtnl_lock' before 'xs->mutex' in
the bind call chain (a).
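
To illustrate the inversion, here is a minimal userspace sketch (not
kernel code; the pthread mutexes and the names bind_path/unregister_path
are just stand-ins for the two chains above) that hangs the same way:

  /* build: cc -o abba abba.c -lpthread */
  #include <pthread.h>
  #include <unistd.h>

  static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* stands in for xs->mutex  */
  static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* stands in for rtnl_lock  */

  /* Chain (a) before the fix: takes xs->mutex, then rtnl_lock. */
  static void *bind_path(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&lock_a);
          sleep(1);                      /* widen the race window */
          pthread_mutex_lock(&lock_b);   /* blocks while unregister_path holds lock_b */
          pthread_mutex_unlock(&lock_b);
          pthread_mutex_unlock(&lock_a);
          return NULL;
  }

  /* Chain (b): takes rtnl_lock, then xs->mutex. */
  static void *unregister_path(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&lock_b);
          sleep(1);
          pthread_mutex_lock(&lock_a);   /* blocks while bind_path holds lock_a: AB-BA deadlock */
          pthread_mutex_unlock(&lock_a);
          pthread_mutex_unlock(&lock_b);
          return NULL;
  }

  int main(void)
  {
          pthread_t t1, t2;

          pthread_create(&t1, NULL, bind_path, NULL);
          pthread_create(&t2, NULL, unregister_path, NULL);
          pthread_join(t1, NULL);        /* never returns: both threads wait forever */
          pthread_join(t2, NULL);
          return 0;
  }

Enforcing a single order, rtnl_lock first in both chains as this patch
does, removes the cycle.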

Reported-by: syzbot+bf64ec93de836d7f4...@syzkaller.appspotmail.com
Fixes: 455302d1c9ae ("xdp: fix hang while unregistering device bound to xdp socket")
Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>

Thanks, Ilya!

I think in the long run the locking needs to be revisited,
but this should fix the deadlock for now.

Acked-by: Jonathan Lemon <jonathan.le...@gmail.com>



This patch is a fix for a patch that is not yet in mainline but is
already in the 'net' tree. I'm not sure what the correct process is
for applying such fixes.

 net/xdp/xdp_umem.c | 16 ++++++----------
 net/xdp/xsk.c      |  2 ++
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 20c91f02d3d8..83de74ca729a 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -87,21 +87,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
        struct netdev_bpf bpf;
        int err = 0;

+       ASSERT_RTNL();
+
        force_zc = flags & XDP_ZEROCOPY;
        force_copy = flags & XDP_COPY;

        if (force_zc && force_copy)
                return -EINVAL;

-       rtnl_lock();
-       if (xdp_get_umem_from_qid(dev, queue_id)) {
-               err = -EBUSY;
-               goto out_rtnl_unlock;
-       }
+       if (xdp_get_umem_from_qid(dev, queue_id))
+               return -EBUSY;

        err = xdp_reg_umem_at_qid(dev, umem, queue_id);
        if (err)
-               goto out_rtnl_unlock;
+               return err;

        umem->dev = dev;
        umem->queue_id = queue_id;
@@ -110,7 +109,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,

        if (force_copy)
                /* For copy-mode, we are done. */
-               goto out_rtnl_unlock;
+               return 0;

        if (!dev->netdev_ops->ndo_bpf ||
            !dev->netdev_ops->ndo_xsk_async_xmit) {
@@ -125,7 +124,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
        err = dev->netdev_ops->ndo_bpf(dev, &bpf);
        if (err)
                goto err_unreg_umem;
-       rtnl_unlock();

        umem->zc = true;
        return 0;
@@ -135,8 +133,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
                err = 0; /* fallback to copy mode */
        if (err)
                xdp_clear_umem_at_qid(dev, queue_id);
-out_rtnl_unlock:
-       rtnl_unlock();
        return err;
 }

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 703cf5ea448b..2aa6072a3e55 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -416,6 +416,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
        if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
                return -EINVAL;

+       rtnl_lock();
        mutex_lock(&xs->mutex);
        if (xs->state != XSK_READY) {
                err = -EBUSY;
@@ -501,6 +502,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                xs->state = XSK_BOUND;
 out_release:
        mutex_unlock(&xs->mutex);
+       rtnl_unlock();
        return err;
 }

--
2.17.1
