On 26.08.2019 9:10, Björn Töpel wrote:
> From: Björn Töpel <bjorn.to...@intel.com>
> 
> The state variable was read and written outside the control mutex
> (struct xdp_sock, mutex), without proper barriers and {READ,
> WRITE}_ONCE correctness.
> 
> In this commit this issue is addressed, and the state member is now
> used as a point of synchronization for whether the socket is set up
> correctly or not.
> 
> This also fixes a race, found by syzkaller, in xsk_poll() where umem
> could be accessed when stale.
> 
> Suggested-by: Hillf Danton <hdan...@sina.com>
> Reported-by: syzbot+c82697e3043781e08...@syzkaller.appspotmail.com
> Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings")
> Signed-off-by: Björn Töpel <bjorn.to...@intel.com>
> ---
>  net/xdp/xsk.c | 57 ++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 40 insertions(+), 17 deletions(-)
> 
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index f3351013c2a5..8fafa3ce3ae6 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -162,10 +162,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
>       return err;
>  }
>  
> +static bool xsk_is_bound(struct xdp_sock *xs)
> +{
> +     if (READ_ONCE(xs->state) == XSK_BOUND) {
> +             /* Matches smp_wmb() in bind(). */
> +             smp_rmb();
> +             return true;
> +     }
> +     return false;
> +}
> +
>  int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
>  {
>       u32 len;
>  
> +     if (!xsk_is_bound(xs))
> +             return -EINVAL;
> +
>       if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
>               return -EINVAL;
>  
> @@ -362,7 +375,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>       struct sock *sk = sock->sk;
>       struct xdp_sock *xs = xdp_sk(sk);
>  
> -     if (unlikely(!xs->dev))
> +     if (unlikely(!xsk_is_bound(xs)))
>               return -ENXIO;
>       if (unlikely(!(xs->dev->flags & IFF_UP)))
>               return -ENETDOWN;
> @@ -378,10 +391,15 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
>                            struct poll_table_struct *wait)
>  {
>       unsigned int mask = datagram_poll(file, sock, wait);
> -     struct sock *sk = sock->sk;
> -     struct xdp_sock *xs = xdp_sk(sk);
> -     struct net_device *dev = xs->dev;
> -     struct xdp_umem *umem = xs->umem;
> +     struct xdp_sock *xs = xdp_sk(sock->sk);
> +     struct net_device *dev;
> +     struct xdp_umem *umem;
> +
> +     if (unlikely(!xsk_is_bound(xs)))
> +             return mask;
> +
> +     dev = xs->dev;
> +     umem = xs->umem;
>  
>       if (umem->need_wakeup)
>               dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
> @@ -417,10 +435,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
>  {
>       struct net_device *dev = xs->dev;
>  
> -     if (!dev || xs->state != XSK_BOUND)
> +     if (xs->state != XSK_BOUND)
>               return;
> -
> -     xs->state = XSK_UNBOUND;
> +     WRITE_ONCE(xs->state, XSK_UNBOUND);
>  
>       /* Wait for driver to stop using the xdp socket. */
>       xdp_del_sk_umem(xs->umem, xs);
> @@ -495,7 +512,9 @@ static int xsk_release(struct socket *sock)
>       local_bh_enable();
>  
>       xsk_delete_from_maps(xs);
> +     mutex_lock(&xs->mutex);
>       xsk_unbind_dev(xs);
> +     mutex_unlock(&xs->mutex);
>  
>       xskq_destroy(xs->rx);
>       xskq_destroy(xs->tx);
> @@ -589,19 +608,18 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>               }
>  
>               umem_xs = xdp_sk(sock->sk);
> -             if (!umem_xs->umem) {
> -                     /* No umem to inherit. */
> +             if (!xsk_is_bound(umem_xs)) {

This changes the error codes a bit.
Previously:
   umem exists + xs unbound    --> EINVAL
   no umem     + xs unbound    --> EBADF
   xs bound to different dev/q --> EINVAL

With this change:
   umem exists + xs unbound    --> EBADF
   no umem     + xs unbound    --> EBADF
   xs bound to different dev/q --> EINVAL

Just a note. Not sure if this is important.
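
If keeping the old distinction matters to user space, one way to
preserve it on top of this patch could be the following (untested
sketch only; the ternary on umem_xs->umem is my own, not from the
patch):

        umem_xs = xdp_sk(sock->sk);
        if (!xsk_is_bound(umem_xs)) {
                /* As before the patch: EBADF only when there is no
                 * umem at all, EINVAL when a umem exists but the
                 * socket is not (yet) bound.
                 */
                err = READ_ONCE(umem_xs->umem) ? -EINVAL : -EBADF;
                sockfd_put(sock);
                goto out_unlock;
        }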


>                       err = -EBADF;
>                       sockfd_put(sock);
>                       goto out_unlock;
> -             } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
> +             }
> +             if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
>                       err = -EINVAL;
>                       sockfd_put(sock);
>                       goto out_unlock;
>               }
> -
>               xdp_get_umem(umem_xs->umem);
> -             xs->umem = umem_xs->umem;
> +             WRITE_ONCE(xs->umem, umem_xs->umem);
>               sockfd_put(sock);
>       } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
>               err = -EINVAL;
> @@ -626,10 +644,15 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
>       xdp_add_sk_umem(xs->umem, xs);
>  
>  out_unlock:
> -     if (err)
> +     if (err) {
>               dev_put(dev);
> -     else
> -             xs->state = XSK_BOUND;
> +     } else {
> +             /* Matches smp_rmb() in bind() for shared umem
> +              * sockets, and xsk_is_bound().
> +              */
> +             smp_wmb();
> +             WRITE_ONCE(xs->state, XSK_BOUND);
> +     }
>  out_release:
>       mutex_unlock(&xs->mutex);
>       rtnl_unlock();
> @@ -869,7 +892,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
>       unsigned long pfn;
>       struct page *qpg;
>  
> -     if (xs->state != XSK_READY)
> +     if (READ_ONCE(xs->state) != XSK_READY)
>               return -EBUSY;
>  
>       if (offset == XDP_PGOFF_RX_RING) {
> 
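
One more remark, mostly for readers following the barrier comments:
what the patch sets up is the usual publish/consume pairing between
bind() and xsk_is_bound(). A minimal sketch of the idea (the
publish()/consume() names are mine, not kernel code):

        static void publish(struct xdp_sock *xs, struct net_device *dev, u16 qid)
        {
                xs->dev = dev;                    /* plain stores ...         */
                xs->queue_id = qid;
                smp_wmb();                        /* ... ordered before ...   */
                WRITE_ONCE(xs->state, XSK_BOUND); /* ... the publishing store */
        }

        static bool consume(struct xdp_sock *xs)
        {
                if (READ_ONCE(xs->state) != XSK_BOUND)
                        return false;
                smp_rmb(); /* pairs with smp_wmb() in publish() */
                /* xs->dev, xs->queue_id and xs->umem are now safe to use. */
                return true;
        }

Any reader that observes XSK_BOUND and then issues smp_rmb() is
guaranteed to also see the stores made before the smp_wmb(), which is
what xsk_rcv(), xsk_sendmsg() and xsk_poll() rely on here.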
