On Thu, Nov 26, 2020 at 12:24:25PM +0000, Tom Parkin wrote:
> This new ioctl pair allows two ppp channels to be bridged together:
> frames arriving in one channel are transmitted in the other channel
> and vice versa.

Thanks!
Some comments below (mostly about locking).

> The practical use for this is primarily to support the L2TP Access
> Concentrator use-case.  The end-user session is presented as a ppp
> channel (typically PPPoE, although it could be e.g. PPPoA, or even PPP
> over a serial link) and is switched into a PPPoL2TP session for
> transmission to the LNS.  At the LNS the PPP session is terminated in
> the ISP's network.
> 
> When a PPP channel is bridged to another it takes a reference on the
> other's struct ppp_file.  This reference is dropped when the channels
> are unbridged, which can occur either explicitly on userspace calling
> the PPPIOCUNBRIDGECHAN ioctl, or implicitly when either channel in the
> bridge is unregistered.
> 
> In order to implement the channel bridge, struct channel is extended
> with a new field, 'bridge', which points to the other struct channel
> making up the bridge.
> 
> This pointer is RCU protected to avoid adding another lock to the data
> path.
> 
> To guard against concurrent writes to the pointer, the existing struct
> channel lock 'downl' use is extended (rather than adding a new lock).
> Order of lock acquisition is maintained: i.e. the channel 'upl' lock is
> always acquired before 'downl' in code paths that need to hold both.
> 
> Signed-off-by: Tom Parkin <tpar...@katalix.com>
> ---
>  drivers/net/ppp/ppp_generic.c  | 147 ++++++++++++++++++++++++++++++++-
>  include/uapi/linux/ppp-ioctl.h |   2 +
>  2 files changed, 147 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
> index 7d005896a0f9..5e563bfb8e2a 100644
> --- a/drivers/net/ppp/ppp_generic.c
> +++ b/drivers/net/ppp/ppp_generic.c
> @@ -170,11 +170,12 @@ struct channel {
>       struct list_head list;          /* link in all/new_channels list */
>       struct ppp_channel *chan;       /* public channel data structure */
>       struct rw_semaphore chan_sem;   /* protects `chan' during chan ioctl */
> -     spinlock_t      downl;          /* protects `chan', file.xq dequeue */
> +     spinlock_t      downl;          /* protects `chan', 'bridge', file.xq 
> dequeue */
>       struct ppp      *ppp;           /* ppp unit we're connected to */
>       struct net      *chan_net;      /* the net channel belongs to */
>       struct list_head clist;         /* link in list of channels per unit */
>       rwlock_t        upl;            /* protects `ppp' */
> +     struct channel *bridge;         /* "bridged" ppp channel */

Missing __rcu annotation (as reported by kernel test robot):
struct channel __rcu *bridge;

With RCU protection, it might make sense to use ->upl, instead of
->downl, to protect the update side. Since ->upl is used to protect the
pointer to the parent unit, it probably makes sense to use it for
->bridge too, which somehow replaces the parent unit (as both are
mutually exclusive). Also, using ->upl would avoid some lock nesting
when updating ->bridge.

>  #ifdef CONFIG_PPP_MULTILINK
>       u8              avail;          /* flag used in multilink stuff */
>       u8              had_frag;       /* >= 1 fragments have been sent */
> @@ -606,6 +607,95 @@ static struct bpf_prog *compat_ppp_get_filter(struct 
> sock_fprog32 __user *p)
>  #endif
>  #endif
>  
> +/* Bridge one PPP channel to another.
> + * When two channels are bridged, ppp_input on one channel is redirected to
> + * the other's ops->start_xmit handler.
> + * In order to safely bridge channels we must reject channels which are 
> already
> + * part of a bridge instance, or which form part of an existing unit.
> + * Once successfully bridged, each channel holds a reference on the other
> + * to prevent it being freed while the bridge is extant.
> + */
> +static int ppp_bridge_channels(struct channel *pch, struct channel *pchb)
> +{
> +     int ret = -EALREADY;
> +
> +     /* We need to take each channel upl for access to the 'ppp' field,
> +      * and each channel downl for write access to the 'bridge' field.
> +      */
> +
> +     read_lock_bh(&pch->upl);
> +     if (pch->ppp)
> +             goto out0;
> +
> +     spin_lock(&pch->downl);
> +
> +     read_lock_bh(&pchb->upl);
> +     if (pchb->ppp)
> +             goto out1;

You're verifying that ->ppp isn't set, however, you haven't added a
test in ppp_connect_channel() to avoid setting ->ppp when ->bridge
is already set. Therefore, it'd still be possible to set both ->ppp and
->bridge on a channel.

> +     spin_lock(&pchb->downl);
> +
> +     if (pch->bridge || pchb->bridge)
> +             goto out2;
> +
> +     rcu_assign_pointer(pch->bridge, pchb);
> +     refcount_inc(&pchb->file.refcnt);
> +
> +     rcu_assign_pointer(pchb->bridge, pch);
> +     refcount_inc(&pch->file.refcnt);
> +
> +     ret = 0;
> +
> +out2:
> +     spin_unlock(&pchb->downl);
> +out1:
> +     read_unlock_bh(&pchb->upl);
> +     spin_unlock(&pch->downl);
> +out0:
> +     read_unlock_bh(&pch->upl);
> +
> +     return ret;
> +}

Locking looks dangerous here: given that ppp_bridge_channels() is
called with pn->all_channels_lock held, that's 5 nested locks.

Since we have to hold the channels anyway, why not incrementing the
refcount immediately and unlock everything as soon as possible?

That is, instead of doing:
  LOCK(all_channels_lock)
  LOCK(channel->upl)
  LOCK(channel->downl)
  LOCK(bridge->upl)
  LOCK(bridge->downl)
  assign_pointers
  UNLOCK()
  ...
  UNLOCK()

what about something like:

  LOCK(all_channels_lock)
  bridge = find_channel()
  refcount_inc(&bridge->refcount)
  UNLOCK(all_channels_lock)

  LOCK(channel->upl)
  LOCK(channel->downl)
  set ->bridge
  UNLOCK(channel->downl)
  UNLOCK(channel->upl)

  refcount_inc(&channel->refcount) // so that bridge holds a ref

  LOCK(bridge->upl)
  LOCK(bridge->downl)
  set ->bridge
  UNLOCK(bridge->downl)
  UNLOCK(bridge->upl)

We could even avoid locking ->downl if ->bridge was protected directly
by ->upl. That way we'd avoid nesting locks entirely.

> +static int ppp_unbridge_channels(struct channel *pch)
> +{
> +     struct channel *pchb;
> +
> +     rcu_read_lock();
> +
> +     pchb = rcu_dereference(pch->bridge);
> +     if (!pchb) {
> +             rcu_read_unlock();
> +             return -ENXIO;
> +     }
> +
> +     if (pch != rcu_dereference(pchb->bridge)) {
> +             rcu_read_unlock();
> +             return -ENXIO;
> +     }

Looks like we have a TOCTOU problem here: ->bridge might change before
we lock ->downl.

> +     spin_lock(&pch->downl);
> +     spin_lock(&pchb->downl);

I think we can have a deadlock here. Since ppp_unbridge_channels()
isn't running under the protection of an external lock, we could have
the bridge channel call this function concurrently. Then we'd have
lock inversion:

  ppp_unbridge_channels(channel)    ppp_unbridge_channels(bridge)
    LOCK(channel->downl)              LOCK(bridge->downl)
    LOCK(bridge->downl)               LOCK(channel->downl)  // deadlock

Here again I think we should avoid nesting locks and clear each
->bridge pointer independently.

> +     rcu_assign_pointer(pch->bridge, NULL);
> +     rcu_assign_pointer(pchb->bridge, NULL);

Nit, we can use RCU_INIT_POINTER() when resetting a pointer with NULL.

> +     spin_unlock(&pchb->downl);
> +     spin_unlock(&pch->downl);
> +
> +     rcu_read_unlock();
> +
> +     synchronize_rcu();
> +
> +     if (refcount_dec_and_test(&pch->file.refcnt))
> +             ppp_destroy_channel(pch);
> +
> +     if (refcount_dec_and_test(&pchb->file.refcnt))
> +             ppp_destroy_channel(pchb);
> +
> +     return 0;
> +}
> +
>  static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  {
>       struct ppp_file *pf;
> @@ -641,8 +731,9 @@ static long ppp_ioctl(struct file *file, unsigned int 
> cmd, unsigned long arg)
>       }
>  
>       if (pf->kind == CHANNEL) {
> -             struct channel *pch;
> +             struct channel *pch, *pchb;
>               struct ppp_channel *chan;
> +             struct ppp_net *pn;
>  
>               pch = PF_TO_CHANNEL(pf);
>  
> @@ -657,6 +748,22 @@ static long ppp_ioctl(struct file *file, unsigned int 
> cmd, unsigned long arg)
>                       err = ppp_disconnect_channel(pch);
>                       break;
>  
> +             case PPPIOCBRIDGECHAN:
> +                     if (get_user(unit, p))
> +                             break;
> +                     err = -ENXIO;
> +                     pn = ppp_pernet(current->nsproxy->net_ns);
> +                     spin_lock_bh(&pn->all_channels_lock);
> +                     pchb = ppp_find_channel(pn, unit);
> +                     if (pchb)
> +                             err = ppp_bridge_channels(pch, pchb);
> +                     spin_unlock_bh(&pn->all_channels_lock);
> +                     break;
> +
> +             case PPPIOCUNBRIDGECHAN:
> +                     err = ppp_unbridge_channels(pch);
> +                     break;
> +
>               default:
>                       down_read(&pch->chan_sem);
>                       chan = pch->chan;
> @@ -2089,6 +2196,35 @@ static bool ppp_decompress_proto(struct sk_buff *skb)
>       return pskb_may_pull(skb, 2);
>  }
>  
> +/* Attempt to handle a frame via. a bridged channel, if one exists.
> + * If the channel is bridged, the frame is consumed by the bridge.
> + * If not, the caller must handle the frame by normal recv mechanisms.
> + * Returns true if the frame is consumed, false otherwise.
> + */
> +static bool ppp_channel_bridge_input(struct channel *pch, struct sk_buff 
> *skb)
> +{
> +     struct channel *pchb;
> +
> +     rcu_read_lock();
> +     pchb = rcu_dereference(pch->bridge);
> +     if (pchb) {
> +             spin_lock(&pchb->downl);
> +             if (pchb->chan) {
> +                     skb_scrub_packet(skb, !net_eq(pch->chan_net, 
> pchb->chan_net));
> +                     if (!pchb->chan->ops->start_xmit(pchb->chan, skb))
> +                             kfree_skb(skb);
> +             } else {
> +                     /* channel got unregistered */
> +                     kfree_skb(skb);
> +             }
> +             spin_unlock(&pchb->downl);
> +     }
> +     rcu_read_unlock();
> +
> +     /* If pchb is set then we've consumed the packet */
> +     return pchb;
> +}

Maybe "return !!pchb;". I always find it unexpected to store a pointer
into a bool. But maybe it's just me.
Also, I believe the code could be made more readable by returning early
in unhandled cases, instead of nesting all the conditions.

>  void
>  ppp_input(struct ppp_channel *chan, struct sk_buff *skb)
>  {
> @@ -2100,6 +2236,10 @@ ppp_input(struct ppp_channel *chan, struct sk_buff 
> *skb)
>               return;
>       }
>  
> +     /* If the channel is bridged, transmit via. bridge */
> +     if (ppp_channel_bridge_input(pch, skb))
> +             return;
> +
>       read_lock_bh(&pch->upl);
>       if (!ppp_decompress_proto(skb)) {
>               kfree_skb(skb);
> @@ -2796,8 +2936,11 @@ ppp_unregister_channel(struct ppp_channel *chan)
>       list_del(&pch->list);
>       spin_unlock_bh(&pn->all_channels_lock);
>  
> +     ppp_unbridge_channels(pch);
> +
>       pch->file.dead = 1;
>       wake_up_interruptible(&pch->file.rwait);
> +
>       if (refcount_dec_and_test(&pch->file.refcnt))
>               ppp_destroy_channel(pch);
>  }
> diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h
> index 7bd2a5a75348..8dbecb3ad036 100644
> --- a/include/uapi/linux/ppp-ioctl.h
> +++ b/include/uapi/linux/ppp-ioctl.h
> @@ -115,6 +115,8 @@ struct pppol2tp_ioc_stats {
>  #define PPPIOCATTCHAN        _IOW('t', 56, int)      /* attach to ppp 
> channel */
>  #define PPPIOCGCHAN  _IOR('t', 55, int)      /* get ppp channel number */
>  #define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats)
> +#define PPPIOCBRIDGECHAN _IOW('t', 53, int)  /* bridge one channel to 
> another */
> +#define PPPIOCUNBRIDGECHAN _IO('t', 54)      /* unbridge channel */
>  
>  #define SIOCGPPPSTATS   (SIOCDEVPRIVATE + 0)
>  #define SIOCGPPPVER     (SIOCDEVPRIVATE + 1) /* NEVER change this!! */
> -- 
> 2.17.1
> 

Reply via email to