This patch adds TUNSETQUEUE ioctl to let userspace can temporarily disable or enable a queue of macvtap. This is used to be compatible at API layer of tuntap to simplify the userspace to manage the queues.
This is done by just introduce a pointer detached which was set when the queue were detached. Only the queue with NULL set to this pointer can be used to forward packets. Signed-off-by: Jason Wang <jasow...@redhat.com> --- drivers/net/macvtap.c | 147 +++++++++++++++++++++++++++++++++++-------- include/linux/if_macvlan.h | 7 ++ 2 files changed, 126 insertions(+), 28 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index e3f9344..06b10a5 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -85,67 +85,131 @@ static const struct proto_ops macvtap_socket_ops; */ static DEFINE_SPINLOCK(macvtap_lock); +static void macvtap_swap_slot(struct macvlan_dev *vlan, int a, int b) +{ + struct macvtap_queue *q1, *q2; + + if (a == b) + return; + + q1 = rcu_dereference_protected(vlan->taps[a], + lockdep_is_held(&macvtap_lock)); + q2 = rcu_dereference_protected(vlan->taps[b], + lockdep_is_held(&macvtap_lock)); + + BUG_ON(q1 == NULL || q2 == NULL); + + rcu_assign_pointer(vlan->taps[a], vlan->taps[b]); + rcu_assign_pointer(vlan->taps[b], q1); + + q1->queue_index = b; + q2->queue_index = a; +} + static int macvtap_set_queue(struct net_device *dev, struct file *file, - struct macvtap_queue *q) + struct macvtap_queue *q, bool enable) { struct macvlan_dev *vlan = netdev_priv(dev); int err = -EBUSY; + int total; spin_lock(&macvtap_lock); - if (vlan->numvtaps == MAX_MACVTAP_QUEUES) + + total = vlan->numvtaps + vlan->numdisabled; + if (!enable && total == MAX_MACVTAP_QUEUES) + goto out; + + err = -EINVAL; + if (enable && q->queue_index < vlan->numvtaps) goto out; err = 0; - rcu_assign_pointer(q->vlan, vlan); - rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); - sock_hold(&q->sk); - q->file = file; - q->queue_index = vlan->numvtaps; - file->private_data = q; + if (enable) { + BUG_ON(q->queue_index >= total); + macvtap_swap_slot(vlan, q->queue_index, vlan->numvtaps); + --vlan->numdisabled; + } else { + rcu_assign_pointer(q->vlan, vlan); + rcu_assign_pointer(vlan->taps[total], q); + sock_hold(&q->sk); + + q->file = file; + q->queue_index = total; + file->private_data = q; + if (vlan->numdisabled) + macvtap_swap_slot(vlan, vlan->numvtaps, total); + + /* Make sure the pointers were seen before numvtaps. */ + smp_wmb(); + } vlan->numvtaps++; - out: spin_unlock(&macvtap_lock); return err; } /* - * The file owning the queue got closed, give up both - * the reference that the files holds as well as the - * one from the macvlan_dev if that still exists. + * The file owning the queue got closed or disabled. + * + * If closed give up both the reference that the files holds as well as the + * one from the macvlan_dev if that still exists. If disabled, reshuffle the + * array of taps. * * Using the spinlock makes sure that we don't get * to the queue again after destroying it. */ -static void macvtap_put_queue(struct macvtap_queue *q) +static int macvtap_put_queue(struct macvtap_queue *q, bool clean) { - struct macvtap_queue *nq; struct macvlan_dev *vlan; + int err = -EINVAL; spin_lock(&macvtap_lock); vlan = rcu_dereference_protected(q->vlan, lockdep_is_held(&macvtap_lock)); + if (vlan) { + int total = vlan->numvtaps + vlan->numdisabled; int index = q->queue_index; - BUG_ON(index >= vlan->numvtaps); + bool disabled = q->queue_index >= vlan->numvtaps; - rcu_assign_pointer(vlan->taps[index], - vlan->taps[vlan->numvtaps - 1]); - RCU_INIT_POINTER(q->vlan, NULL); - nq = rcu_dereference_protected(vlan->taps[index], - lockdep_is_held(&macvtap_lock)); - nq->queue_index = index; + if (!clean) { + BUG_ON(q->queue_index >= total); + if (q->queue_index >= vlan->numvtaps) + goto out; + } - sock_put(&q->sk); - --vlan->numvtaps; - } + err = 0; + macvtap_swap_slot(vlan, index, total - 1); + if (!disabled && vlan->numdisabled) + macvtap_swap_slot(vlan, index, vlan->numvtaps - 1); + + if (clean) { + RCU_INIT_POINTER(vlan->taps[total - 1], NULL); + RCU_INIT_POINTER(q->vlan, NULL); + sock_put(&q->sk); + if (disabled) + --vlan->numdisabled; + else + --vlan->numvtaps; + } else { + --vlan->numvtaps; + ++vlan->numdisabled; + } + + } else if (clean) + err = 0; +out: spin_unlock(&macvtap_lock); synchronize_rcu(); - sock_put(&q->sk); + + if (clean) + sock_put(&q->sk); + + return err; } /* @@ -201,7 +265,7 @@ static void macvtap_del_queues(struct net_device *dev) /* macvtap_put_queue can free some slots, so go through all slots */ spin_lock(&macvtap_lock); - for (i = 0; i < vlan->numvtaps; i++) { + for (i = 0; i < vlan->numvtaps + vlan->numdisabled; i++) { q = rcu_dereference_protected(vlan->taps[i], lockdep_is_held(&macvtap_lock)); BUG_ON(q == NULL); @@ -211,6 +275,7 @@ static void macvtap_del_queues(struct net_device *dev) } /* guarantee that any future macvtap_set_queue will fail */ vlan->numvtaps = MAX_MACVTAP_QUEUES; + vlan->numdisabled = 0; spin_unlock(&macvtap_lock); synchronize_rcu(); @@ -384,7 +449,7 @@ static int macvtap_open(struct inode *inode, struct file *file) if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) sock_set_flag(&q->sk, SOCK_ZEROCOPY); - err = macvtap_set_queue(dev, file, q); + err = macvtap_set_queue(dev, file, q, false); if (err) sock_put(&q->sk); @@ -398,7 +463,7 @@ out: static int macvtap_release(struct inode *inode, struct file *file) { struct macvtap_queue *q = file->private_data; - macvtap_put_queue(q); + macvtap_put_queue(q, true); return 0; } @@ -922,6 +987,27 @@ static int macvtap_set_iff(struct file *file, struct ifreq __user *ifr_u) return 0; } +static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags) +{ + struct macvtap_queue *q = file->private_data; + struct macvlan_dev *vlan; + int ret = -EINVAL; + + vlan = macvtap_get_vlan(q); + if (!vlan) + goto done; + + if (flags & IFF_ATTACH_QUEUE) + ret = macvtap_set_queue(vlan->dev, file, q, true); + else if (flags & IFF_DETACH_QUEUE) + ret = macvtap_put_queue(q, false); + + macvtap_put_vlan(vlan); + +done: + return ret; +} + /* * provide compatibility with generic tun/tap interface */ @@ -954,6 +1040,11 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, macvtap_put_vlan(vlan); return ret; + case TUNSETQUEUE: + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + return macvtap_ioctl_set_queue(file, u); + case TUNGETFEATURES: if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up)) return -EFAULT; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 32e943a..f134afb 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -69,8 +69,15 @@ struct macvlan_dev { u16 flags; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); + /* This array tracks all taps (include disabled ones) and will be + * reshuffled to keep the following order: + * [0, numvtaps) : enabled taps, + * [numvtaps, numvtaps + numdisabled) : disabled taps, + * [numvtaps + numdisabled, MAX_MACVTAP_QUEUES) : unused slots + */ struct macvtap_queue *taps[MAX_MACVTAP_QUEUES]; int numvtaps; + int numdisabled; int minor; }; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/