Allow the guest to reuse the hash value to make receive steering
consistent between the host and guest, and to save hash computation.

Signed-off-by: Akihiko Odaki <akihiko.od...@daynix.com>
---
 Documentation/networking/tuntap.rst |   7 ++
 drivers/net/Kconfig                 |   1 +
 drivers/net/tun.c                   | 146 +++++++++++++++++++++++++++++++-----
 include/uapi/linux/if_tun.h         |  44 +++++++++++
 4 files changed, 180 insertions(+), 18 deletions(-)

diff --git a/Documentation/networking/tuntap.rst 
b/Documentation/networking/tuntap.rst
index 4d7087f727be..86b4ae8caa8a 100644
--- a/Documentation/networking/tuntap.rst
+++ b/Documentation/networking/tuntap.rst
@@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
       return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
   }
 
+3.4 Reference
+-------------
+
+``linux/if_tun.h`` defines the interface described below:
+
+.. kernel-doc:: include/uapi/linux/if_tun.h
+
 Universal TUN/TAP device driver Frequently Asked Question
 =========================================================
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 9920b3a68ed1..e2a7bd703550 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -395,6 +395,7 @@ config TUN
        tristate "Universal TUN/TAP device driver support"
        depends on INET
        select CRC32
+       select SKB_EXTENSIONS
        help
          TUN/TAP provides packet reception and transmission for user space
          programs.  It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9d93ab9ee58f..b8fcd71becac 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -173,6 +173,10 @@ struct tun_prog {
        struct bpf_prog *prog;
 };
 
+struct tun_vnet_hash_container {
+       struct tun_vnet_hash common;
+};
+
 /* Since the socket were moved to tun_file, to preserve the behavior of persist
  * device, socket filter, sndbuf and vnet header size were restore when the
  * file were attached to a persist device.
@@ -210,6 +214,7 @@ struct tun_struct {
        struct bpf_prog __rcu *xdp_prog;
        struct tun_prog __rcu *steering_prog;
        struct tun_prog __rcu *filter_prog;
+       struct tun_vnet_hash_container __rcu *vnet_hash;
        struct ethtool_link_ksettings link_ksettings;
        /* init args */
        struct file *file;
@@ -221,6 +226,11 @@ struct veth {
        __be16 h_vlan_TCI;
 };
 
+static const struct tun_vnet_hash tun_vnet_hash_cap = {
+       .flags = TUN_VNET_HASH_REPORT,
+       .types = VIRTIO_NET_SUPPORTED_HASH_TYPES
+};
+
 static void tun_flow_init(struct tun_struct *tun);
 static void tun_flow_uninit(struct tun_struct *tun);
 
@@ -322,10 +332,17 @@ static long tun_set_vnet_be(struct tun_struct *tun, int 
__user *argp)
        if (get_user(be, argp))
                return -EFAULT;
 
-       if (be)
+       if (be) {
+               struct tun_vnet_hash_container *vnet_hash = 
rtnl_dereference(tun->vnet_hash);
+
+               if (!(tun->flags & TUN_VNET_LE) &&
+                   vnet_hash && (vnet_hash->flags & TUN_VNET_HASH_REPORT))
+                       return -EBUSY;
+
                tun->flags |= TUN_VNET_BE;
-       else
+       } else {
                tun->flags &= ~TUN_VNET_BE;
+       }
 
        return 0;
 }
@@ -522,14 +539,20 @@ static inline void tun_flow_save_rps_rxhash(struct 
tun_flow_entry *e, u32 hash)
  * the userspace application move between processors, we may get a
  * different rxq no. here.
  */
-static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb,
+                                  const struct tun_vnet_hash_container 
*vnet_hash)
 {
+       struct tun_vnet_hash_ext *ext;
+       struct flow_keys keys;
        struct tun_flow_entry *e;
        u32 txq, numqueues;
 
        numqueues = READ_ONCE(tun->numqueues);
 
-       txq = __skb_get_hash_symmetric(skb);
+       memset(&keys, 0, sizeof(keys));
+       skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
+
+       txq = flow_hash_from_keys(&keys);
        e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
        if (e) {
                tun_flow_save_rps_rxhash(e, txq);
@@ -538,6 +561,16 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, 
struct sk_buff *skb)
                txq = reciprocal_scale(txq, numqueues);
        }
 
+       if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_REPORT)) {
+               ext = skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
+               if (ext) {
+                       u32 types = vnet_hash->common.types;
+
+                       ext->report = virtio_net_hash_report(types, keys.basic);
+                       ext->value = skb->l4_hash ? skb->hash : txq;
+               }
+       }
+
        return txq;
 }
 
@@ -565,10 +598,13 @@ static u16 tun_select_queue(struct net_device *dev, 
struct sk_buff *skb,
        u16 ret;
 
        rcu_read_lock();
-       if (rcu_dereference(tun->steering_prog))
+       if (rcu_dereference(tun->steering_prog)) {
                ret = tun_ebpf_select_queue(tun, skb);
-       else
-               ret = tun_automq_select_queue(tun, skb);
+       } else {
+               struct tun_vnet_hash_container *vnet_hash = 
rcu_dereference(tun->vnet_hash);
+
+               ret = tun_automq_select_queue(tun, skb, vnet_hash);
+       }
        rcu_read_unlock();
 
        return ret;
@@ -2120,33 +2156,63 @@ static ssize_t tun_put_user(struct tun_struct *tun,
        }
 
        if (vnet_hdr_sz) {
-               struct virtio_net_hdr gso;
+               struct tun_vnet_hash_ext *ext;
+               size_t vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
+               union {
+                       struct virtio_net_hdr hdr;
+                       struct virtio_net_hdr_v1_hash hdr_v1_hash;
+               } vnet_hdr;
+               int ret;
 
                if (iov_iter_count(iter) < vnet_hdr_sz)
                        return -EINVAL;
 
-               if (virtio_net_hdr_from_skb(skb, &gso,
-                                           tun_is_little_endian(tun), true,
-                                           vlan_hlen)) {
+               ext = vnet_hdr_sz < sizeof(vnet_hdr.hdr_v1_hash) ?
+                     NULL : skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
+
+               if (ext) {
+                       struct virtio_net_hash hash = {
+                               .value = ext->value,
+                               .report = ext->report,
+                       };
+
+                       vnet_hdr_content_sz = sizeof(vnet_hdr.hdr_v1_hash);
+                       ret = virtio_net_hdr_v1_hash_from_skb(skb,
+                                                             
&vnet_hdr.hdr_v1_hash,
+                                                             true,
+                                                             vlan_hlen,
+                                                             &hash);
+               } else {
+                       vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
+                       ret = virtio_net_hdr_from_skb(skb,
+                                                     &vnet_hdr.hdr,
+                                                     tun_is_little_endian(tun),
+                                                     true,
+                                                     vlan_hlen);
+               }
+
+               if (ret) {
                        struct skb_shared_info *sinfo = skb_shinfo(skb);
 
                        if (net_ratelimit()) {
                                netdev_err(tun->dev, "unexpected GSO type: 
0x%x, gso_size %d, hdr_len %d\n",
-                                          sinfo->gso_type, tun16_to_cpu(tun, 
gso.gso_size),
-                                          tun16_to_cpu(tun, gso.hdr_len));
+                                          sinfo->gso_type,
+                                          tun16_to_cpu(tun, 
vnet_hdr.hdr.gso_size),
+                                          tun16_to_cpu(tun, 
vnet_hdr.hdr.hdr_len));
                                print_hex_dump(KERN_ERR, "tun: ",
                                               DUMP_PREFIX_NONE,
                                               16, 1, skb->head,
-                                              min((int)tun16_to_cpu(tun, 
gso.hdr_len), 64), true);
+                                              min(tun16_to_cpu(tun, 
vnet_hdr.hdr.hdr_len), 64),
+                                              true);
                        }
                        WARN_ON_ONCE(1);
                        return -EINVAL;
                }
 
-               if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
+               if (copy_to_iter(&vnet_hdr, vnet_hdr_content_sz, iter) != 
vnet_hdr_content_sz)
                        return -EFAULT;
 
-               iov_iter_zero(vnet_hdr_sz - sizeof(gso), iter);
+               iov_iter_zero(vnet_hdr_sz - vnet_hdr_content_sz, iter);
        }
 
        if (vlan_hlen) {
@@ -3094,6 +3160,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
        int le;
        int ret;
        bool do_notify = false;
+       struct tun_vnet_hash vnet_hash_common;
+       struct tun_vnet_hash_container *vnet_hash;
 
        if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
            (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
@@ -3115,6 +3183,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                return open_related_ns(&net->ns, get_net_ns);
+       } else if (cmd == TUNGETVNETHASHCAP) {
+               return copy_to_user(argp, &tun_vnet_hash_cap, 
sizeof(tun_vnet_hash_cap)) ?
+                      -EFAULT : 0;
        }
 
        rtnl_lock();
@@ -3314,6 +3385,13 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
                        break;
                }
 
+               vnet_hash = rtnl_dereference(tun->vnet_hash);
+               if (vnet_hash && (vnet_hash->common.flags & 
TUN_VNET_HASH_REPORT) &&
+                   vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr_v1_hash)) {
+                       ret = -EBUSY;
+                       break;
+               }
+
                tun->vnet_hdr_sz = vnet_hdr_sz;
                break;
 
@@ -3328,10 +3406,18 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
                        ret = -EFAULT;
                        break;
                }
-               if (le)
+               if (le) {
                        tun->flags |= TUN_VNET_LE;
-               else
+               } else {
+                       vnet_hash = rtnl_dereference(tun->vnet_hash);
+                       if (vnet_hash && (vnet_hash->common.flags & 
TUN_VNET_HASH_REPORT) &&
+                           !tun_legacy_is_little_endian(tun)) {
+                               ret = -EBUSY;
+                               break;
+                       }
+
                        tun->flags &= ~TUN_VNET_LE;
+               }
                break;
 
        case TUNGETVNETBE:
@@ -3396,6 +3482,30 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
                ret = open_related_ns(&net->ns, get_net_ns);
                break;
 
+       case TUNSETVNETHASH:
+               if (copy_from_user(&vnet_hash_common, argp, 
sizeof(vnet_hash_common))) {
+                       ret = -EFAULT;
+                       break;
+               }
+               argp = (struct tun_vnet_hash __user *)argp + 1;
+
+               if ((vnet_hash_common.flags & TUN_VNET_HASH_REPORT) &&
+                   (tun->vnet_hdr_sz < sizeof(struct virtio_net_hdr_v1_hash) ||
+                    !tun_is_little_endian(tun))) {
+                       ret = -EBUSY;
+                       break;
+               }
+
+               vnet_hash = kmalloc(sizeof(vnet_hash->common), GFP_KERNEL);
+               if (!vnet_hash) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               vnet_hash->common = vnet_hash_common;
+               kfree_rcu_mightsleep(rcu_replace_pointer_rtnl(tun->vnet_hash, 
vnet_hash));
+               break;
+
        default:
                ret = -EINVAL;
                break;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 287cdc81c939..1561e8ce0a0a 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -62,6 +62,30 @@
 #define TUNSETCARRIER _IOW('T', 226, int)
 #define TUNGETDEVNETNS _IO('T', 227)
 
+/**
+ * define TUNGETVNETHASHCAP - ioctl to get virtio_net hashing capability.
+ *
+ * The argument is a pointer to &struct tun_vnet_hash which will store the
+ * maximal virtio_net hashing configuration.
+ */
+#define TUNGETVNETHASHCAP _IOR('T', 228, struct tun_vnet_hash)
+
+/**
+ * define TUNSETVNETHASH - ioctl to configure virtio_net hashing
+ *
+ * The argument is a pointer to &struct tun_vnet_hash.
+ *
+ * %TUNSETVNETHDRSZ ioctl must be called with a number greater than or equal to
+ * the size of &struct virtio_net_hdr_v1_hash before calling this ioctl with
+ * %TUN_VNET_HASH_REPORT.
+ *
+ * The virtio_net header must be configured as little-endian before calling 
this
+ * ioctl with %TUN_VNET_HASH_REPORT.
+ *
+ * This ioctl currently has no effect on XDP packets.
+ */
+#define TUNSETVNETHASH _IOW('T', 229, struct tun_vnet_hash)
+
 /* TUNSETIFF ifr flags */
 #define IFF_TUN                0x0001
 #define IFF_TAP                0x0002
@@ -115,4 +139,24 @@ struct tun_filter {
        __u8   addr[][ETH_ALEN];
 };
 
+/**
+ * define TUN_VNET_HASH_REPORT - Request virtio_net hash reporting for vhost
+ */
+#define TUN_VNET_HASH_REPORT   0x0001
+
+/**
+ * struct tun_vnet_hash - virtio_net hashing configuration
+ * @flags:
+ *             Bitmask consists of %TUN_VNET_HASH_REPORT and %TUN_VNET_HASH_RSS
+ * @pad:
+ *             Should be filled with zero before passing to %TUNSETVNETHASH
+ * @types:
+ *             Bitmask of allowed hash types
+ */
+struct tun_vnet_hash {
+       __u16 flags;
+       __u8 pad[2];
+       __u32 types;
+};
+
 #endif /* _UAPI__IF_TUN_H */

-- 
2.46.0


Reply via email to