This makes each FDB entry's metadata dst accessible through the same
ENCAP uapi that lwtunnel uses.  The function signatures are slightly
different because they operate on a metadata_dst rather than a
lwtunnel_state.

Netlink encapsulation is done by callbacks in net_device_ops.  This is
because the metadata is always used in the context of a port / device on
the bridge; it's not meaningful in a "vacuum".  It makes no sense to
accept metadata of a type that doesn't match the device (whereas in
lwtunnel it does make sense, since that just switches the
encapsulation).  Also, this way a device can perform extended validity
checks on data coming in from the user, ensuring it is actually usable.

Note that this is not related to ndo_fill_metadata_dst(); that one is
used only by OVS and operates on a packet that is currently being
switched, i.e. the data plane.  The API in this patch is control plane.

[TODO: maybe just pass the entire netlink attr block down?]
Signed-off-by: David Lamparter <equi...@diac24.net>
---
 include/linux/netdevice.h      | 18 +++++++++
 include/net/ip_tunnels.h       |  5 +++
 include/uapi/linux/neighbour.h |  2 +
 net/bridge/br.c                |  2 +-
 net/bridge/br_fdb.c            | 79 +++++++++++++++++++++++++++++++-------
 net/bridge/br_private.h        |  1 +
 net/ipv4/ip_tunnel_core.c      | 87 +++++++++++++++++++++++++++++++++---------
 7 files changed, 162 insertions(+), 32 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0f1c4cb2441e..2de46f8b3f4f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -828,6 +828,8 @@ struct xfrmdev_ops {
 };
 #endif
 
+struct metadata_dst;
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1128,6 +1130,15 @@ struct xfrmdev_ops {
  * void (*ndo_xdp_flush)(struct net_device *dev);
  *     This function is used to inform the driver to flush a paticular
  *     xpd tx queue. Must be called on same CPU as xdp_xmit.
+ * int (*ndo_metadst_fill)(struct sk_buff *skb, struct metadata_dst *dst);
+ *     Used to encapsulate a metadata_dst that is associated with this
+ *     netdevice into the appropriate netlink attributes on skb.
+ *     Needs to return a lwtunnel_encap_types value if valid data was filled.
+ * int (*ndo_metadst_build)(struct net_device *dev, struct nlattr *meta,
+ *                         struct metadata_dst **dst,
+ *                         struct netlink_ext_ack *extack);
+ *     Reverse of the previous function, build a metadata_dst from netlink
+ *     attributes.  Should perform appropriate validation.
  */
 struct net_device_ops {
        int                     (*ndo_init)(struct net_device *dev);
@@ -1314,6 +1325,13 @@ struct net_device_ops {
        int                     (*ndo_xdp_xmit)(struct net_device *dev,
                                                struct xdp_buff *xdp);
        void                    (*ndo_xdp_flush)(struct net_device *dev);
+
+       int                     (*ndo_metadst_fill)(struct sk_buff *skb,
+                                                   struct metadata_dst *dst);
+       int                     (*ndo_metadst_build)(struct net_device *dev,
+                                                    struct nlattr *meta,
+                                                    struct metadata_dst **dst,
+                                                    struct netlink_ext_ack 
*extack);
 };
 
 /**
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 520809912f03..e6181fb83324 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -451,6 +451,11 @@ void __init ip_tunnel_core_init(void);
 void ip_tunnel_need_metadata(void);
 void ip_tunnel_unneed_metadata(void);
 
+int ip_tunnel_fill_metadst(struct sk_buff *skb, struct metadata_dst *md_dst);
+int ip_tunnel_build_metadst(struct net_device *dev, struct nlattr *meta,
+                           struct metadata_dst **dst,
+                           struct netlink_ext_ack *extack);
+
 #else /* CONFIG_INET */
 
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state 
*lwtstate)
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 3199d28980b3..cd98ce4b8dd9 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -27,6 +27,8 @@ enum {
        NDA_MASTER,
        NDA_LINK_NETNSID,
        NDA_SRC_VNI,
+       NDA_ENCAP_TYPE,
+       NDA_ENCAP,
        __NDA_MAX
 };
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1407d1ba7577..822dfcef2649 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -140,7 +140,7 @@ static int br_switchdev_event(struct notifier_block *unused,
        switch (event) {
        case SWITCHDEV_FDB_ADD_TO_BRIDGE:
                fdb_info = ptr;
-               err = br_fdb_external_learn_add(br, p, fdb_info->addr,
+               err = br_fdb_external_learn_add(br, p, NULL, fdb_info->addr,
                                                fdb_info->vid);
                if (err) {
                        err = notifier_from_errno(err);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ac3b916c39b..452d88bab1a0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -671,6 +671,27 @@ static int fdb_fill_info(struct sk_buff *skb, const struct 
net_bridge *br,
 
        if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
                goto nla_put_failure;
+       if (fdb->md_dst && fdb->dst) {
+               struct net_device *dev = fdb->dst->dev;
+
+               if (dev->netdev_ops &&
+                   dev->netdev_ops->ndo_metadst_fill) {
+                       struct nlattr *nest;
+                       int ret;
+
+                       nest = nla_nest_start(skb, NDA_ENCAP);
+                       if (!nest)
+                               goto nla_put_failure;
+                       ret = dev->netdev_ops->ndo_metadst_fill(skb,
+                                                               fdb->md_dst);
+                       if (ret < 0)
+                               goto nla_put_failure;
+                       nla_nest_end(skb, nest);
+
+                       if (ret && nla_put_u16(skb, NDA_ENCAP_TYPE, ret))
+                               goto nla_put_failure;
+               }
+       }
 
        nlmsg_end(skb, nlh);
        return 0;
@@ -776,10 +797,12 @@ int br_fdb_dump(struct sk_buff *skb,
 
 /* Update (create or replace) forwarding database entry */
 static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
-                        const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
+                        struct metadata_dst *md_dst, const __u8 *addr,
+                        __u16 state, __u16 flags, __u16 vid)
 {
        struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
        struct net_bridge_fdb_entry *fdb;
+       struct metadata_dst *old_dst;
        bool modified = false;
 
        /* If the port cannot learn allow only local and static entries */
@@ -799,7 +822,7 @@ static int fdb_add_entry(struct net_bridge *br, struct 
net_bridge_port *source,
                if (!(flags & NLM_F_CREATE))
                        return -ENOENT;
 
-               fdb = fdb_create(head, source, NULL, addr, vid, 0, 0);
+               fdb = fdb_create(head, source, md_dst, addr, vid, 0, 0);
                if (!fdb)
                        return -ENOMEM;
 
@@ -810,6 +833,11 @@ static int fdb_add_entry(struct net_bridge *br, struct 
net_bridge_port *source,
 
                if (fdb->dst != source) {
                        fdb->dst = source;
+
+                       old_dst = xchg(&fdb->md_dst,
+                                      metadata_dst_clone(md_dst));
+                       dst_release(&old_dst->dst);
+
                        modified = true;
                }
        }
@@ -849,8 +877,8 @@ static int fdb_add_entry(struct net_bridge *br, struct 
net_bridge_port *source,
 }
 
 static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
-                       struct net_bridge_port *p, const unsigned char *addr,
-                       u16 nlh_flags, u16 vid)
+                       struct net_bridge_port *p, struct metadata_dst *md_dst,
+                       const unsigned char *addr, u16 nlh_flags, u16 vid)
 {
        int err = 0;
 
@@ -862,14 +890,14 @@ static int __br_fdb_add(struct ndmsg *ndm, struct 
net_bridge *br,
                }
                local_bh_disable();
                rcu_read_lock();
-               br_fdb_update(br, p, NULL, addr, vid, true);
+               br_fdb_update(br, p, md_dst, addr, vid, true);
                rcu_read_unlock();
                local_bh_enable();
        } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
-               err = br_fdb_external_learn_add(br, p, addr, vid);
+               err = br_fdb_external_learn_add(br, p, md_dst, addr, vid);
        } else {
                spin_lock_bh(&br->hash_lock);
-               err = fdb_add_entry(br, p, addr, ndm->ndm_state,
+               err = fdb_add_entry(br, p, md_dst, addr, ndm->ndm_state,
                                    nlh_flags, vid);
                spin_unlock_bh(&br->hash_lock);
        }
@@ -886,6 +914,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
        struct net_bridge_port *p = NULL;
        struct net_bridge_vlan *v;
        struct net_bridge *br = NULL;
+       struct metadata_dst *md_dst = NULL;
        int err = 0;
 
        if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
@@ -898,6 +927,22 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                return -EINVAL;
        }
 
+       if (tb[NDA_ENCAP_TYPE] && tb[NDA_ENCAP]) {
+               if (!dev->netdev_ops ||
+                   !dev->netdev_ops->ndo_metadst_build) {
+                       pr_info("bridge: target device does not support 
ENCAP\n");
+                       return -EINVAL;
+               }
+
+               err = dev->netdev_ops->ndo_metadst_build(dev, tb[NDA_ENCAP],
+                                                        &md_dst, NULL);
+               if (err)
+                       return err;
+       } else if (tb[NDA_ENCAP_TYPE] || tb[NDA_ENCAP]) {
+               pr_info("bridge: RTM_NEWNEIGH with unpaired ENCAP_TYPE / 
ENCAP\n");
+               return -EINVAL;
+       }
+
        if (dev->priv_flags & IFF_EBRIDGE) {
                br = netdev_priv(dev);
                vg = br_vlan_group(br);
@@ -906,7 +951,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                if (!p) {
                        pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
                                dev->name);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out;
                }
                br = p->br;
                vg = nbp_vlan_group(p);
@@ -916,13 +962,14 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                v = br_vlan_find(vg, vid);
                if (!v || !br_vlan_should_use(v)) {
                        pr_info("bridge: RTM_NEWNEIGH with unconfigured vlan %d 
on %s\n", vid, dev->name);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out;
                }
 
                /* VID was specified, so use it. */
-               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
+               err = __br_fdb_add(ndm, br, p, md_dst, addr, nlh_flags, vid);
        } else {
-               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
+               err = __br_fdb_add(ndm, br, p, md_dst, addr, nlh_flags, 0);
                if (err || !vg || !vg->num_vlans)
                        goto out;
 
@@ -933,13 +980,14 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                list_for_each_entry(v, &vg->vlan_list, vlist) {
                        if (!br_vlan_should_use(v))
                                continue;
-                       err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
+                       err = __br_fdb_add(ndm, br, p, md_dst, addr, nlh_flags, 
v->vid);
                        if (err)
                                goto out;
                }
        }
 
 out:
+       dst_release(&md_dst->dst);
        return err;
 }
 
@@ -1077,9 +1125,11 @@ void br_fdb_unsync_static(struct net_bridge *br, struct 
net_bridge_port *p)
 }
 
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
+                             struct metadata_dst *md_dst,
                              const unsigned char *addr, u16 vid)
 {
        struct net_bridge_fdb_entry *fdb;
+       struct metadata_dst *old_dst;
        struct hlist_head *head;
        bool modified = false;
        int err = 0;
@@ -1089,7 +1139,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, 
struct net_bridge_port *p,
        head = &br->hash[br_mac_hash(addr, vid)];
        fdb = br_fdb_find(br, addr, vid);
        if (!fdb) {
-               fdb = fdb_create(head, p, NULL, addr, vid, 0, 0);
+               fdb = fdb_create(head, p, md_dst, addr, vid, 0, 0);
                if (!fdb) {
                        err = -ENOMEM;
                        goto err_unlock;
@@ -1101,6 +1151,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, 
struct net_bridge_port *p,
 
                if (fdb->dst != p) {
                        fdb->dst = p;
+                       old_dst = xchg(&fdb->md_dst,
+                                      metadata_dst_clone(md_dst));
+                       dst_release(&old_dst->dst);
                        modified = true;
                }
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 66d33352681f..dd426ccf7475 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -538,6 +538,7 @@ int br_fdb_dump(struct sk_buff *skb, struct 
netlink_callback *cb,
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
+                             struct metadata_dst *md_dst,
                              const unsigned char *addr, u16 vid);
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              const unsigned char *addr, u16 vid);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 2f39479be92f..9f921d4e2544 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -228,13 +228,10 @@ static const struct nla_policy 
ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
        [LWTUNNEL_IP_FLAGS]     = { .type = NLA_U16 },
 };
 
-static int ip_tun_build_state(struct nlattr *attr,
-                             unsigned int family, const void *cfg,
-                             struct lwtunnel_state **ts,
-                             struct netlink_ext_ack *extack)
+static int ip_tun_build_common(struct ip_tunnel_info *tun_info,
+                              struct nlattr *attr,
+                              struct netlink_ext_ack *extack)
 {
-       struct ip_tunnel_info *tun_info;
-       struct lwtunnel_state *new_state;
        struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
        int err;
 
@@ -243,14 +240,6 @@ static int ip_tun_build_state(struct nlattr *attr,
        if (err < 0)
                return err;
 
-       new_state = lwtunnel_state_alloc(sizeof(*tun_info));
-       if (!new_state)
-               return -ENOMEM;
-
-       new_state->type = LWTUNNEL_ENCAP_IP;
-
-       tun_info = lwt_tun_info(new_state);
-
        if (tb[LWTUNNEL_IP_ID])
                tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]);
 
@@ -272,16 +261,59 @@ static int ip_tun_build_state(struct nlattr *attr,
        tun_info->mode = IP_TUNNEL_INFO_TX;
        tun_info->options_len = 0;
 
-       *ts = new_state;
+       return 0;
+}
+
+static int ip_tun_build_state(struct nlattr *attr,
+                             unsigned int family, const void *cfg,
+                             struct lwtunnel_state **ts,
+                             struct netlink_ext_ack *extack)
+{
+       struct ip_tunnel_info *tun_info;
+       struct lwtunnel_state *new_state;
+       int err;
+
+       new_state = lwtunnel_state_alloc(sizeof(*tun_info));
+       if (!new_state)
+               return -ENOMEM;
 
+       new_state->type = LWTUNNEL_ENCAP_IP;
+
+       tun_info = lwt_tun_info(new_state);
+       err = ip_tun_build_common(tun_info, attr, extack);
+       if (err) {
+               lwtstate_free(new_state);
+               return err;
+       }
+
+       *ts = new_state;
        return 0;
 }
 
-static int ip_tun_fill_encap_info(struct sk_buff *skb,
-                                 struct lwtunnel_state *lwtstate)
+int ip_tunnel_build_metadst(struct net_device *dev, struct nlattr *meta,
+                           struct metadata_dst **dst,
+                           struct netlink_ext_ack *extack)
 {
-       struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
+       struct metadata_dst *md_dst;
+       int err;
+
+       md_dst = metadata_dst_alloc(0, METADATA_IP_TUNNEL, GFP_ATOMIC);
+       if (!md_dst)
+               return -ENOMEM;
 
+       err = ip_tun_build_common(&md_dst->u.tun_info, meta, extack);
+       if (err) {
+               dst_release(&md_dst->dst);
+               return err;
+       }
+       *dst = md_dst;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_build_metadst);
+
+static int ip_tun_fill_common(struct sk_buff *skb,
+                             struct ip_tunnel_info *tun_info)
+{
        if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id,
                         LWTUNNEL_IP_PAD) ||
            nla_put_in_addr(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
@@ -294,6 +326,25 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
        return 0;
 }
 
+static int ip_tun_fill_encap_info(struct sk_buff *skb,
+                                 struct lwtunnel_state *lwtstate)
+{
+       struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
+       return ip_tun_fill_common(skb, tun_info);
+}
+
+int ip_tunnel_fill_metadst(struct sk_buff *skb, struct metadata_dst *md_dst)
+{
+       int err;
+       if (md_dst->type != METADATA_IP_TUNNEL)
+               return 0;
+       err = ip_tun_fill_common(skb, &md_dst->u.tun_info);
+       if (err)
+               return err;
+       return LWTUNNEL_ENCAP_IP;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_fill_metadst);
+
 static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
 {
        return nla_total_size_64bit(8)  /* LWTUNNEL_IP_ID */
-- 
2.13.0

Reply via email to