On 11/08/15 22:45, Roopa Prabhu wrote:
From: Roopa Prabhu <ro...@cumulusnetworks.com>

moves mpls_route nexthop fields to a new mpls_nhlfe
struct. mpls_nhlfe represents a mpls nexthop label forwarding entry.
It prepares mpls route structure for multipath support.

In the process moves mpls_route structure into internal.h.

Is there a requirement for moving this and the new datastructures into internal.h? I may have missed it, but I don't see any dependency on this in this patch series.

Moves some of the code from mpls_route_add into a separate mpls
nhlfe build function. changed mpls_rt_alloc to take number of
nexthops as argument.

A mpls route can point to multiple mpls_nhlfe. This patch
does not support multipath yet, hence the rest of the changes
assume that a mpls route points to a single mpls_nhlfe

Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com>
---
  net/mpls/af_mpls.c  |  225 ++++++++++++++++++++++++++++-----------------------
  net/mpls/internal.h |   35 ++++++++
  2 files changed, 158 insertions(+), 102 deletions(-)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 8c5707d..cf86e9d 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -21,35 +21,6 @@
  #endif
  #include "internal.h"

-#define LABEL_NOT_SPECIFIED (1<<20)
-#define MAX_NEW_LABELS 2
-
-/* This maximum ha length copied from the definition of struct neighbour */
-#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
-
-enum mpls_payload_type {
-       MPT_UNSPEC, /* IPv4 or IPv6 */
-       MPT_IPV4 = 4,
-       MPT_IPV6 = 6,
-
-       /* Other types not implemented:
-        *  - Pseudo-wire with or without control word (RFC4385)
-        *  - GAL (RFC5586)
-        */
-};
-
-struct mpls_route { /* next hop label forwarding entry */
-       struct net_device __rcu *rt_dev;
-       struct rcu_head         rt_rcu;
-       u32                     rt_label[MAX_NEW_LABELS];
-       u8                      rt_protocol; /* routing protocol that set this 
entry */
-       u8                      rt_payload_type;
-       u8                      rt_labels;
-       u8                      rt_via_alen;
-       u8                      rt_via_table;
-       u8                      rt_via[0];
-};
-
  static int zero = 0;
  static int label_limit = (1 << 20) - 1;

...
@@ -281,13 +254,15 @@ struct mpls_route_config {
        struct nl_info          rc_nlinfo;
  };

-static struct mpls_route *mpls_rt_alloc(size_t alen)
+static struct mpls_route *mpls_rt_alloc(int num_nh)
  {
        struct mpls_route *rt;

-       rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
+       rt = kzalloc(sizeof(*rt) + (num_nh * sizeof(struct mpls_nhlfe)),

How about this instead:
  offsetof(typeof(*rt), rt_nh[num_nh])
?

That way, you don't need to write out the type of rt_nh here.

+                    GFP_KERNEL);
        if (rt)
-               rt->rt_via_alen = alen;
+               rt->rt_nhn = num_nh;
+
        return rt;
  }

@@ -322,7 +297,7 @@ static void mpls_route_update(struct net *net, unsigned 
index,

        platform_label = rtnl_dereference(net->mpls.platform_label);
        rt = rtnl_dereference(platform_label[index]);
-       if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
+       if (!dev || (rt && (rtnl_dereference(rt->rt_nh->nh_dev) == dev))) {
                rcu_assign_pointer(platform_label[index], new);
                old = rt;
        }
@@ -406,23 +381,23 @@ static struct net_device *inet6_fib_lookup_dev(struct net 
*net, void *addr)
  #endif

  static struct net_device *find_outdev(struct net *net,
-                                     struct mpls_route_config *cfg)
+                                     struct mpls_nhlfe *nhlfe, int oif)
  {
        struct net_device *dev = NULL;

-       if (!cfg->rc_ifindex) {
-               switch (cfg->rc_via_table) {
+       if (!oif) {
+               switch (nhlfe->nh_via_table) {
                case NEIGH_ARP_TABLE:
-                       dev = inet_fib_lookup_dev(net, cfg->rc_via);
+                       dev = inet_fib_lookup_dev(net, nhlfe->nh_via);
                        break;
                case NEIGH_ND_TABLE:
-                       dev = inet6_fib_lookup_dev(net, cfg->rc_via);
+                       dev = inet6_fib_lookup_dev(net, nhlfe->nh_via);
                        break;
                case NEIGH_LINK_TABLE:
                        break;
                }
        } else {
-               dev = dev_get_by_index(net, cfg->rc_ifindex);
+               dev = dev_get_by_index(net, oif);
        }

        if (!dev)
@@ -431,15 +406,81 @@ static struct net_device *find_outdev(struct net *net,
        return dev;
  }

+int mpls_nhlfe_assign_dev(struct net *net, struct mpls_nhlfe *nhlfe, int oif)
+{
+       struct net_device *dev = NULL;
+       int err = -ENODEV;
+
+       dev = find_outdev(net, nhlfe, oif);
+       if (IS_ERR(dev)) {
+               err = PTR_ERR(dev);
+               dev = NULL;
+               goto errout;
+       }
+
+       /* Ensure this is a supported device */
+       err = -EINVAL;
+       if (!mpls_dev_get(dev))
+               goto errout;
+
+       /* For now just support ethernet devices */
+       err = -EINVAL;
+       if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
+               goto errout;

Isn't this interface type check redundant with the mpls_dev_get call just above?

+
+       RCU_INIT_POINTER(nhlfe->nh_dev, dev);
+       dev_put(dev);

Is it safe to release the reference to dev here? Prior to these changes, it was released after mpls_route_update is called in mpls_route_add - is that OK without a reference?

+
+       return 0;
+
+errout:
+       if (dev)
+               dev_put(dev);
+       return err;
+}
+
+static int mpls_nhlfe_build(struct mpls_route_config *cfg,
+                           struct mpls_nhlfe *nhlfe)
+{
+       struct net *net = cfg->rc_nlinfo.nl_net;
+       int err = -ENOMEM;
+       int i;
+
+       if (!nhlfe)
+               goto errout;
+
+       err = -EINVAL;
+       /* Ensure only a supported number of labels are present */
+       if (cfg->rc_output_labels > MAX_NEW_LABELS)
+               goto errout;
+
+       nhlfe->nh_labels = cfg->rc_output_labels;
+       for (i = 0; i < nhlfe->nh_labels; i++)
+               nhlfe->nh_label[i] = cfg->rc_output_label[i];
+
+       nhlfe->nh_payload_type = cfg->rc_payload_type;
+       nhlfe->nh_via_table = cfg->rc_via_table;
+       memcpy(nhlfe->nh_via, cfg->rc_via, cfg->rc_via_alen);
+       nhlfe->nh_via_alen = cfg->rc_via_alen;

Shouldn't this check that was removed from mpls_route_add be added here:

-       if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
-           (dev->addr_len != cfg->rc_via_alen))
-               goto errout;

?

+
+       err = mpls_nhlfe_assign_dev(net, nhlfe, cfg->rc_ifindex);
+       if (err)
+               goto errout;
+
+       return 0;
+
+errout:
+       return err;
+}
+
  static int mpls_route_add(struct mpls_route_config *cfg)
  {
        struct mpls_route __rcu **platform_label;
        struct net *net = cfg->rc_nlinfo.nl_net;
-       struct net_device *dev = NULL;
        struct mpls_route *rt, *old;
-       unsigned index;
-       int i;
        int err = -EINVAL;
+       unsigned index;
+       int nhs = 1; /* default to one nexthop */

        index = cfg->rc_label;

@@ -457,27 +498,6 @@ static int mpls_route_add(struct mpls_route_config *cfg)
        if (index >= net->mpls.platform_labels)
                goto errout;

-       /* Ensure only a supported number of labels are present */
-       if (cfg->rc_output_labels > MAX_NEW_LABELS)
-               goto errout;
-
-       dev = find_outdev(net, cfg);
-       if (IS_ERR(dev)) {
-               err = PTR_ERR(dev);
-               dev = NULL;
-               goto errout;
-       }
-
-       /* Ensure this is a supported device */
-       err = -EINVAL;
-       if (!mpls_dev_get(dev))
-               goto errout;
-
-       err = -EINVAL;
-       if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
-           (dev->addr_len != cfg->rc_via_alen))
-               goto errout;
-
        /* Append makes no sense with mpls */
        err = -EOPNOTSUPP;
        if (cfg->rc_nlflags & NLM_F_APPEND)
...
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 2681a4b..f05e2e8 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
...
@@ -21,6 +32,30 @@ struct mpls_dev {

  struct sk_buff;

+#define LABEL_NOT_SPECIFIED (1 << 20)
+#define MAX_NEW_LABELS 2
+
+/* This maximum ha length copied from the definition of struct neighbour */
+#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
+
+struct mpls_nhlfe { /* next hop label forwarding entry */

Can we just call this mpls_nh? IMHO, NHLFE is a bit of MPLS-specific jargon that doesn't really add anything and may impact readability for anybody familiar with other protocols, but not with the MPLS RFCs.

+       struct net_device __rcu *nh_dev;
+       u32                     nh_label[MAX_NEW_LABELS];
+       unsigned int            nh_flags;

Is it worth adding the nh_flags field? Unless I missed something it isn't used in this patch and isn't written in any subsequent patches.

+       u8                      nh_payload_type;

nh_payload_type isn't really an attribute of the nexthop, but of the route - it's signally the type of traffic that is using that route.

+       u8                      nh_labels;
+       u8                      nh_via_alen;
+       u8                      nh_via_table;
+       u8                      nh_via[MAX_VIA_ALEN];

MAX_VIA_ALEN is 32 bytes, so there is now a 28 byte overhead per nexthop for the common case of using an IPv4 nexthop. With a large number of routes/nexthops, this will become noticeable. If we avoided using a fixed allocation, it would mitigate this.

+};
+
+struct mpls_route { /* next hop label forwarding entry */

Is it still the NHLFE? :-)

+       struct rcu_head         rt_rcu;
+       u8                      rt_protocol;
+       int                     rt_nhn;
+       struct mpls_nhlfe       rt_nh[0];
+};
+
  static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
  {
        return (struct mpls_shim_hdr *)skb_network_header(skb);


Thanks,
Rob
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to