Patrick McHardy wrote:
> I took on Ben's challenge to increase the number of possible routing tables,
> these are the resulting patches.
> 
> The table IDs are changed to 32-bit values and are contained in a new netlink
> routing attribute. For compatibility rtm_table in struct rtmsg can still be
> used to access the first 255 tables and contains the low 8 bits of the table
> ID in case of dumps. Unfortunately there are no invalid values for rtm_table,
> so the best userspace can do in case of a new iproute version that tries to
> access tables > 255 on an old kernel is to use RT_TABLE_UNSPEC (0) for rtm_table,
> which will make the kernel allocate an empty table instead of silently adding
> routes to a more or less random table. The iproute patch will follow shortly.

Actually that last part wasn't entirely true. The last couple of
releases of the kernel include the inet_check_attr function,
which (unintentionally) breaks with the tradition of ignoring
unknown attributes and signals an error on receiving the RTA_TABLE
attribute. So the iproute patch only includes the RTA_TABLE
attribute when the table ID is > 255, in which case rtm_table
is set to RT_TABLE_UNSPEC. Old kernels will still have the
behaviour I described above. The patch has been tested to
behave as expected on both patched and unpatched kernels.

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5e33a20..7573c62 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -238,9 +238,8 @@ enum rt_class_t
        RT_TABLE_DEFAULT=253,
        RT_TABLE_MAIN=254,
        RT_TABLE_LOCAL=255,
-       __RT_TABLE_MAX
 };
-#define RT_TABLE_MAX (__RT_TABLE_MAX - 1)
+#define RT_TABLE_MAX 0xFFFFFFFF
 
 
 
@@ -263,6 +262,7 @@ enum rtattr_type_t
        RTA_CACHEINFO,
        RTA_SESSION,
        RTA_MP_ALGO,
+       RTA_TABLE,
        __RTA_MAX
 };
 
diff --git a/include/rt_names.h b/include/rt_names.h
index 2d9ef10..07a10e0 100644
--- a/include/rt_names.h
+++ b/include/rt_names.h
@@ -5,7 +5,7 @@ #include <asm/types.h>
 
 char* rtnl_rtprot_n2a(int id, char *buf, int len);
 char* rtnl_rtscope_n2a(int id, char *buf, int len);
-char* rtnl_rttable_n2a(int id, char *buf, int len);
+char* rtnl_rttable_n2a(__u32 id, char *buf, int len);
 char* rtnl_rtrealm_n2a(int id, char *buf, int len);
 char* rtnl_dsfield_n2a(int id, char *buf, int len);
 int rtnl_rtprot_a2n(__u32 *id, char *arg);
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 1fe4a69..8b286b0 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -32,4 +32,12 @@ extern int do_multiaddr(int argc, char *
 extern int do_multiroute(int argc, char **argv);
 extern int do_xfrm(int argc, char **argv);
 
+static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
+{
+       __u32 table = r->rtm_table;
+       if (tb[RTA_TABLE])
+               table = *(__u32*) RTA_DATA(tb[RTA_TABLE]);
+       return table;
+}
+
 extern struct rtnl_handle rth;
diff --git a/ip/iproute.c b/ip/iproute.c
index a43c09e..4ebe617 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -75,7 +75,8 @@ static void usage(void)
 
 static struct
 {
-       int tb;
+       __u32 tb;
+       int cloned;
        int flushed;
        char *flushb;
        int flushp;
@@ -125,6 +126,7 @@ int print_route(const struct sockaddr_nl
        inet_prefix prefsrc;
        inet_prefix via;
        int host_len = -1;
+       __u32 table;
        SPRINT_BUF(b1);
        
 
@@ -151,27 +153,23 @@ int print_route(const struct sockaddr_nl
                host_len = 80;
 
        if (r->rtm_family == AF_INET6) {
+               if (filter.cloned) {
+                       if (!(r->rtm_flags&RTM_F_CLONED))
+                               return 0;
+               }
                if (filter.tb) {
-                       if (filter.tb < 0) {
-                               if (!(r->rtm_flags&RTM_F_CLONED))
-                                       return 0;
-                       } else {
-                               if (r->rtm_flags&RTM_F_CLONED)
+                       if (r->rtm_flags&RTM_F_CLONED)
+                               return 0;
+                       if (filter.tb == RT_TABLE_LOCAL) {
+                               if (r->rtm_type != RTN_LOCAL)
                                        return 0;
-                               if (filter.tb == RT_TABLE_LOCAL) {
-                                       if (r->rtm_type != RTN_LOCAL)
-                                               return 0;
-                               } else if (filter.tb == RT_TABLE_MAIN) {
-                                       if (r->rtm_type == RTN_LOCAL)
-                                               return 0;
-                               } else {
+                       } else if (filter.tb == RT_TABLE_MAIN) {
+                               if (r->rtm_type == RTN_LOCAL)
                                        return 0;
-                               }
+                       } else {
+                               return 0;
                        }
                }
-       } else {
-               if (filter.tb > 0 && filter.tb != r->rtm_table)
-                       return 0;
        }
        if ((filter.protocol^r->rtm_protocol)&filter.protocolmask)
                return 0;
@@ -225,6 +223,10 @@ int print_route(const struct sockaddr_nl
                        memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len/8);
        }
 
+       table = rtm_get_table(r, tb);
+       if (r->rtm_family == AF_INET && filter.tb > 0 && filter.tb != table)
+               return 0;
+
        if (filter.rdst.family && inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen))
                return 0;
        if (filter.mdst.family && filter.mdst.bitlen >= 0 &&
@@ -354,8 +356,8 @@ int print_route(const struct sockaddr_nl
                fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
 
        if (!(r->rtm_flags&RTM_F_CLONED)) {
-               if (r->rtm_table != RT_TABLE_MAIN && !filter.tb)
-                       fprintf(fp, " table %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+               if (table != RT_TABLE_MAIN && !filter.tb)
+                       fprintf(fp, " table %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
                if (r->rtm_protocol != RTPROT_BOOT && filter.protocolmask != -1)
                        fprintf(fp, " proto %s ", rtnl_rtprot_n2a(r->rtm_protocol, b1, sizeof(b1)));
                if (r->rtm_scope != RT_SCOPE_UNIVERSE && filter.scopemask != -1)
@@ -840,7 +842,12 @@ #endif
                        NEXT_ARG();
                        if (rtnl_rttable_a2n(&tid, *argv))
                                invarg("\"table\" value is invalid\n", *argv);
-                       req.r.rtm_table = tid;
+                       if (tid < 256)
+                               req.r.rtm_table = tid;
+                       else {
+                               req.r.rtm_table = RT_TABLE_UNSPEC;
+                               addattr32(&req.n, sizeof(req), RTA_TABLE, tid);
+                       }
                        table_ok = 1;
                } else if (strcmp(*argv, "dev") == 0 ||
                           strcmp(*argv, "oif") == 0) {
@@ -1022,7 +1029,7 @@ static int iproute_list_or_flush(int arg
                        filter.tb = tid;
                } else if (matches(*argv, "cached") == 0 ||
                           matches(*argv, "cloned") == 0) {
-                       filter.tb = -1;
+                       filter.cloned = 1;
                } else if (strcmp(*argv, "tos") == 0 ||
                           matches(*argv, "dsfield") == 0) {
                        __u32 tos;
diff --git a/ip/iprule.c b/ip/iprule.c
index ccf699f..6caf573 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -27,6 +27,7 @@ #include <string.h>
 
 #include "rt_names.h"
 #include "utils.h"
+#include "ip_common.h"
 
 extern struct rtnl_handle rth;
 
@@ -51,6 +52,7 @@ static int print_rule(const struct socka
        struct rtmsg *r = NLMSG_DATA(n);
        int len = n->nlmsg_len;
        int host_len = -1;
+       __u32 table;
        struct rtattr * tb[RTA_MAX+1];
        char abuf[256];
        SPRINT_BUF(b1);
@@ -129,8 +131,9 @@ static int print_rule(const struct socka
                fprintf(fp, "iif %s ", (char*)RTA_DATA(tb[RTA_IIF]));
        }
 
-       if (r->rtm_table)
-               fprintf(fp, "lookup %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+       table = rtm_get_table(r, tb);
+       if (table)
+               fprintf(fp, "lookup %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
 
        if (tb[RTA_FLOW]) {
                __u32 to = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
@@ -257,7 +260,12 @@ static int iprule_modify(int cmd, int ar
                        NEXT_ARG();
                        if (rtnl_rttable_a2n(&tid, *argv))
                                invarg("invalid table ID\n", *argv);
-                       req.r.rtm_table = tid;
+                       if (tid < 256)
+                               req.r.rtm_table = tid;
+                       else {
+                               req.r.rtm_table = RT_TABLE_UNSPEC;
+                               addattr32(&req.n, sizeof(req), RTA_TABLE, tid);
+                       }
                        table_ok = 1;
                } else if (strcmp(*argv, "dev") == 0 ||
                           strcmp(*argv, "iif") == 0) {
diff --git a/lib/rt_names.c b/lib/rt_names.c
index 05046c2..2ff984a 100644
--- a/lib/rt_names.c
+++ b/lib/rt_names.c
@@ -23,6 +23,51 @@ #include <linux/rtnetlink.h>
 
 #include "rt_names.h"
 
+struct rtnl_hash_entry {
+       struct rtnl_hash_entry *next;
+       unsigned int            id;
+       char *                  name;
+};
+
+static void
+rtnl_hash_initialize(char *file, struct rtnl_hash_entry **hash, int size)
+{
+       struct rtnl_hash_entry *entry;
+       char buf[512];
+       FILE *fp;
+
+       fp = fopen(file, "r");
+       if (!fp)
+               return;
+       while (fgets(buf, sizeof(buf), fp)) {
+               char *p = buf;
+               int id;
+               char namebuf[512];
+
+               while (*p == ' ' || *p == '\t')
+                       p++;
+               if (*p == '#' || *p == '\n' || *p == 0)
+                       continue;
+               if (sscanf(p, "0x%x %s\n", &id, namebuf) != 2 &&
+                   sscanf(p, "0x%x %s #", &id, namebuf) != 2 &&
+                   sscanf(p, "%d %s\n", &id, namebuf) != 2 &&
+                   sscanf(p, "%d %s #", &id, namebuf) != 2) {
+                       fprintf(stderr, "Database %s is corrupted at %s\n",
+                               file, p);
+                       return;
+               }
+
+               if (id<0)
+                       continue;
+               entry = malloc(sizeof(*entry));
+               entry->id   = id;
+               entry->name = strdup(namebuf);
+               entry->next = hash[id & (size - 1)];
+               hash[id & (size - 1)] = entry;
+       }
+       fclose(fp);
+}
+
 static void rtnl_tab_initialize(char *file, char **tab, int size)
 {
        char buf[512];
@@ -57,7 +102,6 @@ static void rtnl_tab_initialize(char *fi
        fclose(fp);
 }
 
-
 static char * rtnl_rtprot_tab[256] = {
        [RTPROT_UNSPEC] = "none",
        [RTPROT_REDIRECT] ="redirect",
@@ -266,9 +310,14 @@ int rtnl_rtrealm_a2n(__u32 *id, char *ar
 }
 
 
+static struct rtnl_hash_entry dflt_table_entry  = { .id = 253, .name = "default" };
+static struct rtnl_hash_entry main_table_entry  = { .id = 254, .name = "main" };
+static struct rtnl_hash_entry local_table_entry = { .id = 255, .name = "local" };
 
-static char * rtnl_rttable_tab[256] = {
-       "unspec",
+static struct rtnl_hash_entry * rtnl_rttable_hash[256] = {
+       [253] = &dflt_table_entry,
+       [254] = &main_table_entry,
+       [255] = &local_table_entry,
 };
 
 static int rtnl_rttable_init;
@@ -276,26 +325,26 @@ static int rtnl_rttable_init;
 static void rtnl_rttable_initialize(void)
 {
        rtnl_rttable_init = 1;
-       rtnl_rttable_tab[255] = "local";
-       rtnl_rttable_tab[254] = "main";
-       rtnl_rttable_tab[253] = "default";
-       rtnl_tab_initialize("/etc/iproute2/rt_tables",
-                           rtnl_rttable_tab, 256);
+       rtnl_hash_initialize("/etc/iproute2/rt_tables",
+                            rtnl_rttable_hash, 256);
 }
 
-char * rtnl_rttable_n2a(int id, char *buf, int len)
+char * rtnl_rttable_n2a(__u32 id, char *buf, int len)
 {
-       if (id<0 || id>=256) {
-               snprintf(buf, len, "%d", id);
+       struct rtnl_hash_entry *entry;
+
+       if (id >= RT_TABLE_MAX) {
+               snprintf(buf, len, "%u", id);
                return buf;
        }
-       if (!rtnl_rttable_tab[id]) {
-               if (!rtnl_rttable_init)
-                       rtnl_rttable_initialize();
-       }
-       if (rtnl_rttable_tab[id])
-               return rtnl_rttable_tab[id];
-       snprintf(buf, len, "%d", id);
+       if (!rtnl_rttable_init)
+               rtnl_rttable_initialize();
+       entry = rtnl_rttable_hash[id & 255];
+       while (entry && entry->id != id)
+               entry = entry->next;
+       if (entry)
+               return entry->name;
+       snprintf(buf, len, "%u", id);
        return buf;
 }
 
@@ -303,8 +352,9 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
 {
        static char *cache = NULL;
        static unsigned long res;
+       struct rtnl_hash_entry *entry;
        char *end;
-       int i;
+       __u32 i;
 
        if (cache && strcmp(cache, arg) == 0) {
                *id = res;
@@ -315,9 +365,11 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
                rtnl_rttable_initialize();
 
        for (i=0; i<256; i++) {
-               if (rtnl_rttable_tab[i] &&
-                   strcmp(rtnl_rttable_tab[i], arg) == 0) {
-                       cache = rtnl_rttable_tab[i];
+               entry = rtnl_rttable_hash[i];
+               while (entry && strcmp(entry->name, arg))
+                       entry = entry->next;
+               if (entry) {
+                       cache = entry->name;
                        res = i;
                        *id = res;
                        return 0;
@@ -325,7 +377,7 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
        }
 
        i = strtoul(arg, &end, 0);
-       if (!end || end == arg || *end || i > 255)
+       if (!end || end == arg || *end || i > RT_TABLE_MAX)
                return -1;
        *id = i;
        return 0;

Reply via email to