Attached is a backport of a patch from Jamal that is already in the 2.6 kernel - http://www.kernel.org/hg/linux-2.6/?cmd=changeset;node=061262a38daa1717e8343846bc9a8fd5712bd07a
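It would be very nice to see it in the 2.4 kernel as well, as I keep receiving reports from users that "Quagga IPv6 is broken with 2.4 kernel". The patch passes the netlink request parameters (struct netlink_skb_parms) down to inet6_rt_notify(), so that IPv6 route notifications carry the pid of the requesting netlink socket and the sequence number of the request. Without this patch it's not possible to support IPv6 routing in Quagga without race conditions. Users (including myself) have been using this patch in production for some months and haven't noticed any issues with it. -- Hasso Tepper Elion Enterprises Ltd. WAN administrator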
diff -Nur kernel-source-2.4.32-pre3-orig/include/net/ip6_fib.h kernel-source-2.4.32-pre3/include/net/ip6_fib.h --- kernel-source-2.4.32-pre3-orig/include/net/ip6_fib.h 2004-11-17 13:54:22.000000000 +0200 +++ kernel-source-2.4.32-pre3/include/net/ip6_fib.h 2005-08-19 14:20:00.000000000 +0300 @@ -171,13 +171,16 @@ extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + struct netlink_skb_parms *req); extern int fib6_del(struct rt6_info *rt, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + struct netlink_skb_parms *req); extern void inet6_rt_notify(int event, struct rt6_info *rt, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + struct netlink_skb_parms *req); extern void fib6_run_gc(unsigned long dummy); diff -Nur kernel-source-2.4.32-pre3-orig/include/net/ip6_route.h kernel-source-2.4.32-pre3/include/net/ip6_route.h --- kernel-source-2.4.32-pre3-orig/include/net/ip6_route.h 2003-06-13 17:51:39.000000000 +0300 +++ kernel-source-2.4.32-pre3/include/net/ip6_route.h 2005-08-19 14:20:00.000000000 +0300 @@ -39,9 +39,11 @@ extern int ipv6_route_ioctl(unsigned int cmd, void *arg); extern int ip6_route_add(struct in6_rtmsg *rtmsg, - struct nlmsghdr *); + struct nlmsghdr *, + struct netlink_skb_parms *req); extern int ip6_del_rt(struct rt6_info *, - struct nlmsghdr *); + struct nlmsghdr *, + struct netlink_skb_parms *req); extern int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev); diff -Nur kernel-source-2.4.32-pre3-orig/net/ipv6/addrconf.c kernel-source-2.4.32-pre3/net/ipv6/addrconf.c --- kernel-source-2.4.32-pre3-orig/net/ipv6/addrconf.c 2004-11-17 13:54:22.000000000 +0200 +++ kernel-source-2.4.32-pre3/net/ipv6/addrconf.c 2005-08-19 14:20:00.000000000 +0300 @@ -883,7 +883,7 @@ if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) rtmsg.rtmsg_flags |= RTF_NONEXTHOP; - ip6_route_add(&rtmsg, NULL); + ip6_route_add(&rtmsg, NULL, NULL); } /* Create "default" multicast route to the interface */ 
@@ -900,7 +900,7 @@ rtmsg.rtmsg_ifindex = dev->ifindex; rtmsg.rtmsg_flags = RTF_UP; rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ip6_route_add(&rtmsg, NULL); + ip6_route_add(&rtmsg, NULL, NULL); } static void sit_route_add(struct net_device *dev) @@ -917,7 +917,7 @@ rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; rtmsg.rtmsg_ifindex = dev->ifindex; - ip6_route_add(&rtmsg, NULL); + ip6_route_add(&rtmsg, NULL, NULL); } static void addrconf_add_lroute(struct net_device *dev) @@ -1009,7 +1009,7 @@ if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { if (pinfo->onlink == 0 || valid_lft == 0) { - ip6_del_rt(rt, NULL); + ip6_del_rt(rt, NULL, NULL); rt = NULL; } else { rt->rt6i_expires = rt_expires; @@ -1592,7 +1592,7 @@ rtmsg.rtmsg_ifindex = ifp->idev->dev->ifindex; - ip6_route_add(&rtmsg, NULL); + ip6_route_add(&rtmsg, NULL, NULL); } out: diff -Nur kernel-source-2.4.32-pre3-orig/net/ipv6/ip6_fib.c kernel-source-2.4.32-pre3/net/ipv6/ip6_fib.c --- kernel-source-2.4.32-pre3-orig/net/ipv6/ip6_fib.c 2004-11-17 13:54:22.000000000 +0200 +++ kernel-source-2.4.32-pre3/net/ipv6/ip6_fib.c 2005-08-19 14:20:00.000000000 +0300 @@ -424,7 +424,7 @@ */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -482,7 +482,7 @@ *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, nlh); + inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req); rt6_stats.fib_rt_entries++; if ((fn->fn_flags & RTN_RTINFO) == 0) { @@ -506,7 +506,8 @@ * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, + struct netlink_skb_parms *req) { struct fib6_node *fn; int err = -ENOMEM; @@ -579,7 +580,7 @@ } #endif - err = fib6_add_rt2node(fn, rt, nlh); + err = 
fib6_add_rt2node(fn, rt, nlh, req); if (err == 0) { fib6_start_gc(rt); @@ -888,7 +889,7 @@ } static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct fib6_walker_t *w; struct rt6_info *rt = *rtp; @@ -943,11 +944,11 @@ if (atomic_read(&rt->rt6i_ref) != 1) BUG(); } - inet6_rt_notify(RTM_DELROUTE, rt, nlh); + inet6_rt_notify(RTM_DELROUTE, rt, nlh, req); rt6_release(rt); } -int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh) +int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; @@ -972,7 +973,7 @@ for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { if (*rtp == rt) { - fib6_del_route(fn, rtp, nlh); + fib6_del_route(fn, rtp, nlh, req); return 0; } } @@ -1101,7 +1102,7 @@ res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL); + res = fib6_del(rt, NULL, NULL); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); diff -Nur kernel-source-2.4.32-pre3-orig/net/ipv6/ndisc.c kernel-source-2.4.32-pre3/net/ipv6/ndisc.c --- kernel-source-2.4.32-pre3-orig/net/ipv6/ndisc.c 2004-11-17 13:54:22.000000000 +0200 +++ kernel-source-2.4.32-pre3/net/ipv6/ndisc.c 2005-08-19 14:20:00.000000000 +0300 @@ -876,7 +876,7 @@ /* It is safe only because we aer in BH */ dst_release(&rt->u.dst); - ip6_del_rt(rt, NULL); + ip6_del_rt(rt, NULL, NULL); } } } else { @@ -962,7 +962,7 @@ rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); if (rt && lifetime == 0) { - ip6_del_rt(rt, NULL); + ip6_del_rt(rt, NULL, NULL); rt = NULL; } diff -Nur kernel-source-2.4.32-pre3-orig/net/ipv6/route.c kernel-source-2.4.32-pre3/net/ipv6/route.c --- kernel-source-2.4.32-pre3-orig/net/ipv6/route.c 2004-11-17 13:54:22.000000000 +0200 +++ kernel-source-2.4.32-pre3/net/ipv6/route.c 2005-08-19 14:20:00.000000000 
+0300 @@ -325,12 +325,12 @@ be destroyed. */ -static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh) +static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { int err; write_lock_bh(&rt6_lock); - err = fib6_add(&ip6_routing_table, rt, nlh); + err = fib6_add(&ip6_routing_table, rt, nlh, req); write_unlock_bh(&rt6_lock); return err; @@ -341,7 +341,7 @@ */ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, - struct in6_addr *saddr) + struct in6_addr *saddr, struct netlink_skb_parms *req) { int err; struct rt6_info *rt; @@ -373,7 +373,7 @@ dst_hold(&rt->u.dst); - err = rt6_ins(rt, NULL); + err = rt6_ins(rt, NULL, req); if (err == 0) return rt; @@ -479,7 +479,8 @@ read_unlock_bh(&rt6_lock); rt = rt6_cow(rt, &skb->nh.ipv6h->daddr, - &skb->nh.ipv6h->saddr); + &skb->nh.ipv6h->saddr, + &NETLINK_CB(skb)); if (rt->u.dst.error != -EEXIST || --attempts <= 0) goto out2; @@ -558,7 +559,7 @@ read_unlock_bh(&rt6_lock); rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr, - fl->nl_u.ip6_u.saddr); + fl->nl_u.ip6_u.saddr, NULL); if (rt->u.dst.error != -EEXIST || --attempts <= 0) goto out2; @@ -619,7 +620,7 @@ if (rt) { if (rt->rt6i_flags & RTF_CACHE) - ip6_del_rt(rt, NULL); + ip6_del_rt(rt, NULL, NULL); else dst_release(dst); } @@ -712,7 +713,7 @@ * */ -int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh) +int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { int err; struct rtmsg *r; @@ -865,7 +866,7 @@ if (rt->u.dst.advmss > 65535-20) rt->u.dst.advmss = 65535; rt->u.dst.dev = dev; - return rt6_ins(rt, nlh); + return rt6_ins(rt, nlh, req); out: if (dev) @@ -874,7 +875,7 @@ return err; } -int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh) +int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { int err; @@ -886,13 +887,13 @@ dst_release(&rt->u.dst); - err = fib6_del(rt, nlh); + err = fib6_del(rt, nlh, req); 
write_unlock_bh(&rt6_lock); return err; } -int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh) +int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct fib6_node *fn; struct rt6_info *rt; @@ -919,7 +920,7 @@ dst_hold(&rt->u.dst); read_unlock_bh(&rt6_lock); - return ip6_del_rt(rt, nlh); + return ip6_del_rt(rt, nlh, req); } } read_unlock_bh(&rt6_lock); @@ -1021,11 +1022,11 @@ rt->u.dst.advmss = 65535; nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev); - if (rt6_ins(nrt, NULL)) + if (rt6_ins(nrt, NULL, NULL)) goto out; if (rt->rt6i_flags&RTF_CACHE) { - ip6_del_rt(rt, NULL); + ip6_del_rt(rt, NULL, NULL); return; } @@ -1087,7 +1088,7 @@ 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { - nrt = rt6_cow(rt, daddr, saddr); + nrt = rt6_cow(rt, daddr, saddr, NULL); if (!nrt->u.dst.error) { nrt->u.dst.pmtu = pmtu; /* According to RFC 1981, detecting PMTU increase shouldn't be @@ -1111,7 +1112,7 @@ dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; nrt->u.dst.pmtu = pmtu; - rt6_ins(nrt, NULL); + rt6_ins(nrt, NULL, NULL); } out: @@ -1184,7 +1185,7 @@ rtmsg.rtmsg_ifindex = dev->ifindex; - ip6_route_add(&rtmsg, NULL); + ip6_route_add(&rtmsg, NULL, NULL); return rt6_get_dflt_router(gwaddr, dev); } @@ -1210,7 +1211,7 @@ read_unlock_bh(&rt6_lock); - ip6_del_rt(rt, NULL); + ip6_del_rt(rt, NULL, NULL); goto restart; } @@ -1236,10 +1237,10 @@ rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&rtmsg, NULL); + err = ip6_route_add(&rtmsg, NULL, NULL); break; case SIOCDELRT: - err = ip6_route_del(&rtmsg, NULL); + err = ip6_route_del(&rtmsg, NULL, NULL); break; default: err = -EINVAL; @@ -1296,7 +1297,7 @@ ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt6_ins(rt, NULL); + rt6_ins(rt, NULL, NULL); return 0; } @@ -1313,7 +1314,7 @@ rt = rt6_lookup(addr, NULL, 
loopback_dev.ifindex, 1); if (rt) { if (rt->rt6i_dst.plen == 128) - err = ip6_del_rt(rt, NULL); + err = ip6_del_rt(rt, NULL, NULL); else dst_release(&rt->u.dst); } @@ -1429,7 +1430,7 @@ nrt->rt6i_flags |= RTF_CACHE; dst_hold(&nrt->u.dst); - err = rt6_ins(nrt, NULL); + err = rt6_ins(nrt, NULL, NULL); if (err) nrt->u.dst.error = err; return nrt; @@ -1556,7 +1557,7 @@ if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_del(&rtmsg, nlh); + return ip6_route_del(&rtmsg, nlh, &NETLINK_CB(skb)); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -1566,7 +1567,7 @@ if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_add(&rtmsg, nlh); + return ip6_route_add(&rtmsg, nlh, &NETLINK_CB(skb)); } struct rt6_rtnl_dump_arg @@ -1576,11 +1577,8 @@ }; static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, - struct in6_addr *src, - int iif, - int type, u32 pid, u32 seq, - struct nlmsghdr *in_nlh, int prefix) + struct in6_addr *dst, struct in6_addr *src, + int iif, int type, u32 pid, u32 seq, int prefix) { struct rtmsg *rtm; struct nlmsghdr *nlh; @@ -1593,9 +1591,6 @@ return 1; } } - if (!pid && in_nlh) { - pid = in_nlh->nlmsg_pid; - } nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm)); rtm = NLMSG_DATA(nlh); @@ -1683,7 +1678,7 @@ return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, - NULL, prefix); + prefix); } static int fib6_dump_node(struct fib6_walker_t *w) @@ -1834,7 +1829,7 @@ fl.nl_u.ip6_u.saddr, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, nlh, 0); + nlh->nlmsg_seq, 0); if (err < 0) { err = -EMSGSIZE; goto out_free; @@ -1850,17 +1845,25 @@ goto out; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh) +void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, + struct netlink_skb_parms *req) { struct sk_buff *skb; int size = 
NLMSG_SPACE(sizeof(struct rtmsg)+256); + u32 pid = current->pid; + u32 seq = 0; + + if (req) + pid = req->pid; + if (nlh) + seq = nlh->nlmsg_seq; skb = alloc_skb(size, gfp_any()); if (!skb) { netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); return; } - if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) { + if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0) < 0) { kfree_skb(skb); netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); return;