Hi Everybody

Please find attached the patch files, which contain the backported ECMP code
for FreeBSD 7.2. I have backported the code from 8.0 and have done some basic
testing with these patches rolled in. These are the phase 1 fixes; I will
continue to work on this and backport a few other things too. It would be
great if someone could roll these patches into 7.2, test them, and give me
their valuable feedback.

This is my first patch for the FreeBSD community; I am really excited about
it and look forward to your comments.

Thanks for your time.

Cheers,
  - Balaji
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.c net/radix.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix.c	2010-02-04 22:40:29.000000000 +0530
+++ net/radix.c	2010-02-07 16:06:56.053687311 +0530
@@ -48,6 +48,10 @@
 #include <net/radix.h>
 #endif
 
+/* ECMP Changes Begin */
+#include <net/radix_mpath.h>
+/* ECMP Changes End */
+
 static int	rn_walktree_from(struct radix_node_head *h, void *a, void *m,
 		    walktree_f_t *f, void *w);
 static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
@@ -630,6 +634,22 @@
 	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
 	if (keyduplicated) {
 		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
+
+/* ECMP Changes Begin */
+			/* permit multipath, if enabled for the family */
+			if (rn_mpath_capable(head) && netmask == tt->rn_mask) {
+				/*
+				 * go down to the end of multipaths, so that
+				 * new entry goes into the end of rn_dupedkey
+				 * chain.
+				 */
+				do {
+					t = tt;
+					tt = tt->rn_dupedkey;
+				} while (tt && t->rn_mask == tt->rn_mask);
+				break;
+			}
+/* ECMP Changes End */
 			if (tt->rn_mask == netmask)
 				return (0);
 			if (netmask == 0 ||
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix_mpath.c net/radix_mpath.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/radix_mpath.c	2010-02-05 08:30:52.000000000 +0530
+++ net/radix_mpath.c	2010-02-06 18:02:31.507943679 +0530
@@ -54,7 +54,7 @@
 /*
  * give some jitter to hash, to avoid synchronization between routers
  */
-static uint32_t hashjitter;
+/* static uint32_t hashjitter; */
 
 int
 rn_mpath_capable(struct radix_node_head *rnh)
@@ -320,8 +320,9 @@
 #endif
 
 extern int	in6_inithead(void **head, int off);
-extern int	in_inithead(void **head, int off);
+extern int	in_inthead(void **head, int off);
 
+#if 0
 #ifdef INET
 int
 rn4_mpath_inithead(void **head, int off)
@@ -352,5 +353,6 @@
 	} else
 		return 0;
 }
+#endif
 
 #endif
Only in net: radix_mpath.h
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.c net/route.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.c	2010-02-04 22:40:29.000000000 +0530
+++ net/route.c	2010-02-07 16:05:44.442935597 +0530
@@ -886,6 +886,113 @@
 	return (rtrequest1_fib(req, info, ret_nrt, 0));
 }
 
+/* ECMP Changes Begin */
+static int
+rn_mpath_update(int req, struct rt_addrinfo *info,
+    struct radix_node_head *rnh, struct rtentry **ret_nrt)
+{
+	/*
+	 * Multipath-aware handling of a route request: with multipath routes
+	 * the user must specify a matching RTAX_GATEWAY.  Returns an errno.
+	 */
+	struct rtentry *rt, *rto = NULL;
+	register struct radix_node *rn;
+	int error = 0;
+	/* NOTE(review): dst/gateway/netmask presumably file-level macros. */
+	rn = rnh->rnh_matchaddr(dst, rnh);
+	if (rn == NULL)
+		return (ESRCH);
+	rto = rt = RNTORT(rn);
+	rt = rt_mpath_matchgate(rt, gateway);
+	if (rt == NULL)
+		return (ESRCH);
+	/*
+	 * The gateway matched the entry the lookup returned (first in chain).
+	 */
+	if (rto == rt) {
+		rn = rn_mpath_next((struct radix_node *)rt);
+		/*
+		 * There is another entry after it: flag that one RTF_UP.
+		 */
+		if (rn) {
+			rto = RNTORT(rn);
+			RT_LOCK(rto);
+			rto->rt_flags |= RTF_UP;
+			RT_UNLOCK(rto);
+		} else if (rt->rt_flags & RTF_GATEWAY) {
+			/*
+			 * For gateway routes, we need to
+			 * make sure that we are deleting
+			 * the correct gateway.
+			 * rt_mpath_matchgate() does not
+			 * check the case when there is only
+			 * one route in the chain.
+			 */
+			if (gateway &&
+			    (rt->rt_gateway->sa_len != gateway->sa_len ||
+				memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
+				error = ESRCH;
+			else {
+				/*
+				 * Only route in the chain: unlink it from the
+				 * tree here, then take the common gwdelete path.
+				 */
+				rn = rnh->rnh_deladdr(dst, netmask, rnh);
+				KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
+				goto gwdelete;
+			}
+			
+		}
+		/*
+		 * RTM_DELETE: return ENOENT so the caller runs its normal delete
+		 * code.  NOTE(review): this overwrites the ESRCH set above.
+		 */
+		if (req != RTM_DELETE) 
+			goto nondelete;
+
+		error = ENOENT;
+		goto done;
+	}
+		
+	/*
+	 * Match is the 2nd or later entry; a failed rt_mpath_deldup() panics.
+	 */
+	if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+		panic ("rtrequest1: rt_mpath_deldup");
+gwdelete:
+	RT_LOCK(rt);
+	RT_ADDREF(rt);
+	if (req == RTM_DELETE) {
+		rt->rt_flags &= ~RTF_UP;
+		/*
+		 * One more rtentry floating around that is not
+		 * linked to the routing table. rttrash will be decremented
+		 * when RTFREE(rt) is eventually called.
+		 */
+		rttrash++;
+	}
+	
+nondelete:
+	if (req != RTM_DELETE)
+		panic("unrecognized request %d", req);
+	
+
+	/*
+	 * Give rt to the caller when ret_nrt is set (unlocked, keeping the
+	 * reference taken at gwdelete; the caller must free it).  Otherwise
+	 * release it here with RTFREE_LOCKED().
+	 */
+	if (ret_nrt) {
+		*ret_nrt = rt;
+		RT_UNLOCK(rt);
+	} else
+		RTFREE_LOCKED(rt);
+done:
+	return (error);
+}
+/* ECMP Changes End */
+
+
 int
 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 				u_int fibnum)
@@ -923,6 +1030,19 @@
 	}
 	switch (req) {
 	case RTM_DELETE:
+
+/* ECMP Changes Begin */
+		if (rn_mpath_capable(rnh)) {
+			error = rn_mpath_update(req, info, rnh, ret_nrt);
+			/*
+			 * "bad" holds true for the success case
+			 * as well
+			 */
+			if (error != ENOENT)
+				goto bad;
+		}
+/* ECMP Changes End */
+
 		/*
 		 * Remove the item from the tree and return it.
 		 * Complain if it is not there and do no more processing.
@@ -1046,6 +1166,20 @@
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
 
+	/* ECMP Changes Begin */
+		/* do not permit exactly the same dst/mask/gw pair */
+		if (rn_mpath_capable(rnh) &&
+			rt_mpath_conflict(rnh, rt, netmask)) {
+			if (rt->rt_ifa) {
+				IFAFREE(rt->rt_ifa);
+			}
+			Free(rt_key(rt));
+			RT_LOCK_DESTROY(rt);
+			uma_zfree(rtzone, rt);
+			senderr(EEXIST);
+		}
+	/* ECMP Changes  End */
+
 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
 		if (rn == NULL) {
@@ -1456,6 +1590,29 @@
 				/* this table doesn't exist but others might */
 				continue;
 			RADIX_NODE_HEAD_LOCK(rnh);
+/* ECMP Changes Begin */
+			if (rn_mpath_capable(rnh)) {
+
+				rn = rnh->rnh_matchaddr(dst, rnh);
+				if (rn == NULL) 
+					error = ESRCH;
+				else {
+					rt = RNTORT(rn);
+					/*
+					 * for interface route the
+					 * rt->rt_gateway is sockaddr_intf
+					 * for cloning ARP entries, so
+					 * rt_mpath_matchgate must use the
+					 * interface address
+					 */
+					rt = rt_mpath_matchgate(rt,
+					    ifa->ifa_addr);
+					if (!rt) 
+						error = ESRCH;
+				}
+			}
+			else
+/* ECMP Changes End */
 			rn = rnh->rnh_lookup(dst, netmask, rnh);
 			error = (rn == NULL ||
 			    (rn->rn_flags & RNF_ROOT) ||
@@ -1482,6 +1639,22 @@
 			 * notify any listening routing agents of the change
 			 */
 			RT_LOCK(rt);
+/* ECMP Changes Begin */
+			/*
+			 * in case address alias finds the first address
+			 * e.g. ifconfig bge0 192.103.54.246/24
+			 * e.g. ifconfig bge0 192.103.54.247/24
+			 * the address set in the route is 192.103.54.246
+			 * so we need to replace it with 192.103.54.247
+			 */
+			if (memcmp(rt->rt_ifa->ifa_addr,
+			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
+				IFAFREE(rt->rt_ifa);
+				IFAREF(ifa);
+				rt->rt_ifp = ifa->ifa_ifp;
+				rt->rt_ifa = ifa;
+			}
+/* ECMP Changes End */
 			rt_newaddrmsg(cmd, ifa, error, rt);
 			if (cmd == RTM_DELETE) {
 				/*
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.h net/route.h
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/route.h	2010-02-04 22:40:29.000000000 +0530
+++ net/route.h	2010-02-06 17:19:10.672946004 +0530
@@ -58,6 +58,9 @@
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_pksent;	/* packets sent using this route */
+/* ECMP Changes Begin */
+	u_long  rmx_weight; 
+/* ECMP Changes End  */
 };
 
 struct rt_metrics {
@@ -101,6 +104,11 @@
 #ifndef RNF_NORMAL
 #include <net/radix.h>
 #endif
+
+/* ECMP Changes Begin */
+#include <net/radix_mpath.h>
+/* ECMP Changes End  */
+
 struct rtentry {
 	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
 	/*
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/rtsock.c net/rtsock.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/net/rtsock.c	2010-02-04 22:40:29.000000000 +0530
+++ net/rtsock.c	2010-02-06 16:34:43.700713152 +0530
@@ -536,6 +536,27 @@
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 			senderr(ESRCH);
 		}
+
+/* ECMP Changes Begin */
+		/*
+		 * for RTM_CHANGE/LOCK, if we got multipath routes,
+		 * we require users to specify a matching RTAX_GATEWAY.
+		 *
+		 * for RTM_GET, gate is optional even with multipath.
+		 * if gate == NULL the first match is returned.
+		 * (no need to call rt_mpath_matchgate if gate == NULL)
+		 */
+		if (rn_mpath_capable(rnh) &&
+		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
+			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
+			if (!rt) {
+				RADIX_NODE_HEAD_UNLOCK(rnh);
+				senderr(ESRCH);
+			}
+		}
+
+/* ECMP Changes End */
+
 		RT_LOCK(rt);
 		RT_ADDREF(rt);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
diff -u -r /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/netinet/in_rmx.c netinet/in_rmx.c
--- /home/balaji/Codes/FreeBSD/7.2.0/7.2.0_unmodified/7.2.0/sys/netinet/in_rmx.c	2010-02-04 22:35:07.000000000 +0530
+++ netinet/in_rmx.c	2010-02-07 12:56:56.934968244 +0530
@@ -369,6 +369,9 @@
 	rnh->rnh_addaddr = in_addroute;
 	rnh->rnh_matchaddr = in_matroute;
 	rnh->rnh_close = in_clsroute;
+	/* ECMP Changes Begin */
+	rnh->rnh_multipath = 1;
+	/* ECMP Changes End */
 	if (_in_rt_was_here == 0 ) {
 		callout_init(&rtq_timer, CALLOUT_MPSAFE);
 		in_rtqtimo(rnh);	/* kick off timeout first time */
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to