Mark, is this the panic you were hitting? PANIC : Bad list head 0xffffff800057e720 first->prev != head cpuid = 6 KDB: stack backtrace: kdb_backtrace() at kdb_backtrace+0x3e panic() at panic+0x479 nd6_prelist_add() at nd6_prelist_add+0x236 prelist_remove() at prelist_remove+0x4c5 nd6_ra_input() at nd6_ra_input+0x8c2 icmp6_input() at icmp6_input+0x149e ip6_input() at ip6_input+0x14b2 netisr_dispatch_src() at netisr_dispatch_src+0x14a netisr_dispatch() at netisr_dispatch+0x20 ether_demux() at ether_demux+0x281
-vijay On Mon, Feb 18, 2013 at 7:05 PM, Mark Johnston <ma...@freebsd.org> wrote: > Hi Everyone, > > For the past little while I've been tracking down some memory corruption > issues that seem to be caused by a double free that can happen when > destroying an IPv6-enabled interface or when removing an IPv6 address > from an interface. > > The problem seems to be caused by a lack of locking for nd_prefix, the > global IPv6 prefix list. There's a callout that periodically executes > nd6_timer(), which purges expired prefixes (among other things). There's > nothing preventing an expired prefix from being freed twice if a user > happens to destroy the corresponding address or IP address at the same > time. In fact, a prefix with an infinite lifetime can be double freed as > well, since the first thing prelist_remove() does is set the input > prefix's vltime field to 0. > > I'd like to fix this, and the patch below is my attempt at adding some > locking to accesses of the prefix and default router lists. It's only > received very light testing so far, but I was hoping that others could > either review/test the patch, or suggest alternative approaches to > fixing this. My approach here has been to add two rw locks - one for > the prefix list and one for the default router list. I'm not very > familiar with the NDP code so I'm very much open to comments and > suggestions. :) > > Thanks, > -Mark > > diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c > index e260e5d..3cb327c 100644 > --- a/sys/netinet6/in6.c > +++ b/sys/netinet6/in6.c > @@ -805,8 +805,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, > */ > pr = ia->ia6_ndpr; > in6_purgeaddr(&ia->ia_ifa); > - if (pr && pr->ndpr_refcnt == 0) > + if (pr && pr->ndpr_refcnt == 0) { > + ND_PREFIX_WLOCK(); > prelist_remove(pr); > + ND_PREFIX_WUNLOCK(); > + } > EVENTHANDLER_INVOKE(ifaddr_event, ifp); > break; > } > diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c > index fd420d8..d96340c 100644 > --- a/sys/netinet6/nd6.c > +++ b/sys/netinet6/nd6.c > @@ -117,6 +117,9 @@ static int nd6_inuse, nd6_allocated; > VNET_DEFINE(struct nd_drhead, nd_defrouter); > VNET_DEFINE(struct nd_prhead, nd_prefix); > > +struct rwlock nd_prefix_rwlock; > +struct rwlock nd_defrouter_rwlock; > + > VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; > #define V_nd6_recalc_reachtm_interval > VNET(nd6_recalc_reachtm_interval) > > @@ -143,6 +146,7 @@ nd6_init(void) > { > int i; > > + rw_init(&nd_prefix_rwlock, "nd_prefix rwlock"); > LIST_INIT(&V_nd_prefix); > > all1_sa.sin6_family = AF_INET6; > @@ -151,6 +155,7 @@ nd6_init(void) > all1_sa.sin6_addr.s6_addr[i] = 0xff; > > /* initialization of the default router list */ > + rw_init(&nd_defrouter_rwlock, "nd_defrouter rwlock"); > TAILQ_INIT(&V_nd_defrouter); > > /* start timer */ > @@ -586,10 +591,14 @@ nd6_timer(void *arg) > nd6_timer, curvnet); > > /* expire default router list */ > + ND_PREFIX_RLOCK(); > + ND_DEFRTR_WLOCK(); > TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { > if (dr->expire && dr->expire < time_second) > defrtrlist_del(dr); > } > + ND_DEFRTR_WUNLOCK(); > + ND_PREFIX_RUNLOCK(); > > /* > * expire interface addresses. > @@ -664,6 +673,7 @@ nd6_timer(void *arg) > } > > /* expire prefix list */ > + ND_PREFIX_WLOCK(); > LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { > /* > * check prefix lifetime. > @@ -680,6 +690,7 @@ nd6_timer(void *arg) > prelist_remove(pr); > } > } > + ND_PREFIX_WUNLOCK(); > CURVNET_RESTORE(); > } > > @@ -770,6 +781,8 @@ nd6_purge(struct ifnet *ifp) > * in the routing table, in order to keep additional side effects as > * small as possible. > */ > + ND_PREFIX_RLOCK(); > + ND_DEFRTR_WLOCK(); > TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { > if (dr->installed) > continue; > @@ -785,8 +798,11 @@ nd6_purge(struct ifnet *ifp) > if (dr->ifp == ifp) > defrtrlist_del(dr); > } > + ND_DEFRTR_WUNLOCK(); > + ND_PREFIX_RUNLOCK(); > > /* Nuke prefix list entries toward ifp */ > + ND_PREFIX_WLOCK(); > LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { > if (pr->ndpr_ifp == ifp) { > /* > @@ -808,6 +824,7 @@ nd6_purge(struct ifnet *ifp) > prelist_remove(pr); > } > } > + ND_PREFIX_WUNLOCK(); > > /* cancel default outgoing interface setting */ > if (V_nd6_defifindex == ifp->if_index) > @@ -898,6 +915,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, > struct ifnet *ifp) > * If the address matches one of our on-link prefixes, it should be a > * neighbor. > */ > + ND_PREFIX_RLOCK(); > LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { > if (pr->ndpr_ifp != ifp) > continue; > @@ -929,9 +947,12 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, > struct ifnet *ifp) > } > > if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, > - &addr->sin6_addr, &pr->ndpr_mask)) > + &addr->sin6_addr, &pr->ndpr_mask)) { > + ND_PREFIX_RUNLOCK(); > return (1); > + } > } > + ND_PREFIX_RUNLOCK(); > > /* > * If the address is assigned on the node of the other side of > @@ -1229,6 +1250,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) > * obsolete API, use sysctl under net.inet6.icmp6 > */ > bzero(drl, sizeof(*drl)); > + ND_DEFRTR_RLOCK(); > TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { > if (i >= DRLSTSIZ) > break; > @@ -1241,6 +1263,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) > drl->defrouter[i].if_index = dr->ifp->if_index; > i++; > } > + ND_DEFRTR_RUNLOCK(); > break; > case SIOCGPRLST_IN6: > /* > @@ -1256,6 +1279,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) > * how about separating ioctls into two? > */ > bzero(oprl, sizeof(*oprl)); > + ND_PREFIX_RLOCK(); > LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { > struct nd_pfxrouter *pfr; > int j; > @@ -1301,6 +1325,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) > > i++; > } > + ND_PREFIX_RUNLOCK(); > > break; > case OSIOCGIFINFO_IN6: > @@ -1449,6 +1474,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) > /* flush all the prefix advertised by routers */ > struct nd_prefix *pr, *next; > > + ND_PREFIX_WLOCK(); > LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { > struct in6_ifaddr *ia, *ia_next; > > @@ -1467,6 +1493,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) > } > prelist_remove(pr); > } > + ND_PREFIX_WUNLOCK(); > break; > } > case SIOCSRTRFLUSH_IN6: > @@ -1475,9 +1502,13 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) > struct nd_defrouter *dr, *next; > > defrouter_reset(); > + ND_PERFIX_RLOCK(); > + ND_DEFRTR_WLOCK(); > TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { > defrtrlist_del(dr); > } > + ND_DEFRTR_WUNLOCK(); > + ND_PREFIX_RUNLOCK(); > defrouter_select(); > break; > } > @@ -2268,23 +2299,30 @@ nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) > d.rtaddr.sin6_family = AF_INET6; > d.rtaddr.sin6_len = sizeof(d.rtaddr); > > + error = sysctl_wire_old_buffer(req, 0); > + if (error != 0) > + return (error); > + > /* > * XXX locking > */ > + ND_DEFRTR_RLOCK(); > TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { > d.rtaddr.sin6_addr = dr->rtaddr; > error = sa6_recoverscope(&d.rtaddr); > if (error != 0) > - return (error); > + break; > d.flags = dr->flags; > d.rtlifetime = dr->rtlifetime; > d.expire = dr->expire; > d.if_index = dr->ifp->if_index; > error = SYSCTL_OUT(req, &d, sizeof(d)); > if (error != 0) > - return (error); > + break; > } > - return (0); > + ND_DEFRTR_RUNLOCK(); > + > + return (error); > } > > static int > @@ -2307,9 +2345,14 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) > s6.sin6_family = AF_INET6; > s6.sin6_len = sizeof(s6); > > + error = sysctl_wire_old_buffer(req, 0); > + if (error != 0) > + return (error); > + > /* > * XXX locking > */ > + ND_PREFIX_RLOCK(); > LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { > p.prefix = pr->ndpr_prefix; > if (sa6_recoverscope(&p.prefix)) { > @@ -2341,7 +2384,7 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) > p.advrtrs++; > error = SYSCTL_OUT(req, &p, sizeof(p)); > if (error != 0) > - return (error); > + break; > LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { > s6.sin6_addr = pfr->router->rtaddr; > if (sa6_recoverscope(&s6)) > @@ -2349,9 +2392,13 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) > "scope error in prefix list (%s)\n", > ip6_sprintf(ip6buf, > &pfr->router->rtaddr)); > error = SYSCTL_OUT(req, &s6, sizeof(s6)); > - if (error != 0) > + if (error != 0) { > + ND_PREFIX_RUNLOCK(); > return (error); > + } > } > } > - return (0); > + ND_PREFIX_RUNLOCK(); > + > + return (error); > } > diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h > index 94202e1..bc5ff2d 100644 > --- a/sys/netinet6/nd6.h > +++ b/sys/netinet6/nd6.h > @@ -38,6 +38,7 @@ > #define RTF_ANNOUNCE RTF_PROTO2 > #endif > > +#include <sys/_rwlock.h> > #include <sys/queue.h> > #include <sys/callout.h> > > @@ -341,6 +342,35 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861); > #define V_nd6_debug VNET(nd6_debug) > #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) > > +/* Lock to protect access to the nd_prefix list. */ > +extern struct rwlock nd_prefix_rwlock; > +#define ND_PREFIX_RLOCK() rw_rlock(&nd_prefix_rwlock) > +#define ND_PREFIX_RUNLOCK() rw_runlock(&nd_prefix_rwlock) > +#define ND_PREFIX_WLOCK() rw_wlock(&nd_prefix_rwlock) > +#define ND_PREFIX_WUNLOCK() rw_wunlock(&nd_prefix_rwlock) > +#define ND_PREFIX_LOCK_ASSERT() rw_assert(&nd_prefix_rwlock, > RA_LOCKED) > +#define ND_PREFIX_RLOCK_ASSERT() rw_assert(&nd_prefix_rwlock, > RA_RLOCKED) > +#define ND_PREFIX_WLOCK_ASSERT() rw_assert(&nd_prefix_rwlock, > RA_WLOCKED) > +#define ND_PREFIX_WLOCK_OWNED() rw_wowned(&nd_prefix_rwlock) > + > +/* > + * Lock to protect access to the nd_defrouter list. A shared lock should be > + * acquired on the prefix list before acquiring an exclusive lock on the > router > + * list and calling defrouterlist_del() in order to avoid lock order > reversals. > + */ > +extern struct rwlock nd_defrouter_rwlock; > +#define ND_DEFRTR_RLOCK() rw_rlock(&nd_defrouter_rwlock) > +#define ND_DEFRTR_RUNLOCK() > rw_runlock(&nd_defrouter_rwlock) > +#define ND_DEFRTR_WLOCK() rw_wlock(&nd_defrouter_rwlock) > +#define ND_DEFRTR_WUNLOCK() > rw_wunlock(&nd_defrouter_rwlock) > +#define ND_DEFRTR_LOCK_ASSERT() > rw_assert(&nd_defrouter_rwlock, \ > + RA_LOCKED) > +#define ND_DEFRTR_RLOCK_ASSERT() > rw_assert(&nd_defrouter_rwlock, \ > + RA_RLOCKED) > +#define ND_DEFRTR_WLOCK_ASSERT() > rw_assert(&nd_defrouter_rwlock, \ > + RA_WLOCKED) > +#define ND_DEFRTR_WLOCK_OWNED() > rw_wowned(&nd_defrouter_rwlock) > + > #define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) > > VNET_DECLARE(struct callout, nd6_timer_ch); > diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c > index f6bae0c..e5c63b9 100644 > --- a/sys/netinet6/nd6_rtr.c > +++ b/sys/netinet6/nd6_rtr.c > @@ -499,14 +499,16 @@ defrouter_addreq(struct nd_defrouter *new) > struct nd_defrouter * > defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) > { > - struct nd_defrouter *dr; > + struct nd_defrouter *dr = NULL; > > + ND_DEFRTR_RLOCK(); > TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { > if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) > - return (dr); > + break; > } > + ND_DEFRTR_RUNLOCK(); > > - return (NULL); /* search failed */ > + return (dr); /* search failed */ > } > > /* > @@ -548,8 +550,10 @@ defrouter_reset(void) > { > struct nd_defrouter *dr; > > + ND_DEFRTR_RLOCK(); > TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) > defrouter_delreq(dr); > + ND_DEFRTR_RUNLOCK(); > > /* > * XXX should we also nuke any default routers in the kernel, by > @@ -574,11 +578,13 @@ defrtrlist_del(struct nd_defrouter *dr) > deldr = dr; > defrouter_delreq(dr); > } > + ND_DEFRTR_WLOCK_ASSERT(); > TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); > > /* > * Also delete all the pointers to the router in each prefix lists. > */ > + ND_PREFIX_RLOCK_ASSERT(); > LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { > struct nd_pfxrouter *pfxrtr; > if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) > @@ -636,6 +642,7 @@ defrouter_select(void) > * We just pick up the first reachable one (if any), assuming that > * the ordering rule of the list described in defrtrlist_update(). > */ > + ND_DEFRTR_RLOCK(); > TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { > IF_AFDATA_RLOCK(dr->ifp); > if (selected_dr == NULL && > @@ -657,6 +664,8 @@ defrouter_select(void) > " is installed\n"); > } > } > + ND_DEFRTR_RUNLOCK(); > + > /* > * If none of the default routers was found to be reachable, > * round-robin the list regardless of preference. > @@ -758,7 +767,9 @@ defrtrlist_update(struct nd_defrouter *new) > * defrouter_select() below will handle routing > * changes later. > */ > + ND_DEFRTR_WLOCK(); > TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); > + ND_DEFRTR_WUNLOCK(); > n = dr; > goto insert; > } > @@ -784,6 +795,7 @@ insert: > */ > > /* insert at the end of the group */ > + ND_DEFRTR_WLOCK(); > TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { > if (rtpref(n) > rtpref(dr)) > break; > @@ -792,6 +804,7 @@ insert: > TAILQ_INSERT_BEFORE(dr, n, dr_entry); > else > TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); > + ND_DEFRTR_WUNLOCK(); > > defrouter_select(); > > @@ -839,6 +852,7 @@ nd6_prefix_lookup(struct nd_prefixctl *key) > { > struct nd_prefix *search; > > + ND_PREFIX_RLOCK(); > LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) { > if (key->ndpr_ifp == search->ndpr_ifp && > key->ndpr_plen == search->ndpr_plen && > @@ -847,6 +861,7 @@ nd6_prefix_lookup(struct nd_prefixctl *key) > break; > } > } > + ND_PREFIX_RUNLOCK(); > > return (search); > } > @@ -887,7 +902,9 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct > nd_defrouter *dr, > new->ndpr_mask.s6_addr32[i]; > > /* link ndpr_entry to nd_prefix list */ > + ND_PREFIX_WLOCK(); > LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry); > + ND_PREFIX_WUNLOCK(); > > /* ND_OPT_PI_FLAG_ONLINK processing */ > if (new->ndpr_raf_onlink) { > @@ -938,6 +955,7 @@ prelist_remove(struct nd_prefix *pr) > return; /* notice here? */ > > /* unlink ndpr_entry from nd_prefix list */ > + ND_PREFIX_WLOCK_ASSERT(); > LIST_REMOVE(pr, ndpr_entry); > > /* free list of routers that adversed the prefix */ > @@ -1339,6 +1357,8 @@ pfxlist_onlink_check() > * Check if there is a prefix that has a reachable advertising > * router. > */ > + if (!ND_PREFIX_WLOCK_OWNED()) > + ND_PREFIX_RLOCK(); > LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { > if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr)) > break; > @@ -1349,6 +1369,7 @@ pfxlist_onlink_check() > * that does not advertise any prefixes. > */ > if (pr == NULL) { > + ND_DEFRTR_RLOCK(); > TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { > struct nd_prefix *pr0; > > @@ -1359,6 +1380,7 @@ pfxlist_onlink_check() > if (pfxrtr != NULL) > break; > } > + ND_DEFRTR_RUNLOCK(); > } > if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) { > /* > @@ -1454,6 +1476,8 @@ pfxlist_onlink_check() > } > } > } > + if (!ND_PREFIX_WLOCK_OWNED()) > + ND_PREFIX_RUNLOCK(); > > /* > * Changes on the prefix status might affect address status as well. > @@ -1618,6 +1642,7 @@ nd6_prefix_onlink(struct nd_prefix *pr) > * Although such a configuration is expected to be rare, we explicitly > * allow it. > */ > + ND_PREFIX_RLOCK(); > LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) { > if (opr == pr) > continue; > @@ -1627,9 +1652,12 @@ nd6_prefix_onlink(struct nd_prefix *pr) > > if (opr->ndpr_plen == pr->ndpr_plen && > in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, > - &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) > + &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) { > + ND_PREFIX_RUNLOCK(); > return (0); > + } > } > + ND_PREFIX_RUNLOCK(); > > /* > * We prefer link-local addresses as the associated interface address. > @@ -1730,6 +1758,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) > * If there's one, try to make the prefix on-link on the > * interface. > */ > + ND_PREFIX_LOCK_ASSERT(); > LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) { > if (opr == pr) > continue; > _______________________________________________ > freebsd-net@freebsd.org mailing list > http://lists.freebsd.org/mailman/listinfo/freebsd-net > To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org" _______________________________________________ freebsd-net@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-net To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"