Author: bms
Date: Wed May 27 18:57:13 2009
New Revision: 192923
URL: http://svn.freebsd.org/changeset/base/192923

Log:
  Merge final round of MLD changes from p4:
   ip6_input.c, in6.h:
   * Add netinet6-specific mbuf flag M_RTALERT_MLD, shadowing M_PROTO6.
    * Always set this flag if HBH Router Alert option is present for MLD,
      even when not forwarding.
  
   icmp6.c:
   * In icmp6_input(), spell m->m_pkthdr.rcvif as ifp to be consistent.
   * Use scope ID for verifying input. Do not apply SSM filters here, as
     there is no inpcb.
    * Check for M_RTALERT_MLD when validating MLD traffic, as we can't see
      IPv6 hop options outside of ip6_input().
  
   in6_mcast.c:
   * Use KAME scope/zone ID in in6_multi.
     * Update net.inet6.ip6.mcast.filters implementation to use scope IDs
       for comparisons.
   * Fix scope ID treatment in multicast socket option processing.
     Scope IDs passed in from userland will be ignored as other less
     ambiguous APIs exist for specifying the link.
   * Tighten userland input checks in IPv6 SSM delta and full-state ops.
     * Source filter embedded scope IDs need to be revisited, for now
       just clear them and ignore them on input.
   * Adapt KAME behaviour of looking up the scope ID in the default zone
     for multicast leaves, when the interface is ambiguous.
  
   mld6.c:
   * Tighten origin checks on MLD traffic as per RFC3810 Section 6.2:
    * ip6_src MAY be the unspecified address for MLDv1 reports.
    * ip6_src MAY have link-local address scope for MLDv1 reports,
      MLDv1 queries, and MLDv2 queries.
    * Perform address field validation *before* accepting queries.
   * Use KAME scope/zone ID in query/report processing.
     * Break const correctness for mld_v1_input_report(), mld_v1_input_query()
       as we temporarily modify the input mbuf chain.
     * Clear the scope ID before handoff to userland MLD daemon.
   * Fix MLDv1 old querier present timer processing.
     With the protocol defaults, hosts should revert to MLDv2 after 260s.
   * Add net.inet6.mld.v1enable sysctl, default to on.
  
   ifmcstat.c:
   * Use sysctl by default; -K requests kvm(3) if so compiled.
  
   mld.4:
   * Connect man page to build.
  
  Tested using PCS.

Modified:
  head/share/man/man4/Makefile
  head/share/man/man4/multicast.4
  head/sys/netinet6/icmp6.c
  head/sys/netinet6/in6.h
  head/sys/netinet6/in6_mcast.c
  head/sys/netinet6/ip6_input.c
  head/sys/netinet6/mld6.c
  head/usr.sbin/ifmcstat/ifmcstat.8
  head/usr.sbin/ifmcstat/ifmcstat.c

Modified: head/share/man/man4/Makefile
==============================================================================
--- head/share/man/man4/Makefile        Wed May 27 18:54:31 2009        (r192922)
+++ head/share/man/man4/Makefile        Wed May 27 18:57:13 2009        (r192923)
@@ -191,6 +191,7 @@ MAN=        aac.4 \
        meteor.4 \
        mfi.4 \
        miibus.4 \
+       mld.4 \
        mlx.4 \
        mly.4 \
        mmc.4 \

Modified: head/share/man/man4/multicast.4
==============================================================================
--- head/share/man/man4/multicast.4     Wed May 27 18:54:31 2009        (r192922)
+++ head/share/man/man4/multicast.4     Wed May 27 18:57:13 2009        (r192923)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd February 13, 2009
+.Dd May 27, 2009
 .Dt MULTICAST 4
 .Os
 .\"
@@ -962,6 +962,7 @@ after the previous upcall.
 .Xr intro 4 ,
 .Xr ip 4 ,
 .Xr ip6 4 ,
+.Xr mld 4 ,
 .Xr pim 4
 .\"
 .Sh HISTORY
@@ -1002,6 +1003,8 @@ monitoring were implemented by
 in collaboration with
 .An Chris Brown
 (NextHop).
+The IGMPv3 and MLDv2 multicast support was implemented by
+.An Bruce Simpson .
 .Pp
 This manual page was written by
 .An Pavlin Radoslavov

Modified: head/sys/netinet6/icmp6.c
==============================================================================
--- head/sys/netinet6/icmp6.c   Wed May 27 18:54:31 2009        (r192922)
+++ head/sys/netinet6/icmp6.c   Wed May 27 18:57:13 2009        (r192923)
@@ -403,6 +403,7 @@ icmp6_input(struct mbuf **mp, int *offp,
        INIT_VNET_INET6(curvnet);
        INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
        struct mbuf *m = *mp, *n;
+       struct ifnet *ifp;
        struct ip6_hdr *ip6, *nip6;
        struct icmp6_hdr *icmp6, *nicmp6;
        int off = *offp;
@@ -410,6 +411,8 @@ icmp6_input(struct mbuf **mp, int *offp,
        int code, sum, noff;
        char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
+       ifp = m->m_pkthdr.rcvif;
+
 #ifndef PULLDOWN_TEST
        IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
        /* m might change if M_LOOP.  So, call mtod after this */
@@ -431,10 +434,8 @@ icmp6_input(struct mbuf **mp, int *offp,
         * Note: SSM filters are not applied for ICMPv6 traffic.
         */
        if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
-               struct ifnet *ifp;
-               struct in6_multi *inm;
+               struct in6_multi        *inm;
 
-               ifp = m->m_pkthdr.rcvif;
                inm = in6m_lookup(ifp, &ip6->ip6_dst);
                if (inm == NULL) {
                        IP6STAT_INC(ip6s_notmember);
@@ -483,19 +484,19 @@ icmp6_input(struct mbuf **mp, int *offp,
        }
 
        ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]);
-       icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
+       icmp6_ifstat_inc(ifp, ifs6_in_msg);
        if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
+               icmp6_ifstat_inc(ifp, ifs6_in_error);
 
        switch (icmp6->icmp6_type) {
        case ICMP6_DST_UNREACH:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
+               icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
                switch (code) {
                case ICMP6_DST_UNREACH_NOROUTE:
                        code = PRC_UNREACH_NET;
                        break;
                case ICMP6_DST_UNREACH_ADMIN:
-                       icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
+                       icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
                        code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
                        break;
                case ICMP6_DST_UNREACH_ADDR:
@@ -515,7 +516,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ICMP6_PACKET_TOO_BIG:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
+               icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig);
 
                /* validation is made in icmp6_mtudisc_update */
 
@@ -529,7 +530,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ICMP6_TIME_EXCEEDED:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
+               icmp6_ifstat_inc(ifp, ifs6_in_timeexceed);
                switch (code) {
                case ICMP6_TIME_EXCEED_TRANSIT:
                        code = PRC_TIMXCEED_INTRANS;
@@ -544,7 +545,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ICMP6_PARAM_PROB:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
+               icmp6_ifstat_inc(ifp, ifs6_in_paramprob);
                switch (code) {
                case ICMP6_PARAMPROB_NEXTHEADER:
                        code = PRC_UNREACH_PROTOCOL;
@@ -560,7 +561,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ICMP6_ECHO_REQUEST:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
+               icmp6_ifstat_inc(ifp, ifs6_in_echo);
                if (code != 0)
                        goto badcode;
                if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
@@ -623,7 +624,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ICMP6_ECHO_REPLY:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
+               icmp6_ifstat_inc(ifp, ifs6_in_echoreply);
                if (code != 0)
                        goto badcode;
                break;
@@ -633,11 +634,15 @@ icmp6_input(struct mbuf **mp, int *offp,
        case MLD_LISTENER_DONE:
        case MLDV2_LISTENER_REPORT:
                /*
-                * Drop MLD traffic which is not link-local.
+                * Drop MLD traffic which is not link-local, has a hop limit
+                * of greater than 1 hop, or which does not have the
+                * IPv6 HBH Router Alert option.
+                * As IPv6 HBH options are stripped in ip6_input() we must
+                * check an mbuf header flag.
                 * XXX Should we also sanity check that these messages
                 * were directed to a link-local multicast prefix?
                 */
-               if (ip6->ip6_hlim != 1)
+               if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0)
                        goto freeit;
                if (mld_input(m, off, icmp6len) != 0)
                        return (IPPROTO_DONE);
@@ -748,7 +753,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ND_ROUTER_SOLICIT:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
+               icmp6_ifstat_inc(ifp, ifs6_in_routersolicit);
                if (code != 0)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_router_solicit))
@@ -764,7 +769,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ND_ROUTER_ADVERT:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
+               icmp6_ifstat_inc(ifp, ifs6_in_routeradvert);
                if (code != 0)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_router_advert))
@@ -780,7 +785,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ND_NEIGHBOR_SOLICIT:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
+               icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit);
                if (code != 0)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_neighbor_solicit))
@@ -796,7 +801,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ND_NEIGHBOR_ADVERT:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
+               icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert);
                if (code != 0)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_neighbor_advert))
@@ -812,7 +817,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                break;
 
        case ND_REDIRECT:
-               icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
+               icmp6_ifstat_inc(ifp, ifs6_in_redirect);
                if (code != 0)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_redirect))
@@ -840,7 +845,7 @@ icmp6_input(struct mbuf **mp, int *offp,
                    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
                    icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
                    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
-                   m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
+                   ifp ? ifp->if_index : 0));
                if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
                        /* ICMPv6 error: MUST deliver it by spec... */
                        code = PRC_NCMDS;

Modified: head/sys/netinet6/in6.h
==============================================================================
--- head/sys/netinet6/in6.h     Wed May 27 18:54:31 2009        (r192922)
+++ head/sys/netinet6/in6.h     Wed May 27 18:57:13 2009        (r192923)
@@ -619,6 +619,7 @@ struct ip6_mtuinfo {
 #define        M_DECRYPTED     M_PROTO3
 #define        M_LOOP          M_PROTO4
 #define        M_AUTHIPDGM     M_PROTO5
+#define        M_RTALERT_MLD   M_PROTO6
 
 #ifdef _KERNEL
 struct cmsghdr;

Modified: head/sys/netinet6/in6_mcast.c
==============================================================================
--- head/sys/netinet6/in6_mcast.c       Wed May 27 18:54:31 2009        (r192922)
+++ head/sys/netinet6/in6_mcast.c       Wed May 27 18:57:13 2009        (r192923)
@@ -305,6 +305,10 @@ im6o_match_group(const struct ip6_moptio
  * Find an IPv6 multicast source entry for this imo which matches
  * the given group index for this socket, and source address.
  *
+ * XXX TODO: The scope ID, if present in src, is stripped before
+ * any comparison. We SHOULD enforce scope/zone checks where the source
+ * filter entry has a link scope.
+ *
  * NOTE: This does not check if the entry is in-mode, merely if
  * it exists, which may not be the desired behaviour.
  */
@@ -328,6 +332,7 @@ im6o_match_source(const struct ip6_mopti
 
        psa = (const sockunion_t *)src;
        find.im6s_addr = psa->sin6.sin6_addr;
+       in6_clearscope(&find.im6s_addr);                /* XXX */
        ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
 
        return ((struct in6_msource *)ims);
@@ -1159,6 +1164,20 @@ in6_mc_join_locked(struct ifnet *ifp, co
        char                     ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
+#ifdef INVARIANTS
+       /*
+        * Sanity: Check scope zone ID was set for ifp, if and
+        * only if group is scoped to an interface.
+        */
+       KASSERT(IN6_IS_ADDR_MULTICAST(mcaddr),
+           ("%s: not a multicast address", __func__));
+       if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) ||
+           IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) {
+               KASSERT(mcaddr->s6_addr16[1] != 0,
+                   ("%s: scope zone ID not set", __func__));
+       }
+#endif
+
        IN6_MULTI_LOCK_ASSERT();
 
        CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__,
@@ -1360,6 +1379,8 @@ in6p_block_unblock_source(struct inpcb *
        if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
                return (EINVAL);
 
+       (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+
        /*
         * Check if we are actually a member of this group.
         */
@@ -1566,19 +1587,26 @@ in6p_get_source_filters(struct inpcb *in
        if (error)
                return (error);
 
-       if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
+       if (msfr.msfr_group.ss_family != AF_INET6 ||
+           msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
+               return (EINVAL);
+
+       gsa = (sockunion_t *)&msfr.msfr_group;
+       if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
                return (EINVAL);
 
+       if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
+               return (EADDRNOTAVAIL);
        ifp = ifnet_byindex(msfr.msfr_ifindex);
        if (ifp == NULL)
-               return (EINVAL);
+               return (EADDRNOTAVAIL);
+       (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
        INP_WLOCK(inp);
 
        /*
         * Lookup group on the socket.
         */
-       gsa = (sockunion_t *)&msfr.msfr_group;
        idx = im6o_match_group(imo, ifp, &gsa->sa);
        if (idx == -1 || imo->im6o_mfilters == NULL) {
                INP_WUNLOCK(inp);
@@ -1803,6 +1831,12 @@ in6p_join_group(struct inpcb *inp, struc
        ssa = (sockunion_t *)&gsr.gsr_source;
        ssa->ss.ss_family = AF_UNSPEC;
 
+       /*
+        * Chew everything into struct group_source_req.
+        * Overwrite the port field if present, as the sockaddr
+        * being copied in may be matched with a binary comparison.
+        * Ignore passed-in scope ID.
+        */
        switch (sopt->sopt_name) {
        case IPV6_JOIN_GROUP: {
                struct ipv6_mreq mreq;
@@ -1846,16 +1880,20 @@ in6p_join_group(struct inpcb *inp, struc
                    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
                        return (EINVAL);
 
-               /*
-                * Overwrite the port field if present, as the sockaddr
-                * being copied in may be matched with a binary comparison.
-                */
-               gsa->sin6.sin6_port = 0;
                if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
                        if (ssa->sin6.sin6_family != AF_INET6 ||
                            ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
                                return (EINVAL);
+                       if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
+                               return (EINVAL);
+                       /*
+                        * TODO: Validate embedded scope ID in source
+                        * list entry against passed-in ifp, if and only
+                        * if source list filter entry is iface or node local.
+                        */
+                       in6_clearscope(&ssa->sin6.sin6_addr);
                        ssa->sin6.sin6_port = 0;
+                       ssa->sin6.sin6_scope_id = 0;
                }
 
                if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
@@ -1870,34 +1908,22 @@ in6p_join_group(struct inpcb *inp, struc
                break;
        }
 
-#ifdef notyet
-       /*
-        * FIXME: Check for unspecified address (all groups).
-        * Do we have a normative reference for this 'feature'?
-        *
-        * We use the unspecified address to specify to accept
-        * all multicast addresses. Only super user is allowed
-        * to do this.
-        * XXX-BZ might need a better PRIV_NETINET_x for this
-        */
-       if (IN6_IS_ADDR_UNSPECIFIED(&gsa->sin6.sin6_addr)) {
-               error = priv_check(curthread, PRIV_NETINET_MROUTE);
-               if (error)
-               break;
-       } else
-#endif
        if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
                return (EINVAL);
 
        if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
                return (EADDRNOTAVAIL);
 
-#ifdef notyet
+       gsa->sin6.sin6_port = 0;
+       gsa->sin6.sin6_scope_id = 0;
+
        /*
-        * FIXME: Set interface scope in group address.
+        * Always set the scope zone ID on memberships created from userland.
+        * Use the passed-in ifp to do this.
+        * XXX The in6_setscope() return value is meaningless.
+        * XXX SCOPE6_LOCK() is taken by in6_setscope().
         */
-       (void)in6_setscope(&gsa->sin6.sin_addr, ifp, NULL);
-#endif
+       (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
        /*
         * MCAST_JOIN_SOURCE on an exclusive membership is an error.
@@ -2031,6 +2057,8 @@ static int
 in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
 {
        INIT_VNET_NET(curvnet);
+       INIT_VNET_INET6(curvnet);
+       struct ipv6_mreq                 mreq;
        struct group_source_req          gsr;
        sockunion_t                     *gsa, *ssa;
        struct ifnet                    *ifp;
@@ -2038,6 +2066,7 @@ in6p_leave_group(struct inpcb *inp, stru
        struct ip6_moptions             *imo;
        struct in6_msource              *ims;
        struct in6_multi                *inm;
+       uint32_t                         ifindex;
        size_t                           idx;
        int                              error, is_final;
 #ifdef KTR
@@ -2045,6 +2074,7 @@ in6p_leave_group(struct inpcb *inp, stru
 #endif
 
        ifp = NULL;
+       ifindex = 0;
        error = 0;
        is_final = 1;
 
@@ -2054,39 +2084,26 @@ in6p_leave_group(struct inpcb *inp, stru
        ssa = (sockunion_t *)&gsr.gsr_source;
        ssa->ss.ss_family = AF_UNSPEC;
 
+       /*
+        * Chew everything passed in up into a struct group_source_req
+        * as that is easier to process.
+        * Note: Any embedded scope ID in the multicast group passed
+        * in by userland is ignored, the interface index is the recommended
+        * mechanism to specify an interface; see below.
+        */
        switch (sopt->sopt_name) {
-       case IPV6_LEAVE_GROUP: {
-               struct ipv6_mreq mreq;
-
+       case IPV6_LEAVE_GROUP:
                error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
                    sizeof(struct ipv6_mreq));
                if (error)
                        return (error);
-
                gsa->sin6.sin6_family = AF_INET6;
                gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
                gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
-
-               if (mreq.ipv6mr_interface == 0) {
-#ifdef notyet
-                       /*
-                        * FIXME: Resolve scope ambiguity when interface
-                        * index is unspecified.
-                        */
-                       ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
-#else
-                       return (EADDRNOTAVAIL);
-#endif
-               } else {
-                       if (mreq.ipv6mr_interface < 0 ||
-                           V_if_index < mreq.ipv6mr_interface)
-                               return (EADDRNOTAVAIL);
-                       ifp = ifnet_byindex(mreq.ipv6mr_interface);
-               }
-
-               CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p",
-                   __func__, mreq.ipv6mr_interface, ifp);
-       } break;
+               gsa->sin6.sin6_port = 0;
+               gsa->sin6.sin6_scope_id = 0;
+               ifindex = mreq.ipv6mr_interface;
+               break;
 
        case MCAST_LEAVE_GROUP:
        case MCAST_LEAVE_SOURCE_GROUP:
@@ -2105,17 +2122,22 @@ in6p_leave_group(struct inpcb *inp, stru
                if (gsa->sin6.sin6_family != AF_INET6 ||
                    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
                        return (EINVAL);
-
                if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
                        if (ssa->sin6.sin6_family != AF_INET6 ||
                            ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
                                return (EINVAL);
+                       if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
+                               return (EINVAL);
+                       /*
+                        * TODO: Validate embedded scope ID in source
+                        * list entry against passed-in ifp, if and only
+                        * if source list filter entry is iface or node local.
+                        */
+                       in6_clearscope(&ssa->sin6.sin6_addr);
                }
-
-               if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
-                       return (EADDRNOTAVAIL);
-
-               ifp = ifnet_byindex(gsr.gsr_interface);
+               gsa->sin6.sin6_port = 0;
+               gsa->sin6.sin6_scope_id = 0;
+               ifindex = gsr.gsr_interface;
                break;
 
        default:
@@ -2128,14 +2150,39 @@ in6p_leave_group(struct inpcb *inp, stru
        if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
                return (EINVAL);
 
-#ifdef notyet
        /*
-        * FIXME: Need to embed ifp's scope ID in the address
-        * handed down to MLD.
-        * See KAME IPV6_LEAVE_GROUP implementation.
+        * Validate interface index if provided. If no interface index
+        * was provided separately, attempt to look the membership up
+        * from the default scope as a last resort to disambiguate
+        * the membership we are being asked to leave.
+        * XXX SCOPE6 lock potentially taken here.
         */
-       (void)in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL);
-#endif
+       if (ifindex != 0) {
+               if (ifindex < 0 || V_if_index < ifindex)
+                       return (EADDRNOTAVAIL);
+               ifp = ifnet_byindex(ifindex);
+               if (ifp == NULL)
+                       return (EADDRNOTAVAIL);
+               (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+       } else {
+               error = sa6_embedscope(&gsa->sin6, V_ip6_use_defzone);
+               if (error)
+                       return (EADDRNOTAVAIL);
+               /*
+                * XXX For now, stomp on zone ID for the corner case.
+                * This is not the 'KAME way', but we need to see the ifp
+                * directly until such time as this implementation is
+                * refactored, assuming the scope IDs are the way to go.
+                */
+               ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]);
+               KASSERT(ifindex != 0, ("%s: bad zone ID", __func__));
+               ifp = ifnet_byindex(ifindex);
+               if (ifp == NULL)
+                       return (EADDRNOTAVAIL);
+       }
+
+       CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp);
+       KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__));
 
        /*
         * Find the membership in the membership array.
@@ -2312,10 +2359,10 @@ in6p_set_source_filters(struct inpcb *in
 
        if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
                return (EADDRNOTAVAIL);
-
        ifp = ifnet_byindex(msfr.msfr_ifindex);
        if (ifp == NULL)
                return (EADDRNOTAVAIL);
+       (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
 
        /*
         * Take the INP write lock.
@@ -2393,6 +2440,16 @@ in6p_set_source_filters(struct inpcb *in
                                error = EINVAL;
                                break;
                        }
+                       if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       /*
+                        * TODO: Validate embedded scope ID in source
+                        * list entry against passed-in ifp, if and only
+                        * if source list filter entry is iface or node local.
+                        */
+                       in6_clearscope(&psin->sin6_addr);
                        error = im6f_get_source(imf, psin, &lims);
                        if (error)
                                break;
@@ -2560,7 +2617,7 @@ static int
 sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS)
 {
        INIT_VNET_NET(curvnet);
-       struct in6_addr                 *pgina;
+       struct in6_addr                  mcaddr;
        struct in6_addr                  src;
        struct ifnet                    *ifp;
        struct ifmultiaddr              *ifma;
@@ -2591,10 +2648,10 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_
                return (ENOENT);
        }
 
-       pgina = (struct in6_addr *)&name[1];
-       if (!IN6_IS_ADDR_MULTICAST(pgina)) {
+       memcpy(&mcaddr, &name[1], sizeof(struct in6_addr));
+       if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) {
                CTR2(KTR_MLD, "%s: group %s is not multicast",
-                   __func__, ip6_sprintf(ip6tbuf, pgina));
+                   __func__, ip6_sprintf(ip6tbuf, &mcaddr));
                return (EINVAL);
        }
 
@@ -2604,6 +2661,10 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_
                    __func__, ifindex);
                return (ENOENT);
        }
+       /*
+        * Internal MLD lookups require that scope/zone ID is set.
+        */
+       (void)in6_setscope(&mcaddr, ifp, NULL);
 
        retval = sysctl_wire_old_buffer(req,
            sizeof(uint32_t) + (in6_mcast_maxgrpsrc * sizeof(struct in6_addr)));
@@ -2618,7 +2679,7 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_
                    ifma->ifma_protospec == NULL)
                        continue;
                inm = (struct in6_multi *)ifma->ifma_protospec;
-               if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, pgina))
+               if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
                        continue;
                fmode = inm->in6m_st[1].iss_fmode;
                retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));

Modified: head/sys/netinet6/ip6_input.c
==============================================================================
--- head/sys/netinet6/ip6_input.c       Wed May 27 18:54:31 2009        (r192922)
+++ head/sys/netinet6/ip6_input.c       Wed May 27 18:57:13 2009        (r192923)
@@ -773,10 +773,11 @@ passin:
                 * case we should pass the packet to the multicast routing
                 * daemon.
                 */
-               if (rtalert != ~0 && V_ip6_forwarding) {
+               if (rtalert != ~0) {
                        switch (rtalert) {
                        case IP6OPT_RTALERT_MLD:
-                               ours = 1;
+                               if (V_ip6_forwarding)
+                                       ours = 1;
                                break;
                        default:
                                /*
@@ -820,6 +821,9 @@ passin:
                 * The packet is returned (relatively) intact; if
                 * ip6_mforward() returns a non-zero value, the packet
                 * must be discarded, else it may be accepted below.
+                *
+                * XXX TODO: Check hlim and multicast scope here to avoid
+                * unnecessarily calling into ip6_mforward().
                 */
                if (ip6_mforward &&
                    ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
@@ -882,6 +886,14 @@ passin:
                if (ip6_ipsec_input(m, nxt))
                        goto bad;
 #endif /* IPSEC */
+
+               /*
+                * Use mbuf flags to propagate Router Alert option to
+                * ICMPv6 layer, as hop-by-hop options have been stripped.
+                */
+               if (nxt == IPPROTO_ICMPV6 && rtalert != ~0)
+                       m->m_flags |= M_RTALERT_MLD;
+
                nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
        }
        goto out;

Modified: head/sys/netinet6/mld6.c
==============================================================================
--- head/sys/netinet6/mld6.c    Wed May 27 18:54:31 2009        (r192922)
+++ head/sys/netinet6/mld6.c    Wed May 27 18:57:13 2009        (r192923)
@@ -122,9 +122,9 @@ static void mld_slowtimo_vnet(void);
 static void    mld_sysinit(void);
 static void    mld_sysuninit(void);
 static int     mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
-                   const struct mld_hdr *);
+                   /*const*/ struct mld_hdr *);
 static int     mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
-                   const struct mld_hdr *);
+                   /*const*/ struct mld_hdr *);
 static void    mld_v1_process_group_timer(struct in6_multi *, const int);
 static void    mld_v1_process_querier_timers(struct mld_ifinfo *);
 static int     mld_v1_transmit_report(struct in6_multi *, const int);
@@ -239,6 +239,11 @@ SYSCTL_V_PROC(V_NET, vnet_inet6, _net_in
 SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE,
     sysctl_mld_ifinfo, "Per-interface MLDv2 state");
 
+static int     mld_v1enable = 1;
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW,
+    &mld_v1enable, 0, "Enable fallback to MLDv1");
+TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable);
+
 /*
  * Packed Router Alert option structure declaration.
  */
@@ -615,36 +620,97 @@ mli_delete_locked(const struct ifnet *if
 /*
  * Process a received MLDv1 general or address-specific query.
  * Assumes that the query header has been pulled up to sizeof(mld_hdr).
+ *
+ * NOTE: Can't be fully const correct as we temporarily embed scope ID in
+ * mld_addr. This is OK as we own the mbuf chain.
  */
 static int
 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
-    const struct mld_hdr *mld)
+    /*const*/ struct mld_hdr *mld)
 {
        struct ifmultiaddr      *ifma;
        struct mld_ifinfo       *mli;
        struct in6_multi        *inm;
+       int                      is_general_query;
        uint16_t                 timer;
 #ifdef KTR
        char                     ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
+       is_general_query = 0;
+
+       if (!mld_v1enable) {
+               CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
+                   ip6_sprintf(ip6tbuf, &mld->mld_addr),
+                   ifp, ifp->if_xname);
+               return (0);
+       }
+
+       /*
+        * RFC3810 Section 6.2: MLD queries must originate from
+        * a router's link-local address.
+        */
+       if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
+               CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
+                   ip6_sprintf(ip6tbuf, &ip6->ip6_src),
+                   ifp, ifp->if_xname);
+               return (0);
+       }
+
+       /*
+        * Do address field validation upfront before we accept
+        * the query.
+        */
+       if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+               /*
+                * MLDv1 General Query.
+                * If this was not sent to the all-nodes group, ignore it.
+                */
+               struct in6_addr          dst;
+
+               dst = ip6->ip6_dst;
+               in6_clearscope(&dst);
+               if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
+                       return (EINVAL);
+               is_general_query = 1;
+       } else {
+               /*
+                * Embed scope ID of receiving interface in MLD query for
+                * lookup whilst we don't hold other locks.
+                */
+               in6_setscope(&mld->mld_addr, ifp, NULL);
+       }
+
        IN6_MULTI_LOCK();
        MLD_LOCK();
        IF_ADDR_LOCK(ifp);
 
-       mli = MLD_IFINFO(ifp);
-       KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
-
        /*
         * Switch to MLDv1 host compatibility mode.
         */
+       mli = MLD_IFINFO(ifp);
+       KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
        mld_set_version(mli, MLD_VERSION_1);
 
-       timer = ntohs(mld->mld_maxdelay) * PR_FASTHZ / MLD_TIMER_SCALE;
+       timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE;
        if (timer == 0)
                timer = 1;
 
-       if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+       if (is_general_query) {
+               /*
+                * For each reporting group joined on this
+                * interface, kick the report timer.
+                */
+               CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
+                   ifp, ifp->if_xname);
+               TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+                       if (ifma->ifma_addr->sa_family != AF_INET6 ||
+                           ifma->ifma_protospec == NULL)
+                               continue;
+                       inm = (struct in6_multi *)ifma->ifma_protospec;
+                       mld_v1_update_group(inm, timer);
+               }
+       } else {
                /*
                 * MLDv1 Group-Specific Query.
                 * If this is a group-specific MLDv1 query, we need only
@@ -657,32 +723,8 @@ mld_v1_input_query(struct ifnet *ifp, co
                            ifp, ifp->if_xname);
                        mld_v1_update_group(inm, timer);
                }
-       } else {
-               /*
-                * MLDv1 General Query.
-                * If this was not sent to the all-nodes group, ignore it.
-                */
-               struct in6_addr dst;
-
-               dst = ip6->ip6_dst;
-               in6_clearscope(&dst);
-               if (IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
-                       /*
-                        * For each reporting group joined on this
-                        * interface, kick the report timer.
-                        */
-                       CTR2(KTR_MLD,
-                           "process v1 general query on ifp %p(%s)",
-                           ifp, ifp->if_xname);
-
-                       TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
-                               if (ifma->ifma_addr->sa_family != AF_INET6 ||
-                                   ifma->ifma_protospec == NULL)
-                                       continue;
-                               inm = (struct in6_multi *)ifma->ifma_protospec;
-                               mld_v1_update_group(inm, timer);
-                       }
-               }
+               /* XXX Clear embedded scope ID as userland won't expect it. */
+               in6_clearscope(&mld->mld_addr);
        }
 
        IF_ADDR_UNLOCK(ifp);
@@ -769,18 +811,38 @@ mld_v2_input_query(struct ifnet *ifp, co
        struct mldv2_query      *mld;
        struct in6_multi        *inm;
        uint32_t                 maxdelay, nsrc, qqi;
+       int                      is_general_query;
        uint16_t                 timer;
        uint8_t                  qrv;
+#ifdef KTR
+       char                     ip6tbuf[INET6_ADDRSTRLEN];
+#endif
+
+       is_general_query = 0;
+
+       /*
+        * RFC3810 Section 6.2: MLD queries must originate from
+        * a router's link-local address.
+        */
+       if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
+               CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
+                   ip6_sprintf(ip6tbuf, &ip6->ip6_src),
+                   ifp, ifp->if_xname);
+               return (0);
+       }
 
-       CTR2(KTR_MLD, "process v2 query on ifp %p(%s)", ifp, ifp->if_xname);
+       CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname);
 
        mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
 
        maxdelay = ntohs(mld->mld_maxdelay);    /* in 1/10ths of a second */
        if (maxdelay >= 32678) {
-               maxdelay = (MLD_MRC_MANT(mld->mld_maxdelay) | 0x1000) <<
-                          (MLD_MRC_EXP(mld->mld_maxdelay) + 3);
+               maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
+                          (MLD_MRC_EXP(maxdelay) + 3);
        }
+       timer = (maxdelay * PR_FASTHZ) / MLD_TIMER_SCALE;
+       if (timer == 0)
+               timer = 1;
 
        qrv = MLD_QRV(mld->mld_misc);
        if (qrv < 2) {
@@ -795,10 +857,6 @@ mld_v2_input_query(struct ifnet *ifp, co
                     (MLD_QQIC_EXP(mld->mld_qqi) + 3);
        }
 
-       timer = maxdelay * PR_FASTHZ / MLD_TIMER_SCALE;
-       if (timer == 0)
-               timer = 1;
-
        nsrc = ntohs(mld->mld_numsrc);
        if (nsrc > MLD_MAX_GS_SOURCES)
                return (EMSGSIZE);
@@ -806,6 +864,33 @@ mld_v2_input_query(struct ifnet *ifp, co
            (nsrc * sizeof(struct in6_addr)))
                return (EMSGSIZE);
 
+       /*
+        * Do further input validation upfront to avoid resetting timers
+        * should we need to discard this query.
+        */
+       if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+               /*
+                * General Queries SHOULD be directed to ff02::1.
+                * A general query with a source list has undefined
+                * behaviour; discard it.
+                */
+               struct in6_addr          dst;
+
+               dst = ip6->ip6_dst;
+               in6_clearscope(&dst);
+               if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
+                   nsrc > 0)
+                       return (EINVAL);
+               is_general_query = 1;
+       } else {
+               /*
+                * Embed scope ID of receiving interface in MLD query for
+                * lookup whilst we don't hold other locks (due to KAME
+                * locking lameness). We own this mbuf chain just now.
+                */
+               in6_setscope(&mld->mld_addr, ifp, NULL);
+       }
+
        IN6_MULTI_LOCK();
        MLD_LOCK();
        IF_ADDR_LOCK(ifp);
@@ -813,8 +898,15 @@ mld_v2_input_query(struct ifnet *ifp, co
        mli = MLD_IFINFO(ifp);
        KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
 
-       mld_set_version(mli, MLD_VERSION_2);
+       /*
+        * Discard the v2 query if we're in Compatibility Mode.
+        * The RFC is pretty clear that hosts need to stay in MLDv1 mode
+        * until the Old Version Querier Present timer expires.
+        */
+       if (mli->mli_version != MLD_VERSION_2)
+               goto out_locked;
 
+       mld_set_version(mli, MLD_VERSION_2);
        mli->mli_rv = qrv;
        mli->mli_qi = qqi;
        mli->mli_qri = maxdelay;
@@ -822,39 +914,20 @@ mld_v2_input_query(struct ifnet *ifp, co
        CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
            maxdelay);
 
-       if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+       if (is_general_query) {
                /*
                 * MLDv2 General Query.
                 *
                 * Schedule a current-state report on this ifp for
                 * all groups, possibly containing source lists.
                 *
-                * Strip scope ID embedded by ip6_input(). We do not need
-                * to do this for the MLD payload.
-                */
-               struct in6_addr dst;
-
-               dst = ip6->ip6_dst;
-               in6_clearscope(&dst);
-               if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
-                   nsrc > 0) {
-                       /*
-                        * General Queries SHOULD be directed to ff02::1.
-                        * A general query with a source list has undefined
-                        * behaviour; discard it.
-                        */
-                       goto out_locked;
-               }
-
-               CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
-                   ifp, ifp->if_xname);
-
-               /*
                 * If there is a pending General Query response
                 * scheduled earlier than the selected delay, do
                 * not schedule any other reports.
                 * Otherwise, reset the interface timer.
                 */
+               CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
+                   ifp, ifp->if_xname);
                if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
                        mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
                        V_interface_timers_running6 = 1;
@@ -890,6 +963,9 @@ mld_v2_input_query(struct ifnet *ifp, co
                 */
                if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
                        mld_v2_process_group_query(inm, mli, timer, m, off);
+
+               /* XXX Clear embedded scope ID as userland won't expect it. */
+               in6_clearscope(&mld->mld_addr);
        }
 
 out_locked:
@@ -1017,27 +1093,57 @@ mld_v2_process_group_query(struct in6_mu
 /*
  * Process a received MLDv1 host membership report.
  * Assumes mld points to mld_hdr in pulled up mbuf chain.
+ *
+ * NOTE: Can't be fully const correct as we temporarily embed scope ID in
+ * mld_addr. This is OK as we own the mbuf chain.
  */
 static int
 mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
-    const struct mld_hdr *mld)
+    /*const*/ struct mld_hdr *mld)
 {
+       struct in6_addr          src, dst;
        struct in6_ifaddr       *ia;
        struct in6_multi        *inm;
-       struct in6_addr          src, dst;
 #ifdef KTR
        char                     ip6tbuf[INET6_ADDRSTRLEN];
 #endif
 
+       if (!mld_v1enable) {
+               CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
+                   ip6_sprintf(ip6tbuf, &mld->mld_addr),
+                   ifp, ifp->if_xname);
+               return (0);
+       }
+
        if (ifp->if_flags & IFF_LOOPBACK)
                return (0);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to