Hi,

In ARP we have a queue of packets that should be sent after name
resolution.  In ND6 we only hold a single packet.  I would like to
unify the logic.  As a bonus we limit the mbufs and get MP safe
mbuf queue.

The new function if_mqoutput() has common code for ARP and ND6.
ln_saddr6 holds the source address of the requesting packet.  That
seems easier than fiddling with mbuf queue in nd6_ns_output().

ok?

bluhm

Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.685
diff -u -p -r1.685 if.c
--- net/if.c    7 Mar 2023 20:09:48 -0000       1.685
+++ net/if.c    2 Apr 2023 22:37:23 -0000
@@ -753,6 +753,27 @@ if_enqueue_ifq(struct ifnet *ifp, struct
 }
 
 void
+if_mqoutput(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
+    struct sockaddr *dst, struct rtentry *rt)
+{
+       struct mbuf_list ml;
+       struct mbuf *m;
+       unsigned int len;
+
+       mq_delist(mq, &ml);
+       len = ml_len(&ml);
+       while ((m = ml_dequeue(&ml)) != NULL)
+               ifp->if_output(ifp, m, rt_key(rt), rt);
+
+       /* XXXSMP we also discard if other CPU enqueues */
+       if (mq_len(mq) > 0) {
+               /* mbuf is back in queue. Discard. */
+               atomic_sub_int(total, len + mq_purge(mq));
+       } else
+               atomic_sub_int(total, len);
+}
+
+void
 if_input(struct ifnet *ifp, struct mbuf_list *ml)
 {
        ifiq_input(&ifp->if_rcv, ml);
Index: net/if_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
retrieving revision 1.122
diff -u -p -r1.122 if_var.h
--- net/if_var.h        23 Nov 2022 14:50:59 -0000      1.122
+++ net/if_var.h        2 Apr 2023 22:37:23 -0000
@@ -320,6 +320,8 @@ extern struct ifnet_head ifnetlist;
 void   if_start(struct ifnet *);
 int    if_enqueue(struct ifnet *, struct mbuf *);
 int    if_enqueue_ifq(struct ifnet *, struct mbuf *);
+void   if_mqoutput(struct ifnet *, struct mbuf_queue *, unsigned int *,
+           struct sockaddr *, struct rtentry *);
 void   if_input(struct ifnet *, struct mbuf_list *);
 void   if_vinput(struct ifnet *, struct mbuf *);
 void   if_input_process(struct ifnet *, struct mbuf_list *);
Index: netinet/if_ether.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.258
diff -u -p -r1.258 if_ether.c
--- netinet/if_ether.c  8 Mar 2023 04:43:08 -0000       1.258
+++ netinet/if_ether.c  2 Apr 2023 22:37:23 -0000
@@ -653,10 +653,7 @@ arpcache(struct ifnet *ifp, struct ether
        struct in_addr *spa = (struct in_addr *)ea->arp_spa;
        char addr[INET_ADDRSTRLEN];
        struct ifnet *rifp;
-       struct mbuf_list ml;
-       struct mbuf *m;
        time_t uptime;
-       unsigned int len;
        int changed = 0;
 
        KERNEL_ASSERT_LOCKED();
@@ -729,16 +726,7 @@ arpcache(struct ifnet *ifp, struct ether
 
        la->la_asked = 0;
        la->la_refreshed = 0;
-       mq_delist(&la->la_mq, &ml);
-       len = ml_len(&ml);
-       while ((m = ml_dequeue(&ml)) != NULL)
-               ifp->if_output(ifp, m, rt_key(rt), rt);
-       /* XXXSMP we discard if other CPU enqueues */
-       if (mq_len(&la->la_mq) > 0) {
-               /* mbuf is back in queue. Discard. */
-               atomic_sub_int(&la_hold_total, len + mq_purge(&la->la_mq));
-       } else
-               atomic_sub_int(&la_hold_total, len);
+       if_mqoutput(ifp, &la->la_mq, &la_hold_total, rt_key(rt), rt);
 
        return (0);
 }
Index: netinet6/nd6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.268
diff -u -p -r1.268 nd6.c
--- netinet6/nd6.c      31 Mar 2023 19:43:32 -0000      1.268
+++ netinet6/nd6.c      3 Apr 2023 09:35:39 -0000
@@ -87,6 +87,7 @@ int nd6_debug = 0;
 TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list;
 struct pool nd6_pool;          /* pool for llinfo_nd6 structures */
 int    nd6_inuse;
+int    ln_hold_total;          /* [a] packets currently in the nd6 queue */
 
 void nd6_timer(void *);
 void nd6_slowtimo(void *);
@@ -300,26 +301,33 @@ nd6_llinfo_timer(struct rtentry *rt)
                        nd6_llinfo_settimer(ln, RETRANS_TIMER / 1000);
                        nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
                } else {
-                       struct mbuf *m = ln->ln_hold;
-                       if (m) {
-                               ln->ln_hold = NULL;
+                       struct mbuf_list ml;
+                       struct mbuf *m;
+                       unsigned int len;
+
+                       mq_delist(&ln->ln_mq, &ml);
+                       len = ml_len(&ml);
+                       while ((m = ml_dequeue(&ml)) != NULL) {
                                /*
                                 * Fake rcvif to make the ICMP error
                                 * more helpful in diagnosing for the
                                 * receiver.
-                                * XXX: should we consider
-                                * older rcvif?
+                                * XXX: should we consider older rcvif?
                                 */
                                m->m_pkthdr.ph_ifidx = rt->rt_ifidx;
 
                                icmp6_error(m, ICMP6_DST_UNREACH,
                                    ICMP6_DST_UNREACH_ADDR, 0);
-                               if (ln->ln_hold == m) {
-                                       /* m is back in ln_hold. Discard. */
-                                       m_freem(ln->ln_hold);
-                                       ln->ln_hold = NULL;
-                               }
                        }
+
+                       /* XXXSMP we also discard if other CPU enqueues */
+                       if (mq_len(&ln->ln_mq) > 0) {
+                               /* mbuf is back in queue. Discard. */
+                               atomic_sub_int(&ln_hold_total,
+                                   len + mq_purge(&ln->ln_mq));
+                       } else
+                               atomic_sub_int(&ln_hold_total, len);
+
                        nd6_free(rt);
                        ln = NULL;
                }
@@ -613,9 +621,8 @@ nd6_invalidate(struct rtentry *rt)
        struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
        struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
 
-       m_freem(ln->ln_hold);
+       atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
        sdl->sdl_alen = 0;
-       ln->ln_hold = NULL;
        ln->ln_state = ND6_LLINFO_INCOMPLETE;
        ln->ln_asked = 0;
 }
@@ -800,6 +807,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
                        log(LOG_DEBUG, "%s: pool get failed\n", __func__);
                        break;
                }
+               mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET);
                nd6_inuse++;
                ln->ln_rt = rt;
                /* this is required for "ndp" command. - shin */
@@ -921,7 +929,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
                rt->rt_expire = 0;
                rt->rt_llinfo = NULL;
                rt->rt_flags &= ~RTF_LLINFO;
-               m_freem(ln->ln_hold);
+               atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
                pool_put(&nd6_pool, ln);
                break;
 
@@ -1125,21 +1133,8 @@ fail:
                         * meaningless.
                         */
                        nd6_llinfo_settimer(ln, nd6_gctimer);
-
-                       if (ln->ln_hold) {
-                               struct mbuf *n = ln->ln_hold;
-                               ln->ln_hold = NULL;
-                               /*
-                                * we assume ifp is not a p2p here, so just
-                                * set the 2nd argument as the 1st one.
-                                */
-                               ifp->if_output(ifp, n, rt_key(rt), rt);
-                               if (ln->ln_hold == n) {
-                                       /* n is back in ln_hold. Discard. */
-                                       m_freem(ln->ln_hold);
-                                       ln->ln_hold = NULL;
-                               }
-                       }
+                       if_mqoutput(ifp, &ln->ln_mq, &ln_hold_total,
+                           rt_key(rt), rt);
                } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
                        /* probe right away */
                        nd6_llinfo_settimer(ln, 0);
@@ -1328,13 +1323,23 @@ nd6_resolve(struct ifnet *ifp, struct rt
 
        /*
         * There is a neighbor cache entry, but no ethernet address
-        * response yet.  Replace the held mbuf (if any) with this
-        * latest one.
+        * response yet.  Insert mbuf in hold queue if below limit.
+        * If above the limit free the queue without queuing the new packet.
         */
        if (ln->ln_state == ND6_LLINFO_NOSTATE)
                ln->ln_state = ND6_LLINFO_INCOMPLETE;
-       m_freem(ln->ln_hold);
-       ln->ln_hold = m;
+       /* source address of prompting packet is needed by nd6_ns_output() */
+       if (m->m_len >= sizeof(struct ip6_hdr)) {
+               memcpy(&ln->ln_saddr6, &mtod(m, struct ip6_hdr *)->ip6_src,
+                   sizeof(ln->ln_saddr6));
+       }
+       if (atomic_inc_int_nv(&ln_hold_total) <= LN_HOLD_TOTAL) {
+               if (mq_push(&ln->ln_mq, m) != 0)
+                       atomic_dec_int(&ln_hold_total);
+       } else {
+               atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq) + 1);
+               m_freem(m);
+       }
 
        /*
         * If there has been no NS for the neighbor after entering the
Index: netinet6/nd6.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.h,v
retrieving revision 1.95
diff -u -p -r1.95 nd6.h
--- netinet6/nd6.h      6 Jan 2023 14:35:34 -0000       1.95
+++ netinet6/nd6.h      2 Apr 2023 22:37:23 -0000
@@ -81,12 +81,17 @@ struct      in6_ndireq {
 struct llinfo_nd6 {
        TAILQ_ENTRY(llinfo_nd6) ln_list;
        struct  rtentry *ln_rt;
-       struct  mbuf *ln_hold;  /* last packet until resolved/timeout */
+       struct  mbuf_queue ln_mq;       /* hold packets until resolved */
+       struct  in6_addr ln_saddr6;     /* source of prompting packet */
        long    ln_asked;       /* number of queries already sent for addr */
        int     ln_byhint;      /* # of times we made it reachable by UL hint */
        short   ln_state;       /* reachability state */
        short   ln_router;      /* 2^0: ND6 router bit */
 };
+#define LN_HOLD_QUEUE 10
+#define LN_HOLD_TOTAL 100
+
+extern int ln_hold_total;
 
 #define ND6_LLINFO_PERMANENT(n)        ((n)->ln_rt->rt_expire == 0)
 
Index: netinet6/nd6_nbr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6_nbr.c,v
retrieving revision 1.143
diff -u -p -r1.143 nd6_nbr.c
--- netinet6/nd6_nbr.c  31 Mar 2023 19:43:33 -0000      1.143
+++ netinet6/nd6_nbr.c  2 Apr 2023 22:37:23 -0000
@@ -451,23 +451,14 @@ nd6_ns_output(struct ifnet *ifp, const s
                 * - if taddr is link local saddr6 must be link local as well
                 * Otherwise, we perform the source address selection as usual.
                 */
-               struct ip6_hdr *hip6;           /* hold ip6 */
-               struct in6_addr *saddr6;
+               if (ln != NULL)
+                       src_sa.sin6_addr = ln->ln_saddr6;
+
+               if (!IN6_IS_ADDR_LINKLOCAL(taddr6) ||
+                   IN6_IS_ADDR_UNSPECIFIED(&src_sa.sin6_addr) ||
+                   IN6_IS_ADDR_LINKLOCAL(&src_sa.sin6_addr) ||
+                   !in6ifa_ifpwithaddr(ifp, &src_sa.sin6_addr)) {
 
-               if (ln && ln->ln_hold) {
-                       hip6 = mtod(ln->ln_hold, struct ip6_hdr *);
-                       if (sizeof(*hip6) <= ln->ln_hold->m_len) {
-                               saddr6 = &hip6->ip6_src;
-                               if (saddr6 && IN6_IS_ADDR_LINKLOCAL(taddr6) &&
-                                   !IN6_IS_ADDR_LINKLOCAL(saddr6))
-                                       saddr6 = NULL;
-                       } else
-                               saddr6 = NULL;
-               } else
-                       saddr6 = NULL;
-               if (saddr6 && in6ifa_ifpwithaddr(ifp, saddr6))
-                       src_sa.sin6_addr = *saddr6;
-               else {
                        struct rtentry *rt;
 
                        rt = rtalloc(sin6tosa(&dst_sa), RT_RESOLVE,
@@ -832,20 +823,7 @@ nd6_na_input(struct mbuf *m, int off, in
        }
        rt->rt_flags &= ~RTF_REJECT;
        ln->ln_asked = 0;
-       if (ln->ln_hold) {
-               struct mbuf *n = ln->ln_hold;
-               ln->ln_hold = NULL;
-               /*
-                * we assume ifp is not a loopback here, so just set the 2nd
-                * argument as the 1st one.
-                */
-               ifp->if_output(ifp, n, rt_key(rt), rt);
-               if (ln->ln_hold == n) {
-                       /* n is back in ln_hold. Discard. */
-                       m_freem(ln->ln_hold);
-                       ln->ln_hold = NULL;
-               }
-       }
+       if_mqoutput(ifp, &ln->ln_mq, &ln_hold_total, rt_key(rt), rt);
 
  freeit:
        rtfree(rt);

Reply via email to