On 21 Jan 2022, at 14:01, Kristof Provost wrote:
Hi Wojciech,

On 21 Jan 2022, at 6:19, Wojciech Macek wrote:
The branch main has been updated by wma:

URL: https://cgit.FreeBSD.org/src/commit/?id=9ce46cbc95d7a6fccb55af0d42cbb85c29f10639

commit 9ce46cbc95d7a6fccb55af0d42cbb85c29f10639
Author:     Wojciech Macek <w...@freebsd.org>
AuthorDate: 2022-01-21 05:15:08 +0000
Commit:     Wojciech Macek <w...@freebsd.org>
CommitDate: 2022-01-21 05:17:19 +0000

    ip_mroute: move ip_mrouter_done outside lock

    X_ip_mrouter_done might sleep, which triggers INVARIANTS to
    print additional errors on the screen.
    Move it outside the lock, but provide some basic synchronization
    to avoid race condition during module uninit/unload.

    Obtained from:          Semihalf
    Sponsored by:           Stormshield

I suspect this change causes panics like this one: https://ci.freebsd.org/job/FreeBSD-main-amd64-test/20437/consoleText

 sys/netinet/ip_mroute.c | 11 ++++++++---
 sys/netinet/ip_mroute.h |  4 +++-
 sys/netinet/raw_ip.c    | 11 ++++++++---
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
index 8cd0b2ac7449..0566048621ad 100644
--- a/sys/netinet/ip_mroute.c
+++ b/sys/netinet/ip_mroute.c
@@ -300,7 +300,7 @@ VNET_DEFINE_STATIC(struct ifnet *, multicast_register_if);
 static u_long  X_ip_mcast_src(int);
 static int     X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
                    struct ip_moptions *);
-static int     X_ip_mrouter_done(void);
+static int     X_ip_mrouter_done(void *);
 static int     X_ip_mrouter_get(struct socket *, struct sockopt *);
 static int     X_ip_mrouter_set(struct socket *, struct sockopt *);
 static int     X_legal_vif_num(int);
@@ -431,7 +431,7 @@ X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
        break;

     case MRT_DONE:
-       error = ip_mrouter_done();
+       error = ip_mrouter_done(NULL);
        break;

     case MRT_ADD_VIF:
@@ -734,7 +734,7 @@ ip_mrouter_init(struct socket *so, int version)
  * Disable multicast forwarding.
  */
 static int
-X_ip_mrouter_done(void)
+X_ip_mrouter_done(void *locked)
 {
     struct ifnet *ifp;
     u_long i;
@@ -751,6 +751,11 @@ X_ip_mrouter_done(void)
     atomic_subtract_int(&ip_mrouter_cnt, 1);
     V_mrt_api_config = 0;

+    if (locked) {
+       struct epoch_tracker *mrouter_et = locked;
+       MROUTER_RUNLOCK_PARAM(mrouter_et);
+    }
+
     MROUTER_WAIT();

     /* Stop and drain task queue */
diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
index 65c5bdd3a025..016d026d184c 100644
--- a/sys/netinet/ip_mroute.h
+++ b/sys/netinet/ip_mroute.h
@@ -363,12 +363,14 @@ struct sockopt;

 extern int     (*ip_mrouter_set)(struct socket *, struct sockopt *);
 extern int     (*ip_mrouter_get)(struct socket *, struct sockopt *);
-extern int     (*ip_mrouter_done)(void);
+extern int     (*ip_mrouter_done)(void *);
 extern int     (*mrt_ioctl)(u_long, caddr_t, int);

 #define        MROUTER_RLOCK_TRACKER   struct epoch_tracker mrouter_et
+#define        MROUTER_RLOCK_PARAM_PTR         &mrouter_et
#define MROUTER_RLOCK() epoch_enter_preempt(net_epoch_preempt, &mrouter_et) #define MROUTER_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &mrouter_et) +#define MROUTER_RUNLOCK_PARAM(param) epoch_exit_preempt(net_epoch_preempt, param)
 #define        MROUTER_WAIT()  epoch_wait_preempt(net_epoch_preempt)

 #endif /* _KERNEL */
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 7c495745806e..08ce848a63f7 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -119,7 +119,7 @@ VNET_DEFINE(struct socket *, ip_mrouter);
  */
 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
-int (*ip_mrouter_done)(void);
+int (*ip_mrouter_done)(void *locked);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
                   struct ip_moptions *);
 int (*mrt_ioctl)(u_long, caddr_t, int);
@@ -879,18 +879,23 @@ static void
 rip_detach(struct socket *so)
 {
        struct inpcb *inp;
+       MROUTER_RLOCK_TRACKER;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
        KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
            ("rip_detach: not closed"));

+       /* Disable mrouter first, lock released inside ip_mrouter_done */
+       MROUTER_RLOCK();
+       if (so == V_ip_mrouter && ip_mrouter_done)
+               ip_mrouter_done(MROUTER_RLOCK_PARAM_PTR);
+

I believe this is the problem.

If we do not enter ip_mrouter_done() here we’ll exit the function without exiting epoch. The epoch tracker on the stack will be overwritten, and that could produce the panic we see in ci.freebsd.org.

I’m currently running with this patch:

        diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
        index 0566048621ad..ff68b140af7e 100644
        --- a/sys/netinet/ip_mroute.c
        +++ b/sys/netinet/ip_mroute.c
        @@ -741,8 +741,13 @@ X_ip_mrouter_done(void *locked)
             vifi_t vifi;
             struct bw_upcall *bu;

        -    if (V_ip_mrouter == NULL)
        -       return EINVAL;
        +    if (V_ip_mrouter == NULL) {
        +        if (locked) {
        +            struct epoch_tracker *mrouter_et = locked;
        +            MROUTER_RUNLOCK_PARAM(mrouter_et);
        +        }
        +        return (EINVAL);
        +    }

             /*
              * Detach/disable hooks to the reset of the system.
        diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
        index 08ce848a63f7..4354bee3cfcc 100644
        --- a/sys/netinet/raw_ip.c
        +++ b/sys/netinet/raw_ip.c
        @@ -887,9 +887,10 @@ rip_detach(struct socket *so)
                    ("rip_detach: not closed"));

/* Disable mrouter first, lock released inside ip_mrouter_done */
        -       MROUTER_RLOCK();
        -       if (so == V_ip_mrouter && ip_mrouter_done)
        +       if (so == V_ip_mrouter && ip_mrouter_done) {
        +               MROUTER_RLOCK();
                        ip_mrouter_done(MROUTER_RLOCK_PARAM_PTR);
        +       }

                INP_WLOCK(inp);
                INP_HASH_WLOCK(&V_ripcbinfo);

However, it’s not at all clear to me what we’re actually accomplishing by entering the net epoch here. As far as I can tell that’s basically a no-op.

Kristof

Reply via email to