Hi Wojciech,

On 21 Jan 2022, at 6:19, Wojciech Macek wrote:
> The branch main has been updated by wma:
>
> URL: 
> https://cgit.FreeBSD.org/src/commit/?id=9ce46cbc95d7a6fccb55af0d42cbb85c29f10639
>
> commit 9ce46cbc95d7a6fccb55af0d42cbb85c29f10639
> Author:     Wojciech Macek <w...@freebsd.org>
> AuthorDate: 2022-01-21 05:15:08 +0000
> Commit:     Wojciech Macek <w...@freebsd.org>
> CommitDate: 2022-01-21 05:17:19 +0000
>
>     ip_mroute: move ip_mrouter_done outside lock
>
>     X_ip_mrouter_done might sleep, which triggers INVARIANTS to
>     print additional errors on the screen.
>     Move it outside the lock, but provide some basic synchronization
>     to avoid race condition during module uninit/unload.
>
>     Obtained from:          Semihalf
>     Sponsored by:           Stormshield

I suspect this change causes panics like this one: 
https://ci.freebsd.org/job/FreeBSD-main-amd64-test/20437/consoleText

>  sys/netinet/ip_mroute.c | 11 ++++++++---
>  sys/netinet/ip_mroute.h |  4 +++-
>  sys/netinet/raw_ip.c    | 11 ++++++++---
>  3 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c
> index 8cd0b2ac7449..0566048621ad 100644
> --- a/sys/netinet/ip_mroute.c
> +++ b/sys/netinet/ip_mroute.c
> @@ -300,7 +300,7 @@ VNET_DEFINE_STATIC(struct ifnet *, multicast_register_if);
>  static u_long        X_ip_mcast_src(int);
>  static int   X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
>                   struct ip_moptions *);
> -static int   X_ip_mrouter_done(void);
> +static int   X_ip_mrouter_done(void *);
>  static int   X_ip_mrouter_get(struct socket *, struct sockopt *);
>  static int   X_ip_mrouter_set(struct socket *, struct sockopt *);
>  static int   X_legal_vif_num(int);
> @@ -431,7 +431,7 @@ X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
>       break;
>
>      case MRT_DONE:
> -     error = ip_mrouter_done();
> +     error = ip_mrouter_done(NULL);
>       break;
>
>      case MRT_ADD_VIF:
> @@ -734,7 +734,7 @@ ip_mrouter_init(struct socket *so, int version)
>   * Disable multicast forwarding.
>   */
>  static int
> -X_ip_mrouter_done(void)
> +X_ip_mrouter_done(void *locked)
>  {
>      struct ifnet *ifp;
>      u_long i;
> @@ -751,6 +751,11 @@ X_ip_mrouter_done(void)
>      atomic_subtract_int(&ip_mrouter_cnt, 1);
>      V_mrt_api_config = 0;
>
> +    if (locked) {
> +     struct epoch_tracker *mrouter_et = locked;
> +     MROUTER_RUNLOCK_PARAM(mrouter_et);
> +    }
> +
>      MROUTER_WAIT();
>
>      /* Stop and drain task queue */
> diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h
> index 65c5bdd3a025..016d026d184c 100644
> --- a/sys/netinet/ip_mroute.h
> +++ b/sys/netinet/ip_mroute.h
> @@ -363,12 +363,14 @@ struct sockopt;
>
>  extern int   (*ip_mrouter_set)(struct socket *, struct sockopt *);
>  extern int   (*ip_mrouter_get)(struct socket *, struct sockopt *);
> -extern int   (*ip_mrouter_done)(void);
> +extern int   (*ip_mrouter_done)(void *);
>  extern int   (*mrt_ioctl)(u_long, caddr_t, int);
>
>  #define      MROUTER_RLOCK_TRACKER   struct epoch_tracker mrouter_et
> +#define      MROUTER_RLOCK_PARAM_PTR         &mrouter_et
>  #define      MROUTER_RLOCK() epoch_enter_preempt(net_epoch_preempt, 
> &mrouter_et)
>  #define      MROUTER_RUNLOCK()       epoch_exit_preempt(net_epoch_preempt, 
> &mrouter_et)
> +#define      MROUTER_RUNLOCK_PARAM(param)    
> epoch_exit_preempt(net_epoch_preempt, param)
>  #define      MROUTER_WAIT()  epoch_wait_preempt(net_epoch_preempt)
>
>  #endif /* _KERNEL */
> diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
> index 7c495745806e..08ce848a63f7 100644
> --- a/sys/netinet/raw_ip.c
> +++ b/sys/netinet/raw_ip.c
> @@ -119,7 +119,7 @@ VNET_DEFINE(struct socket *, ip_mrouter);
>   */
>  int (*ip_mrouter_set)(struct socket *, struct sockopt *);
>  int (*ip_mrouter_get)(struct socket *, struct sockopt *);
> -int (*ip_mrouter_done)(void);
> +int (*ip_mrouter_done)(void *locked);
>  int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
>                  struct ip_moptions *);
>  int (*mrt_ioctl)(u_long, caddr_t, int);
> @@ -879,18 +879,23 @@ static void
>  rip_detach(struct socket *so)
>  {
>       struct inpcb *inp;
> +     MROUTER_RLOCK_TRACKER;
>
>       inp = sotoinpcb(so);
>       KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
>       KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
>           ("rip_detach: not closed"));
>
> +     /* Disable mrouter first, lock released inside ip_mrouter_done */
> +     MROUTER_RLOCK();
> +     if (so == V_ip_mrouter && ip_mrouter_done)
> +             ip_mrouter_done(MROUTER_RLOCK_PARAM_PTR);
> +

I believe this is the problem.

If we do not enter ip_mrouter_done() here we’ll exit the function without 
exiting epoch. The epoch tracker on the stack will be overwritten, and that 
could produce the panic we see in ci.freebsd.org.

Best regards,
Kristof

Reply via email to