On Mon, Mar 04, 2024 at 01:00:32PM +0100, Claudio Jeker wrote:
> On Wed, Feb 28, 2024 at 06:03:14PM +0300, Vitaliy Makkoveev wrote:
> > On Wed, Feb 28, 2024 at 02:45:44PM +0100, Martin Pieuchot wrote:
> > > > > > 
> > > > > > Not only wg(4). Depends on interface queue usage, ifq_start() 
> > > > > > schedules
> > > > > > (*if_qstart)() or calls it, so all the interfaces with use 
> > > > > > rwlock(9) in
> > > > > > (*if_qstart)() handler are in risk.
> > > > > > 
> > > > > > What about to always schedule (*if_qstart)()?
> > > > > 
> > > > > Why would you want to introduce additional latence?
> > > > > 
> > > > 
> > > > I suppose it the less evil than strictly deny rwlocks in (*if_qstart)().
> > > > Anyway it will be scheduled unless `seq_len' exceeds the watermark. 
> > > 
> > > Please no.  This is not going to happen.  wg(4) has to be fixed.  Let's
> > > not change the design of the kernel every time a bug is found.
> > > 
> > 
> > I'm not the fan of ifq_start() behaviour.
> > 
> > wg(4) needs to convert `t_lock', `r_keypair_lock' and `c_lock' rwlocks
> > to mutexes. I used mtx_init_flags() to keep existing names.
> 
> You do not mention if you verified that any of those codepath may sleep or
> not.

I use wg(4), so, obviously, I verified this.

wg_qstart() calls the following routines: noise_remote_ready(),
wg_tag_get(), wg_timers_event_want_initiation() and wg_queue_out().

noise_remote_ready() has the sleep point produced by `r_keypair_lock'
rwlock. We also acquire `c_lock' rwlock with `r_keypair_lock' rwlock
held. I changed them both to `r_keypair_mtx' and `c_mtx' mutexes.

`c_lock' serialized paths noise_counter_send() and noise_counter_recv()
have no sleep points.

`r_keypair_lock' serialized paths noise_remote_begin_session(),
noise_remote_clear(), noise_remote_expire_current(),
noise_remote_ready(), noise_remote_encrypt() and  noise_remote_decrypt()
have no sleep points. The suspicious noise_remote_keypair_allocate()
only performs operations around list.

wg_tag_get() has no sleep points.

wg_timers_event_want_initiation() has the sleep point produced by
`t_lock' rwlock. We also acquire `t_handshake_mtx' mutex with
`t_lock' held. I changed `t_lock' to `t_mtx' mutex.

wg_queue_out() has no sleep points. 

`c_lock' protected paths has no sleep points.

The `t_mtx' is the simplest for review. It protects `t_disabled' and
`t_need_another_keepalive' and the serialized paths are timeout or task
scheduling.

> ...I don't have the time to go over all those codepaths and check.
>  

I thought the reported is interesting to fix this and provides some
feedback.

> > Index: sys/net/if_wg.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/if_wg.c,v
> > retrieving revision 1.36
> > diff -u -p -r1.36 if_wg.c
> > --- sys/net/if_wg.c 18 Jan 2024 08:46:41 -0000      1.36
> > +++ sys/net/if_wg.c 28 Feb 2024 14:49:16 -0000
> > @@ -150,8 +150,8 @@ struct wg_index {
> >  };
> >  
> >  struct wg_timers {
> > -   /* t_lock is for blocking wg_timers_event_* when setting t_disabled. */
> > -   struct rwlock            t_lock;
> > +   /* t_mtx is for blocking wg_timers_event_* when setting t_disabled. */
> > +   struct mutex             t_mtx;
> >  
> >     int                      t_disabled;
> >     int                      t_need_another_keepalive;
> > @@ -930,7 +930,7 @@ void
> >  wg_timers_init(struct wg_timers *t)
> >  {
> >     bzero(t, sizeof(*t));
> > -   rw_init(&t->t_lock, "wg_timers");
> > +   mtx_init_flags(&t->t_mtx, IPL_NET, "wg_timers", 0);
> >     mtx_init(&t->t_handshake_mtx, IPL_NET);
> >  
> >     timeout_set(&t->t_new_handshake, wg_timers_run_new_handshake, t);
> > @@ -945,19 +945,19 @@ wg_timers_init(struct wg_timers *t)
> >  void
> >  wg_timers_enable(struct wg_timers *t)
> >  {
> > -   rw_enter_write(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     t->t_disabled = 0;
> > -   rw_exit_write(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >     wg_timers_run_persistent_keepalive(t);
> >  }
> >  
> >  void
> >  wg_timers_disable(struct wg_timers *t)
> >  {
> > -   rw_enter_write(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     t->t_disabled = 1;
> >     t->t_need_another_keepalive = 0;
> > -   rw_exit_write(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  
> >     timeout_del_barrier(&t->t_new_handshake);
> >     timeout_del_barrier(&t->t_send_keepalive);
> > @@ -969,12 +969,12 @@ wg_timers_disable(struct wg_timers *t)
> >  void
> >  wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval)
> >  {
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled) {
> >             t->t_persistent_keepalive_interval = interval;
> >             wg_timers_run_persistent_keepalive(t);
> >     }
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  int
> > @@ -1020,16 +1020,16 @@ wg_timers_event_data_sent(struct wg_time
> >     int     msecs = NEW_HANDSHAKE_TIMEOUT * 1000;
> >     msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
> >  
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled && !timeout_pending(&t->t_new_handshake))
> >             timeout_add_msec(&t->t_new_handshake, msecs);
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  void
> >  wg_timers_event_data_received(struct wg_timers *t)
> >  {
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled) {
> >             if (!timeout_pending(&t->t_send_keepalive))
> >                     timeout_add_sec(&t->t_send_keepalive,
> > @@ -1037,7 +1037,7 @@ wg_timers_event_data_received(struct wg_
> >             else
> >                     t->t_need_another_keepalive = 1;
> >     }
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  void
> > @@ -1055,11 +1055,11 @@ wg_timers_event_any_authenticated_packet
> >  void
> >  wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t)
> >  {
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled && t->t_persistent_keepalive_interval > 0)
> >             timeout_add_sec(&t->t_persistent_keepalive,
> >                 t->t_persistent_keepalive_interval);
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  void
> > @@ -1068,10 +1068,10 @@ wg_timers_event_handshake_initiated(stru
> >     int     msecs = REKEY_TIMEOUT * 1000;
> >     msecs += arc4random_uniform(REKEY_TIMEOUT_JITTER);
> >  
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled)
> >             timeout_add_msec(&t->t_retry_handshake, msecs);
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  void
> > @@ -1085,7 +1085,7 @@ wg_timers_event_handshake_responded(stru
> >  void
> >  wg_timers_event_handshake_complete(struct wg_timers *t)
> >  {
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled) {
> >             mtx_enter(&t->t_handshake_mtx);
> >             timeout_del(&t->t_retry_handshake);
> > @@ -1094,25 +1094,25 @@ wg_timers_event_handshake_complete(struc
> >             mtx_leave(&t->t_handshake_mtx);
> >             wg_timers_run_send_keepalive(t);
> >     }
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  void
> >  wg_timers_event_session_derived(struct wg_timers *t)
> >  {
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled)
> >             timeout_add_sec(&t->t_zero_key_material, REJECT_AFTER_TIME * 3);
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  void
> >  wg_timers_event_want_initiation(struct wg_timers *t)
> >  {
> > -   rw_enter_read(&t->t_lock);
> > +   mtx_enter(&t->t_mtx);
> >     if (!t->t_disabled)
> >             wg_timers_run_send_initiation(t, 0);
> > -   rw_exit_read(&t->t_lock);
> > +   mtx_leave(&t->t_mtx);
> >  }
> >  
> >  void
> > Index: sys/net/wg_noise.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/wg_noise.c,v
> > retrieving revision 1.6
> > diff -u -p -r1.6 wg_noise.c
> > --- sys/net/wg_noise.c      3 Feb 2023 18:31:17 -0000       1.6
> > +++ sys/net/wg_noise.c      28 Feb 2024 14:49:16 -0000
> > @@ -20,6 +20,7 @@
> >  #include <sys/systm.h>
> >  #include <sys/param.h>
> >  #include <sys/atomic.h>
> > +#include <sys/mutex.h>
> >  #include <sys/rwlock.h>
> >  
> >  #include <crypto/blake2s.h>
> > @@ -139,7 +140,7 @@ noise_remote_init(struct noise_remote *r
> >     bzero(r, sizeof(*r));
> >     memcpy(r->r_public, public, NOISE_PUBLIC_KEY_LEN);
> >     rw_init(&r->r_handshake_lock, "noise_handshake");
> > -   rw_init(&r->r_keypair_lock, "noise_keypair");
> > +   mtx_init_flags(&r->r_keypair_mtx, IPL_NET, "noise_keypair", 0);
> >  
> >     SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[0], kp_entry);
> >     SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[1], kp_entry);
> > @@ -468,10 +469,10 @@ noise_remote_begin_session(struct noise_
> >     kp.kp_remote_index = hs->hs_remote_index;
> >     getnanouptime(&kp.kp_birthdate);
> >     bzero(&kp.kp_ctr, sizeof(kp.kp_ctr));
> > -   rw_init(&kp.kp_ctr.c_lock, "noise_counter");
> > +   mtx_init_flags(&kp.kp_ctr.c_mtx, IPL_NET, "noise_counter", 0);
> >  
> >     /* Now we need to add_new_keypair */
> > -   rw_enter_write(&r->r_keypair_lock);
> > +   mtx_enter(&r->r_keypair_mtx);
> >     next = r->r_next;
> >     current = r->r_current;
> >     previous = r->r_previous;
> > @@ -497,7 +498,7 @@ noise_remote_begin_session(struct noise_
> >             r->r_next = noise_remote_keypair_allocate(r);
> >             *r->r_next = kp;
> >     }
> > -   rw_exit_write(&r->r_keypair_lock);
> > +   mtx_leave(&r->r_keypair_mtx);
> >  
> >     explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
> >     rw_exit_write(&r->r_handshake_lock);
> > @@ -514,25 +515,25 @@ noise_remote_clear(struct noise_remote *
> >     explicit_bzero(&r->r_handshake, sizeof(r->r_handshake));
> >     rw_exit_write(&r->r_handshake_lock);
> >  
> > -   rw_enter_write(&r->r_keypair_lock);
> > +   mtx_enter(&r->r_keypair_mtx);
> >     noise_remote_keypair_free(r, r->r_next);
> >     noise_remote_keypair_free(r, r->r_current);
> >     noise_remote_keypair_free(r, r->r_previous);
> >     r->r_next = NULL;
> >     r->r_current = NULL;
> >     r->r_previous = NULL;
> > -   rw_exit_write(&r->r_keypair_lock);
> > +   mtx_leave(&r->r_keypair_mtx);
> >  }
> >  
> >  void
> >  noise_remote_expire_current(struct noise_remote *r)
> >  {
> > -   rw_enter_write(&r->r_keypair_lock);
> > +   mtx_enter(&r->r_keypair_mtx);
> >     if (r->r_next != NULL)
> >             r->r_next->kp_valid = 0;
> >     if (r->r_current != NULL)
> >             r->r_current->kp_valid = 0;
> > -   rw_exit_write(&r->r_keypair_lock);
> > +   mtx_leave(&r->r_keypair_mtx);
> >  }
> >  
> >  int
> > @@ -541,7 +542,7 @@ noise_remote_ready(struct noise_remote *
> >     struct noise_keypair *kp;
> >     int ret;
> >  
> > -   rw_enter_read(&r->r_keypair_lock);
> > +   mtx_enter(&r->r_keypair_mtx);
> >     /* kp_ctr isn't locked here, we're happy to accept a racy read. */
> >     if ((kp = r->r_current) == NULL ||
> >         !kp->kp_valid ||
> > @@ -551,7 +552,7 @@ noise_remote_ready(struct noise_remote *
> >             ret = EINVAL;
> >     else
> >             ret = 0;
> > -   rw_exit_read(&r->r_keypair_lock);
> > +   mtx_leave(&r->r_keypair_mtx);
> >     return ret;
> >  }
> >  
> > @@ -562,7 +563,7 @@ noise_remote_encrypt(struct noise_remote
> >     struct noise_keypair *kp;
> >     int ret = EINVAL;
> >  
> > -   rw_enter_read(&r->r_keypair_lock);
> > +   mtx_enter(&r->r_keypair_mtx);
> >     if ((kp = r->r_current) == NULL)
> >             goto error;
> >  
> > @@ -601,7 +602,7 @@ noise_remote_encrypt(struct noise_remote
> >  
> >     ret = 0;
> >  error:
> > -   rw_exit_read(&r->r_keypair_lock);
> > +   mtx_leave(&r->r_keypair_mtx);
> >     return ret;
> >  }
> >  
> > @@ -616,7 +617,7 @@ noise_remote_decrypt(struct noise_remote
> >      * attempt the current keypair first as that is most likely. We also
> >      * want to make sure that the keypair is valid as it would be
> >      * catastrophic to decrypt against a zero'ed keypair. */
> > -   rw_enter_read(&r->r_keypair_lock);
> > +   mtx_enter(&r->r_keypair_mtx);
> >  
> >     if (r->r_current != NULL && r->r_current->kp_local_index == r_idx) {
> >             kp = r->r_current;
> > @@ -651,8 +652,6 @@ noise_remote_decrypt(struct noise_remote
> >      * we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
> >      * data packet can't confirm a session that we are an INITIATOR of. */
> >     if (kp == r->r_next) {
> > -           rw_exit_read(&r->r_keypair_lock);
> > -           rw_enter_write(&r->r_keypair_lock);
> >             if (kp == r->r_next && kp->kp_local_index == r_idx) {
> >                     noise_remote_keypair_free(r, r->r_previous);
> >                     r->r_previous = r->r_current;
> > @@ -662,7 +661,6 @@ noise_remote_decrypt(struct noise_remote
> >                     ret = ECONNRESET;
> >                     goto error;
> >             }
> > -           rw_enter(&r->r_keypair_lock, RW_DOWNGRADE);
> >     }
> >  
> >     /* Similar to when we encrypt, we want to notify the caller when we
> > @@ -680,7 +678,7 @@ noise_remote_decrypt(struct noise_remote
> >     ret = 0;
> >  
> >  error:
> > -   rw_exit(&r->r_keypair_lock);
> > +   mtx_leave(&r->r_keypair_mtx);
> >     return ret;
> >  }
> >  
> > @@ -731,9 +729,9 @@ noise_counter_send(struct noise_counter 
> >     return atomic_inc_long_nv((u_long *)&ctr->c_send) - 1;
> >  #else
> >     uint64_t ret;
> > -   rw_enter_write(&ctr->c_lock);
> > +   mtx_enter(&ctr->c_mtx);
> >     ret = ctr->c_send++;
> > -   rw_exit_write(&ctr->c_lock);
> > +   mtx_leave(&ctr->c_mtx);
> >     return ret;
> >  #endif
> >  }
> > @@ -745,7 +743,7 @@ noise_counter_recv(struct noise_counter 
> >     unsigned long bit;
> >     int ret = EEXIST;
> >  
> > -   rw_enter_write(&ctr->c_lock);
> > +   mtx_enter(&ctr->c_mtx);
> >  
> >     /* Check that the recv counter is valid */
> >     if (ctr->c_recv >= REJECT_AFTER_MESSAGES ||
> > @@ -779,7 +777,7 @@ noise_counter_recv(struct noise_counter 
> >  
> >     ret = 0;
> >  error:
> > -   rw_exit_write(&ctr->c_lock);
> > +   mtx_leave(&ctr->c_mtx);
> >     return ret;
> >  }
> >  
> > @@ -976,7 +974,7 @@ noise_timer_expired(struct timespec *bir
> >  #define T_LIM (COUNTER_WINDOW_SIZE + 1)
> >  #define T_INIT do {                                \
> >     bzero(&ctr, sizeof(ctr));               \
> > -   rw_init(&ctr.c_lock, "counter");        \
> > +   mtx_init_flags(&ctr.c_mtx, IPL_NET, "counter", 0);      \
> >  } while (0)
> >  #define T(num, v, e) do {                                          \
> >     if (noise_counter_recv(&ctr, v) != e) {                         \
> > Index: sys/net/wg_noise.h
> > ===================================================================
> > RCS file: /cvs/src/sys/net/wg_noise.h,v
> > retrieving revision 1.2
> > diff -u -p -r1.2 wg_noise.h
> > --- sys/net/wg_noise.h      9 Dec 2020 05:53:33 -0000       1.2
> > +++ sys/net/wg_noise.h      28 Feb 2024 14:49:16 -0000
> > @@ -21,6 +21,7 @@
> >  
> >  #include <sys/types.h>
> >  #include <sys/time.h>
> > +#include <sys/mutex.h>
> >  #include <sys/rwlock.h>
> >  
> >  #include <crypto/blake2s.h>
> > @@ -71,7 +72,7 @@ struct noise_handshake {
> >  };
> >  
> >  struct noise_counter {
> > -   struct rwlock            c_lock;
> > +   struct mutex             c_mtx;
> >     uint64_t                 c_send;
> >     uint64_t                 c_recv;
> >     unsigned long            c_backtrack[COUNTER_NUM];
> > @@ -100,7 +101,7 @@ struct noise_remote {
> >     uint8_t                          r_timestamp[NOISE_TIMESTAMP_LEN];
> >     struct timespec                  r_last_init; /* nanouptime */
> >  
> > -   struct rwlock                    r_keypair_lock;
> > +   struct mutex                     r_keypair_mtx;
> >     SLIST_HEAD(,noise_keypair)       r_unused_keypairs;
> >     struct noise_keypair            *r_next, *r_current, *r_previous;
> >     struct noise_keypair             r_keypair[3]; /* 3: next, current, 
> > previous. */
> > 
> 
> -- 
> :wq Claudio
> 

Reply via email to