Mostly related, I just had a look at the code and noticed:

static uint32_t
sched_random(void)
{
        uint32_t *rndptr;

        rndptr = DPCPU_PTR(randomval);
        *rndptr = *rndptr * 69069 + 5;

        return (*rndptr >> 16);
}

Except randomval starts at 0 for all CPUs, so they all walk the identical
sequence (the very first call even returns (0 * 69069 + 5) >> 16 == 0 on
every CPU). Should be easy to pre-init with an actual random value. Is
that something you played with?
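
Something along the lines of the sketch below should do it, I think.
Rough and untested, and the SYSINIT name and placement are my guesses,
but DPCPU_ID_PTR(), CPU_FOREACH() and arc4random() are all already
there in the kernel:

static void
sched_random_init(void *dummy __unused)
{
        int cpu;

        /* Seed every CPU's state so the per-CPU streams diverge. */
        CPU_FOREACH(cpu)
                *DPCPU_ID_PTR(cpu, randomval) = arc4random();
}
SYSINIT(sched_random_init, SI_SUB_LAST, SI_ORDER_ANY, sched_random_init,
    NULL);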

On 8/2/21, Alexander Motin <m...@freebsd.org> wrote:
> The branch main has been updated by mav:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=8bb173fb5bc33a02d5a4670c9a60bba0ece07bac
>
> commit 8bb173fb5bc33a02d5a4670c9a60bba0ece07bac
> Author:     Alexander Motin <m...@freebsd.org>
> AuthorDate: 2021-08-02 02:42:01 +0000
> Commit:     Alexander Motin <m...@freebsd.org>
> CommitDate: 2021-08-02 02:42:01 +0000
>
>     sched_ule(4): Use trylock when stealing load.
>
>     On some load patterns it is possible for several CPUs to try to
>     steal a thread from the same CPU despite the randomization
>     introduced.  It may cause significant lock contention when an idle
>     thread holding one queue lock tries to acquire another one.  Using
>     trylock on the remote queue both reduces the contention and makes
>     the lock ordering easier to handle.  If we can't get the lock inside
>     tdq_trysteal() we just return, allowing tdq_idled() to handle it.
>     If it happens in tdq_idled(), we repeat the search for load,
>     skipping this CPU.
>
>     On a 2-socket 80-thread Xeon system I am observing a dramatic
>     reduction of the lock spinning time when doing random uncached 4KB
>     reads from 12 ZVOLs, while IOPS increase from 327K to 403K.
>
>     MFC after:      1 month
> ---
>  sys/kern/sched_ule.c | 39 +++++++++++++++++++++++++--------------
>  1 file changed, 25 insertions(+), 14 deletions(-)
>
> diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
> index 028e07efa889..1bdcfb1f793d 100644
> --- a/sys/kern/sched_ule.c
> +++ b/sys/kern/sched_ule.c
> @@ -300,6 +300,8 @@ static struct tdq tdq_cpu;
>  #define      TDQ_LOCK_ASSERT(t, type)        mtx_assert(TDQ_LOCKPTR((t)), (type))
>  #define      TDQ_LOCK(t)             mtx_lock_spin(TDQ_LOCKPTR((t)))
>  #define      TDQ_LOCK_FLAGS(t, f)    mtx_lock_spin_flags(TDQ_LOCKPTR((t)), (f))
> +#define      TDQ_TRYLOCK(t)          mtx_trylock_spin(TDQ_LOCKPTR((t)))
> +#define      TDQ_TRYLOCK_FLAGS(t, f) mtx_trylock_spin_flags(TDQ_LOCKPTR((t)), (f))
>  #define      TDQ_UNLOCK(t)           mtx_unlock_spin(TDQ_LOCKPTR((t)))
>  #define      TDQ_LOCKPTR(t)          ((struct mtx *)(&(t)->tdq_lock))
>
> @@ -989,13 +991,22 @@ tdq_idled(struct tdq *tdq)
>               if (steal->tdq_load < steal_thresh ||
>                   steal->tdq_transferable == 0)
>                       goto restart;
> -             tdq_lock_pair(tdq, steal);
>               /*
> -              * We were assigned a thread while waiting for the locks.
> -              * Switch to it now instead of stealing a thread.
> +              * Try to lock both queues. If we are assigned a thread while
> +              * waiting for the lock, switch to it now instead of stealing.
> +              * If we can't get the lock, then somebody likely got there
> +              * first so continue searching.
>                */
> -             if (tdq->tdq_load)
> -                     break;
> +             TDQ_LOCK(tdq);
> +             if (tdq->tdq_load > 0) {
> +                     mi_switch(SW_VOL | SWT_IDLE);
> +                     return (0);
> +             }
> +             if (TDQ_TRYLOCK_FLAGS(steal, MTX_DUPOK) == 0) {
> +                     TDQ_UNLOCK(tdq);
> +                     CPU_CLR(cpu, &mask);
> +                     continue;
> +             }
>               /*
>                * The data returned by sched_highest() is stale and
>                * the chosen CPU no longer has an eligible thread, or
> @@ -1948,18 +1959,18 @@ tdq_trysteal(struct tdq *tdq)
>               if (steal->tdq_load < steal_thresh ||
>                   steal->tdq_transferable == 0)
>                       continue;
> -             tdq_lock_pair(tdq, steal);
>               /*
> -              * If we get to this point, unconditonally exit the loop
> -              * to bound the time spent in the critcal section.
> -              *
> -              * If a thread was added while interrupts were disabled don't
> -              * steal one here.
> +              * Try to lock both queues. If we are assigned a thread while
> +              * waiting for the lock, switch to it now instead of stealing.
> +              * If we can't get the lock, then somebody likely got there
> +              * first.  At this point unconditionally exit the loop to
> +              * bound the time spent in the critical section.
>                */
> -             if (tdq->tdq_load > 0) {
> -                     TDQ_UNLOCK(steal);
> +             TDQ_LOCK(tdq);
> +             if (tdq->tdq_load > 0)
> +                     break;
> +             if (TDQ_TRYLOCK_FLAGS(steal, MTX_DUPOK) == 0)
>                       break;
> -             }
>               /*
>                * The data returned by sched_highest() is stale and
>               * the chosen CPU no longer has an eligible thread.
>
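
For anyone skimming the diff, the shape of the change as a self-contained
userspace sketch (pthreads; the queue layout and helper name are mine,
purely illustrative, not the kernel code):

#include <pthread.h>
#include <stdbool.h>

struct queue {
        pthread_mutex_t lock;
        int             load;           /* runnable threads queued here */
};

/*
 * Lock our own queue unconditionally, but only trylock the victim's;
 * on contention give up and let the caller pick another CPU instead
 * of spinning, which is what TDQ_TRYLOCK_FLAGS(steal, MTX_DUPOK)
 * buys the scheduler.  Returns true if one unit of load was moved.
 */
static bool
try_steal(struct queue *self, struct queue *victim)
{
        bool stolen = false;

        pthread_mutex_lock(&self->lock);
        /* We may have been handed work while racing for our own lock. */
        if (self->load == 0 && pthread_mutex_trylock(&victim->lock) == 0) {
                /* Revalidate: the load seen before locking was stale. */
                if (victim->load > 0) {
                        victim->load--;
                        self->load++;
                        stolen = true;
                }
                pthread_mutex_unlock(&victim->lock);
        }
        pthread_mutex_unlock(&self->lock);
        return (stolen);
}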


-- 
Mateusz Guzik <mjguzik gmail.com>