Re: [Mesa-dev] [PATCH] st/mesa: don't do L3 thread pinning for Blender

Marek Olšák Mon, 12 Nov 2018 16:22:14 -0800

Hi Edmondo,

can you test the two attached patches? They re-enable and rework the thread
pinning.


Thanks,
Marek

On Mon, Nov 12, 2018 at 4:31 PM Edmondo Tommasina <
edmondo.tommas...@gmail.com> wrote:

> On Mon, Nov 12, 2018 at 6:43 PM Michel Dänzer <mic...@daenzer.net> wrote:
>
>> On 2018-11-08 6:23 a.m., Marek Olšák wrote:
>> > Thanks a lot man. I'll reconsider this depending on the results I
>> receive.
>> >
>> > I may also just pin the Mesa threads and keep the app thread intact. It
>> > should perform OK with glthread, but not without glthread.
>> >
>> > Another option is to have the gallium and winsys threads "chase" the
>> main
>> > thread within the CPU by changing the thread affinity based on getcpu().
>>
>> While those are interesting ideas for the future, I'm afraid it's too
>> late for them for the 18.3.0 release (scheduled for November 21st IIRC).
>>
>> Please make sure the thread pinning code is disabled for the release, at
>> least by default.
>>
>
> I'm not sure what the best solution is, but pinning the threads to
> the L3 CCX has shown great potential on my Ryzen 5 2600, and it would
> be nice to explore the ideas presented by Marek, or maybe understand
> why the kernel scheduler prefers to put the threads on cores in
> different CCXs.
>
> For example, The Witcher 2 goes from 60 FPS to 70 FPS on average, which
> is impressive. Tomb Raider gains only about 1 FPS (average 104 FPS),
> but this may just be noise and is certainly not noticeable.
>
> Regards
> edmondo
>
>

From 58dfda6bb8d72624c9f059d253c4630b8d1fa57c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Mon, 12 Nov 2018 18:10:59 -0500
Subject: [PATCH 1/2] st/mesa: regularly re-pin driver threads to the CCX where
 the app thread is

This is used when glthread is disabled.

Mesa pretty much chases the app thread on the CPU.
The performance is the same as pinning the app thread.
---
 src/mesa/state_tracker/st_context.h |  2 ++
 src/mesa/state_tracker/st_draw.c    | 32 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 14b9b018809..95133c7020f 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -193,6 +193,8 @@ struct st_context
    /** This masks out unused shader resources. Only valid in draw calls. */
    uint64_t active_states;
 
+   unsigned pin_thread_counter; /* for L3 thread pinning on AMD Zen */
+
    /* If true, further analysis of states is required to know if something
     * has changed. Used mainly for shaders.
     */
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index eb52d9505b6..5910ffa5bda 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -58,6 +58,7 @@
 
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
+#include "util/u_cpu_detect.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
 #include "util/u_prim.h"
@@ -66,6 +67,13 @@
 #include "draw/draw_context.h"
 #include "cso_cache/cso_context.h"
 
+#ifdef PIPE_OS_LINUX
+#include <sched.h>
+#define HAVE_SCHED_GETCPU 1
+#else
+#define sched_getcpu() 0
+#define HAVE_SCHED_GETCPU 0
+#endif
 
 /**
  * Set the restart index.
@@ -122,6 +130,30 @@ prepare_draw(struct st_context *st, struct gl_context *ctx)
        st->gfx_shaders_may_be_dirty) {
       st_validate_state(st, ST_PIPELINE_RENDER);
    }
+
+   struct pipe_context *pipe = st->pipe;
+
+   /* Pin threads regularly to the same Zen CCX that the main thread is
+    * running on. The main thread can move between CCXs.
+    */
+   if (unlikely(HAVE_SCHED_GETCPU && /* Linux */
+                /* AMD Zen */
+                util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 &&
+                /* no glthread */
+                ctx->CurrentClientDispatch != ctx->MarshalExec &&
+                /* driver support */
+                pipe->set_context_param &&
+                /* do it occasionally */
+                ++st->pin_thread_counter % 512 == 0)) {
+      int cpu = sched_getcpu();
+      if (cpu >= 0) {
+         unsigned L3_cache = cpu / util_cpu_caps.cores_per_L3;
+
+         pipe->set_context_param(pipe,
+                                 PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
+                                 L3_cache);
+      }
+   }
 }
 
 /**
-- 
2.17.1

From 7886cdc2b7dd7541c93623b1509d71f4a42f3da3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Mon, 12 Nov 2018 19:09:25 -0500
Subject: [PATCH 2/2] st/mesa: pin driver threads to a fixed CCX when glthread
 is enabled

radeonsi has 3 driver threads (glthread, gallium, winsys), other drivers
may have 2 (glthread, gallium), so it makes sense to pin them to a random
CCX and keep that irrespective of the app thread.
---
 src/gallium/auxiliary/util/u_helpers.c | 63 ++++----------------------
 src/gallium/auxiliary/util/u_helpers.h |  3 +-
 src/mesa/state_tracker/st_manager.c    | 11 +++++
 3 files changed, 21 insertions(+), 56 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c
index 4c70c004178..821242c0181 100644
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -121,43 +121,6 @@ util_upload_index_buffer(struct pipe_context *pipe,
    return *out_buffer != NULL;
 }
 
-#ifdef HAVE_PTHREAD_SETAFFINITY
-
-static unsigned L3_cache_number;
-static once_flag thread_pinning_once_flag = ONCE_FLAG_INIT;
-
-static void
-util_set_full_cpu_affinity(void)
-{
-   cpu_set_t cpuset;
-
-   CPU_ZERO(&cpuset);
-   for (unsigned i = 0; i < CPU_SETSIZE; i++)
-      CPU_SET(i, &cpuset);
-
-   pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
-}
-
-static void
-util_init_thread_pinning(void)
-{
-   /* Get a semi-random number. */
-   int64_t t = os_time_get_nano();
-   L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));
-
-   /* Reset thread affinity for all child processes to prevent them from
-    * inheriting the current thread's affinity.
-    *
-    * XXX: If the driver is unloaded after this, and the app later calls
-    * fork(), the child process will likely crash before fork() returns,
-    * because the address where util_set_full_cpu_affinity was located
-    * will either be unmapped or point to random other contents.
-    */
-   pthread_atfork(NULL, NULL, util_set_full_cpu_affinity);
-}
-
-#endif
-
 /**
  * Called by MakeCurrent. Used to notify the driver that the application
  * thread may have been changed.
@@ -170,30 +133,21 @@ util_init_thread_pinning(void)
  *                      pinned.
  */
 void
-util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
+util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
+                                     thrd_t *upper_thread)
 {
-#ifdef HAVE_PTHREAD_SETAFFINITY
    /* If pinning has no effect, don't do anything. */
    if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
       return;
 
-   thrd_t current = thrd_current();
-   int cache = util_get_L3_for_pinned_thread(current,
-                                             util_cpu_caps.cores_per_L3);
-
-   call_once(&thread_pinning_once_flag, util_init_thread_pinning);
+   unsigned num_L3_caches = util_cpu_caps.nr_cpus /
+                            util_cpu_caps.cores_per_L3;
 
-   /* If the main thread is not pinned, choose the L3 cache. */
-   if (cache == -1) {
-      unsigned num_L3_caches = util_cpu_caps.nr_cpus /
-                               util_cpu_caps.cores_per_L3;
-
-      /* Choose a different L3 cache for each subsequent MakeCurrent. */
-      cache = p_atomic_inc_return(&L3_cache_number) % num_L3_caches;
-      util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
-   }
+   /* Get a semi-random number. */
+   int64_t t = os_time_get_nano();
+   unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;
 
-   /* Tell the driver to pin its threads to the same L3 cache. */
+   /* Tell the driver to pin its threads to the selected L3 cache. */
    if (ctx->set_context_param) {
       ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                              cache);
@@ -202,7 +156,6 @@ util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
    /* Do the same for the upper level thread if there is any (e.g. glthread) */
    if (upper_thread)
       util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
-#endif
 }
 
 /* This is a helper for hardware bring-up. Don't remove. */
diff --git a/src/gallium/auxiliary/util/u_helpers.h b/src/gallium/auxiliary/util/u_helpers.h
index 38c47c1cc98..ed8467291ba 100644
--- a/src/gallium/auxiliary/util/u_helpers.h
+++ b/src/gallium/auxiliary/util/u_helpers.h
@@ -52,7 +52,8 @@ bool util_upload_index_buffer(struct pipe_context *pipe,
                               unsigned *out_offset);
 
 void
-util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread);
+util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
+                                     thrd_t *upper_thread);
 
 struct pipe_query *
 util_begin_pipestat_query(struct pipe_context *ctx);
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 076ad42646d..73729d74545 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -800,6 +800,17 @@ st_start_thread(struct st_context_iface *stctxi)
    struct st_context *st = (struct st_context *) stctxi;
 
    _mesa_glthread_init(st->ctx);
+
+   /* Pin all driver threads to one L3 cache for optimal performance
+    * on AMD Zen. This is only done if glthread is enabled.
+    *
+    * If glthread is disabled, st_draw.c re-pins driver threads regularly
+    * based on the location of the app thread.
+    */
+   struct glthread_state *glthread = st->ctx->GLThread;
+   if (glthread && st->pipe->set_context_param) {
+      util_pin_driver_threads_to_random_L3(st->pipe, &glthread->queue.threads[0]);
+   }
 }
 
 
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/mesa: don't do L3 thread pinning for Blender

Reply via email to