From: Mathias Fröhlich <mathias.froehl...@web.de>

Hi all,

Avoid looping over all VARYING_SLOT_MAX entries of the urb_setup array
in genX_upload_sbe. Instead, build an array of indices of the active
urb_setup entries once, in the compile step, and walk only those
active entries on each upload.

This change moves genX_upload_sbe down a few lines in perf profiles.
Please review.

best

Mathias

Signed-off-by: Mathias Fröhlich <mathias.froehl...@web.de>
---
 src/intel/compiler/brw_compiler.h             |  7 +++++++
 src/intel/compiler/brw_fs.cpp                 | 13 +++++++++++++
 src/mesa/drivers/dri/i965/genX_state_upload.c |  7 +++----
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index b1086bbcee..988b926b99 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -731,6 +731,13 @@ struct brw_wm_prog_data {
     * For varying slots that are not used by the FS, the value is -1.
     */
    int urb_setup[VARYING_SLOT_MAX];
+   /**
+    * Cache structure into the urb_setup array above that contains the
+    * attribute numbers of active varyings out of urb_setup.
+    * The actual count is already available with the num_varying_inputs
+    * value above.
+    */
+   int urb_setup_attribs[VARYING_SLOT_MAX];
 };
 
 struct brw_push_const_block {
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 6fb46e7374..52c5a22b81 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1564,6 +1564,19 @@ fs_visitor::calculate_urb_setup()
          prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
    }
 
+   /*
+    * Build up an array of indices into the urb_setup array that
+    * references the active entries of the urb_setup array.
+    * Used to accelerate walking the active entries of the urb_setup array
+    * on each upload.
+    */
+   for (int attr = 0, index = 0; attr < VARYING_SLOT_MAX; attr++) {
+      int input_index = prog_data->urb_setup[attr];
+      if (input_index < 0)
+         continue;
+      prog_data->urb_setup_attribs[index++] = attr;
+   }
+
    prog_data->num_varying_inputs = urb_next;
 }
 
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index aa4d64d08e..e2ed4ef040 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1052,12 +1052,11 @@ genX(calculate_attr_overrides)(const struct brw_context *brw,
     * BRW_NEW_PRIMITIVE | BRW_NEW_GS_PROG_DATA | BRW_NEW_TES_PROG_DATA
     */
    bool drawing_points = brw_is_drawing_points(brw);
-
-   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+   for (int index = 0; index < wm_prog_data->num_varying_inputs; index++) {
+      int attr = wm_prog_data->urb_setup_attribs[index];
       int input_index = wm_prog_data->urb_setup[attr];
 
-      if (input_index < 0)
-         continue;
+      assert(0 <= input_index);
 
       /* _NEW_POINT */
       bool point_sprite = false;
-- 
2.14.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to