The old method pushed each channel's uvec3 data of gl_LocalInvocationID.

The new method pushes 1 dword of data that is a 'thread local ID'
value. Based on that value, we can generate gl_LocalInvocationIndex
and gl_LocalInvocationID with some simple calculations.

Signed-off-by: Jordan Justen <>
 src/mesa/drivers/dri/i965/brw_compiler.h |  8 ---
 src/mesa/drivers/dri/i965/brw_fs.cpp     | 88 --------------------------------
 src/mesa/drivers/dri/i965/brw_fs.h       |  1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  7 ---
 4 files changed, 104 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index dda6297..6e6d20c 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -439,7 +439,6 @@ struct brw_cs_prog_data {
    unsigned threads;
    bool uses_barrier;
    bool uses_num_work_groups;
-   unsigned local_invocation_id_regs;
    int thread_local_id_index;
    struct {
@@ -831,13 +830,6 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
                unsigned *final_assembly_size,
                char **error_str);
- * Fill out local id payload for compute shader according to cs_prog_data.
- */
-brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
-                             void *buffer, uint32_t threads, uint32_t stride);
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b93f841..6bf720b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5514,31 +5514,6 @@ fs_visitor::setup_vs_payload()
    payload.num_regs = 2;
- * We are building the local ID push constant data using the simplest possible
- * method. We simply push the local IDs directly as they should appear in the
- * registers for the uvec3 gl_LocalInvocationID variable.
- *
- * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
- * registers worth of push constant space.
- *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
- * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
- * to coordinated.
- *
- * FINISHME: There are a few easy optimizations to consider.
- *
- * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
- *    no need for using push constant space for that dimension.
- *
- * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
- *    easily use 16-bit words rather than 32-bit dwords in the push constant
- *    data.
- *
- * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
- *    conveying the data, and thereby reduce push constant usage.
- *
- */
@@ -5582,15 +5557,7 @@ void
    assert(devinfo->gen >= 7);
-   brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
    payload.num_regs = 1;
-   if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
-      prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
-      payload.local_invocation_id_reg = payload.num_regs;
-      payload.num_regs += prog_data->local_invocation_id_regs;
-   }
@@ -6442,25 +6409,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
 fs_reg *
-   assert(stage == MESA_SHADER_COMPUTE);
-   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
-   struct brw_reg src =
-      brw_vec8_grf(payload.local_invocation_id_reg, 0);
-   src = retype(src, BRW_REGISTER_TYPE_UD);
-   bld.MOV(*reg, src);
- += dispatch_width / 8;
-   bld.MOV(offset(*reg, bld, 1), src);
- += dispatch_width / 8;
-   bld.MOV(offset(*reg, bld, 2), src);
-   return reg;
-fs_reg *
    assert(stage == MESA_SHADER_COMPUTE);
@@ -6673,39 +6621,3 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
    return g.get_assembly(final_assembly_size);
-brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
-                             void *buffer, uint32_t threads, uint32_t stride)
-   if (prog_data->local_invocation_id_regs == 0)
-      return;
-   /* 'stride' should be an integer number of registers, that is, a multiple
-    * of 32 bytes.
-    */
-   assert(stride % 32 == 0);
-   unsigned x = 0, y = 0, z = 0;
-   for (unsigned t = 0; t < threads; t++) {
-      uint32_t *param = (uint32_t *) buffer + stride * t / 4;
-      for (unsigned i = 0; i < prog_data->simd_size; i++) {
-         param[0 * prog_data->simd_size + i] = x;
-         param[1 * prog_data->simd_size + i] = y;
-         param[2 * prog_data->simd_size + i] = z;
-         x++;
-         if (x == prog_data->local_size[0]) {
-            x = 0;
-            y++;
-            if (y == prog_data->local_size[1]) {
-               y = 0;
-               z++;
-               if (z == prog_data->local_size[2])
-                  z = 0;
-            }
-         }
-      }
-   }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d28384d..de39f74 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -281,7 +281,6 @@ public:
                            unsigned base_offset, const nir_src &offset_src,
                            unsigned num_components);
    void emit_cs_terminate();
-   fs_reg *emit_cs_local_invocation_id_setup();
    fs_reg *emit_cs_work_group_id_setup();
    void emit_barrier();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 8a45cc6..beaa2d4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -272,13 +272,6 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
             *reg = *v->emit_samplemaskin_setup();
-      case nir_intrinsic_load_local_invocation_id:
-         assert(v->stage == MESA_SHADER_COMPUTE);
-         reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
-         if (reg->file == BAD_FILE)
-            *reg = *v->emit_cs_local_invocation_id_setup();
-         break;
       case nir_intrinsic_load_work_group_id:
          assert(v->stage == MESA_SHADER_COMPUTE);
          reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];

mesa-dev mailing list

Reply via email to