On Mon, Jan 7, 2019 at 6:29 PM Jason Ekstrand <ja...@jlekstrand.net> wrote: > > > On Tue, Dec 4, 2018 at 12:27 PM Karol Herbst <kher...@redhat.com> wrote: >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/amd/vulkan/radv_meta_buffer.c | 8 ++-- >> src/amd/vulkan/radv_meta_bufimage.c | 28 +++++++------- >> src/amd/vulkan/radv_meta_clear.c | 4 +- >> src/amd/vulkan/radv_meta_fast_clear.c | 4 +- >> src/amd/vulkan/radv_meta_resolve_cs.c | 4 +- >> src/amd/vulkan/radv_query.c | 12 +++--- >> src/compiler/nir/nir_intrinsics.py | 12 +++--- >> src/compiler/nir/nir_lower_system_values.c | 43 +++++++++++++--------- >> 8 files changed, 61 insertions(+), 54 deletions(-) >> >> diff --git a/src/amd/vulkan/radv_meta_buffer.c >> b/src/amd/vulkan/radv_meta_buffer.c >> index 76854d7bbad..208988c3775 100644 >> --- a/src/amd/vulkan/radv_meta_buffer.c >> +++ b/src/amd/vulkan/radv_meta_buffer.c >> @@ -15,8 +15,8 @@ build_buffer_fill_shader(struct radv_device *dev) >> b.shader->info.cs.local_size[1] = 1; >> b.shader->info.cs.local_size[2] = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -67,8 +67,8 @@ build_buffer_copy_shader(struct radv_device *dev) >> b.shader->info.cs.local_size[1] = 1; >> b.shader->info.cs.local_size[2] = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> diff --git a/src/amd/vulkan/radv_meta_bufimage.c >> 
b/src/amd/vulkan/radv_meta_bufimage.c >> index 45df8438234..c8a733b3062 100644 >> --- a/src/amd/vulkan/radv_meta_bufimage.c >> +++ b/src/amd/vulkan/radv_meta_bufimage.c >> @@ -60,8 +60,8 @@ build_nir_itob_compute_shader(struct radv_device *dev, >> bool is_3d) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -289,8 +289,8 @@ build_nir_btoi_compute_shader(struct radv_device *dev, >> bool is_3d) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -511,8 +511,8 @@ build_nir_btoi_r32g32b32_compute_shader(struct >> radv_device *dev) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -719,8 +719,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev, >> bool is_3d) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 1; >> >> - nir_ssa_def *invoc_id = 
nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -932,8 +932,8 @@ build_nir_itoi_r32g32b32_compute_shader(struct >> radv_device *dev) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -1139,8 +1139,8 @@ build_nir_cleari_compute_shader(struct radv_device >> *dev, bool is_3d) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 0; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -1331,8 +1331,8 @@ build_nir_cleari_r32g32b32_compute_shader(struct >> radv_device *dev) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 0; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> diff --git a/src/amd/vulkan/radv_meta_clear.c 
>> b/src/amd/vulkan/radv_meta_clear.c >> index 5805d39c4b3..2e1274c929d 100644 >> --- a/src/amd/vulkan/radv_meta_clear.c >> +++ b/src/amd/vulkan/radv_meta_clear.c >> @@ -1025,8 +1025,8 @@ build_clear_htile_mask_shader() >> b.shader->info.cs.local_size[1] = 1; >> b.shader->info.cs.local_size[2] = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> diff --git a/src/amd/vulkan/radv_meta_fast_clear.c >> b/src/amd/vulkan/radv_meta_fast_clear.c >> index f2f68961e2e..136b596aa4f 100644 >> --- a/src/amd/vulkan/radv_meta_fast_clear.c >> +++ b/src/amd/vulkan/radv_meta_fast_clear.c >> @@ -58,8 +58,8 @@ build_dcc_decompress_compute_shader(struct radv_device >> *dev) >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 1; >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c >> b/src/amd/vulkan/radv_meta_resolve_cs.c >> index e56df7f8a59..1ee8ce32ac0 100644 >> --- a/src/amd/vulkan/radv_meta_resolve_cs.c >> +++ b/src/amd/vulkan/radv_meta_resolve_cs.c >> @@ -99,8 +99,8 @@ build_resolve_compute_shader(struct radv_device *dev, bool >> is_integer, bool is_s >> img_type, "out_img"); >> output_img->data.descriptor_set = 0; >> output_img->data.binding = 1; >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def 
*invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c >> index d3baf2357ff..a7d53b938a7 100644 >> --- a/src/amd/vulkan/radv_query.c >> +++ b/src/amd/vulkan/radv_query.c >> @@ -153,8 +153,8 @@ build_occlusion_query_shader(struct radv_device *device) >> { >> nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); >> nir_builder_instr_insert(&b, &src_buf->instr); >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -343,8 +343,8 @@ build_pipeline_statistics_query_shader(struct >> radv_device *device) { >> nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); >> nir_builder_instr_insert(&b, &src_buf->instr); >> >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> @@ -590,8 +590,8 @@ build_tfb_query_shader(struct radv_device *device) >> nir_builder_instr_insert(&b, &src_buf->instr); >> >> /* Compute global ID. 
*/ >> - nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); >> - nir_ssa_def *wg_id = nir_load_work_group_id(&b); >> + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b, 32); >> + nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32); >> nir_ssa_def *block_size = nir_imm_ivec4(&b, >> >> b.shader->info.cs.local_size[0], >> >> b.shader->info.cs.local_size[1], >> diff --git a/src/compiler/nir/nir_intrinsics.py >> b/src/compiler/nir/nir_intrinsics.py >> index 830c406b450..746ddd22d92 100644 >> --- a/src/compiler/nir/nir_intrinsics.py >> +++ b/src/compiler/nir/nir_intrinsics.py >> @@ -479,11 +479,11 @@ system_value("tess_coord", 3) >> system_value("tess_level_outer", 4) >> system_value("tess_level_inner", 2) >> system_value("patch_vertices_in", 1) >> -system_value("local_invocation_id", 3) >> -system_value("local_invocation_index", 1) >> -system_value("work_group_id", 3) >> +system_value("local_invocation_id", 3, bit_sizes=[32, 64]) >> +system_value("local_invocation_index", 1, bit_sizes=[32, 64]) >> +system_value("work_group_id", 3, bit_sizes=[32, 64]) >> system_value("user_clip_plane", 4, indices=[UCP_ID]) >> -system_value("num_work_groups", 3) >> +system_value("num_work_groups", 3, bit_sizes=[32, 64]) >> system_value("helper_invocation", 1) >> system_value("alpha_ref_float", 1) >> system_value("layer_id", 1) >> @@ -497,8 +497,8 @@ system_value("subgroup_le_mask", 0, bit_sizes=[32, 64]) >> system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64]) >> system_value("num_subgroups", 1) >> system_value("subgroup_id", 1) >> -system_value("local_group_size", 3) >> -system_value("global_invocation_id", 3) >> +system_value("local_group_size", 3, bit_sizes=[32, 64]) >> +system_value("global_invocation_id", 3, bit_sizes=[32, 64]) >> system_value("work_dim", 1) >> >> # Blend constant color values. 
Float values are clamped.# >> diff --git a/src/compiler/nir/nir_lower_system_values.c >> b/src/compiler/nir/nir_lower_system_values.c >> index 68b0ea89c8d..3cb9f224ecd 100644 >> --- a/src/compiler/nir/nir_lower_system_values.c >> +++ b/src/compiler/nir/nir_lower_system_values.c >> @@ -29,7 +29,7 @@ >> #include "nir_builder.h" >> >> static nir_ssa_def* >> -build_local_group_size(nir_builder *b) >> +build_local_group_size(nir_builder *b, unsigned bit_size) >> { >> nir_ssa_def *local_size; >> >> @@ -38,21 +38,27 @@ build_local_group_size(nir_builder *b) >> * point, but its intrinsic can still be used. >> */ >> if (b->shader->info.cs.local_size_variable) { >> - local_size = nir_load_local_group_size(b); >> + local_size = nir_load_local_group_size(b, bit_size); >> } else { >> nir_const_value local_size_const; >> memset(&local_size_const, 0, sizeof(local_size_const)); >> - local_size_const.u32[0] = b->shader->info.cs.local_size[0]; >> - local_size_const.u32[1] = b->shader->info.cs.local_size[1]; >> - local_size_const.u32[2] = b->shader->info.cs.local_size[2]; >> - local_size = nir_build_imm(b, 3, 32, local_size_const); >> + if (bit_size == 32) { >> + local_size_const.u32[0] = b->shader->info.cs.local_size[0]; >> + local_size_const.u32[1] = b->shader->info.cs.local_size[1]; >> + local_size_const.u32[2] = b->shader->info.cs.local_size[2]; >> + } else { > > > assert(bit_size == 64); > >> >> + local_size_const.u64[0] = b->shader->info.cs.local_size[0]; >> + local_size_const.u64[1] = b->shader->info.cs.local_size[1]; >> + local_size_const.u64[2] = b->shader->info.cs.local_size[2]; >> + } >> + local_size = nir_build_imm(b, 3, bit_size, local_size_const); >> } >> >> return local_size; >> } >> >> static nir_ssa_def * >> -build_local_invocation_id(nir_builder *b) >> +build_local_invocation_id(nir_builder *b, unsigned bit_size) >> { >> if (b->shader->options->lower_cs_local_id_from_index) { >> /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based >> @@ -72,8 +78,8 @@ 
build_local_invocation_id(nir_builder *b) >> * accidentally end up with a gl_LocalInvocationIndex that is too >> * large so it can safely be omitted. >> */ >> - nir_ssa_def *local_index = nir_load_local_invocation_index(b); >> - nir_ssa_def *local_size = build_local_group_size(b); >> + nir_ssa_def *local_index = nir_load_local_invocation_index(b, >> bit_size); >> + nir_ssa_def *local_size = build_local_group_size(b, bit_size); > > > In my iris clover branch, I instead have a line that just does "if > (bit_size == 64) id = nir_u2u64(b, id)" where "id" is the final computed 3D > ID just before returning it. This lets us satisfy the CL requirements while > still only using 32-bit system values and doing the calculation in 32 bits. > >> >> >> nir_ssa_def *id_x, *id_y, *id_z; >> id_x = nir_umod(b, local_index, >> @@ -86,7 +92,7 @@ build_local_invocation_id(nir_builder *b) >> nir_channel(b, local_size, 1))); > > > In other words, right here we have > > nir_ssa_def *id = nir_vec3(b, id_x, id_y, id_z); > > if (bit_size == 64) > id = nir_u2u64(b, id); > > return id; > > It's kind of nice to avoid unneeded 64-bit math, especially when some of that > math is division/modulus. :-) >
Yeah, I think for the local id we can assume it should fit nicely within 32 bits. >> >> return nir_vec3(b, id_x, id_y, id_z); >> } else { >> - return nir_load_local_invocation_id(b); >> + return nir_load_local_invocation_id(b, bit_size); >> } >> } >> >> @@ -120,6 +126,7 @@ convert_block(nir_block *block, nir_builder *b) >> >> b->cursor = nir_after_instr(&load_deref->instr); >> >> + unsigned bit_size = nir_dest_bit_size(load_deref->dest); >> nir_ssa_def *sysval = NULL; >> switch (var->data.location) { >> case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: { >> @@ -128,9 +135,9 @@ convert_block(nir_block *block, nir_builder *b) >> * "The value of gl_GlobalInvocationID is equal to >> * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID" >> */ >> - nir_ssa_def *group_size = build_local_group_size(b); >> - nir_ssa_def *group_id = nir_load_work_group_id(b); >> - nir_ssa_def *local_id = build_local_invocation_id(b); >> + nir_ssa_def *group_size = build_local_group_size(b, bit_size); >> + nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size); >> + nir_ssa_def *local_id = build_local_invocation_id(b, bit_size); >> >> sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id); >> break; >> @@ -150,7 +157,7 @@ convert_block(nir_block *block, nir_builder *b) >> * gl_WorkGroupSize.y + gl_LocalInvocationID.y * >> * gl_WorkGroupSize.x + gl_LocalInvocationID.x" >> */ >> - nir_ssa_def *local_id = nir_load_local_invocation_id(b); >> + nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size); >> >> nir_ssa_def *size_x = >> nir_imm_int(b, b->shader->info.cs.local_size[0]); >> @@ -170,11 +177,11 @@ convert_block(nir_block *block, nir_builder *b) >> * index from the local id. 
>> */ >> if (b->shader->options->lower_cs_local_id_from_index) >> - sysval = build_local_invocation_id(b); >> + sysval = build_local_invocation_id(b, bit_size); >> break; >> >> case SYSTEM_VALUE_LOCAL_GROUP_SIZE: { >> - sysval = build_local_group_size(b); >> + sysval = build_local_group_size(b, bit_size); >> break; >> } >> >> @@ -248,8 +255,8 @@ convert_block(nir_block *block, nir_builder *b) >> break; >> >> case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: { >> - nir_ssa_def *group_size = build_local_group_size(b); >> - nir_ssa_def *num_work_groups = nir_load_num_work_groups(b); >> + nir_ssa_def *group_size = build_local_group_size(b, bit_size); >> + nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, >> bit_size); >> sysval = nir_imul(b, group_size, num_work_groups); >> break; >> } >> -- >> 2.19.2 >> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev