On Fri, Mar 23, 2018 at 9:15 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > On Fri, Mar 23, 2018 at 12:33 PM, Karol Herbst <kher...@redhat.com> wrote: >> >> With OpenCL the size of some system value depends on the Physical model >> choosen, so we need a way to load any system value as 32 or 64 bit. >> >> Signed-off-by: Karol Herbst <kher...@redhat.com> >> --- >> src/compiler/nir/nir_builder.h | 10 +++++--- >> src/compiler/nir/nir_lower_alpha_test.c | 2 +- >> src/compiler/nir/nir_lower_clip.c | 3 ++- >> src/compiler/nir/nir_lower_subgroups.c | 8 +++--- >> src/compiler/nir/nir_lower_system_values.c | 31 >> ++++++++++++------------ >> src/compiler/nir/nir_lower_two_sided_color.c | 2 +- >> src/compiler/nir/nir_lower_wpos_center.c | 2 +- >> src/compiler/spirv/vtn_subgroup.c | 2 +- >> src/gallium/auxiliary/nir/tgsi_to_nir.c | 3 ++- >> src/intel/blorp/blorp_blit.c | 2 +- >> src/intel/blorp/blorp_clear.c | 2 +- >> src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 6 ++--- >> src/mesa/drivers/dri/i965/brw_tcs.c | 2 +- >> 13 files changed, 40 insertions(+), 35 deletions(-) >> >> diff --git a/src/compiler/nir/nir_builder.h >> b/src/compiler/nir/nir_builder.h >> index 36e0ae3ac63..4e93cd08169 100644 >> --- a/src/compiler/nir/nir_builder.h >> +++ b/src/compiler/nir/nir_builder.h >> @@ -612,13 +612,14 @@ nir_copy_var(nir_builder *build, nir_variable *dest, >> nir_variable *src) >> >> /* Generic builder for system values. */ >> static inline nir_ssa_def * >> -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) >> +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index, >> + unsigned bit_size) >> { >> nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, >> op); >> load->num_components = nir_intrinsic_infos[op].dest_components; >> load->const_index[0] = index; >> nir_ssa_dest_init(&load->instr, &load->dest, >> - nir_intrinsic_infos[op].dest_components, 32, NULL); >> + nir_intrinsic_infos[op].dest_components, bit_size, >> NULL); >> nir_builder_instr_insert(build, &load->instr); >> return &load->dest.ssa; >> } >> @@ -630,9 +631,10 @@ nir_load_system_value(nir_builder *build, >> nir_intrinsic_op op, int index) >> >> #define DEFINE_SYSTEM_VALUE(name) >> \ >> static inline nir_ssa_def * >> \ >> - nir_load_##name(nir_builder *build) >> \ >> + nir_load_##name(nir_builder *build, unsigned bit_size) >> \ > > > I was really hoping that this change wouldn't touch every single intrinsic > helper. Maybe with Rob's python-based intrinsics table we can do something > better. >
I was kind of thinking of declaring builtins as either 32, 64 or 32/64 bit and just generate a function with a bit_size argument for the later maybe, but I think we really want to do that in python and not with C preprocessor macros :) >> >> { >> \ >> - return nir_load_system_value(build, nir_intrinsic_load_##name, 0); >> \ >> + return nir_load_system_value(build, nir_intrinsic_load_##name, 0, >> \ >> + bit_size); >> \ >> } >> >> #include "nir_intrinsics.h" >> diff --git a/src/compiler/nir/nir_lower_alpha_test.c >> b/src/compiler/nir/nir_lower_alpha_test.c >> index 6bf9ff142df..29f91ab9428 100644 >> --- a/src/compiler/nir/nir_lower_alpha_test.c >> +++ b/src/compiler/nir/nir_lower_alpha_test.c >> @@ -92,7 +92,7 @@ nir_lower_alpha_test(nir_shader *shader, enum >> compare_func func, >> >> nir_ssa_def *condition = >> nir_compare_func(&b, func, >> - alpha, nir_load_alpha_ref_float(&b)); >> + alpha, nir_load_alpha_ref_float(&b, >> 32)); >> >> nir_intrinsic_instr *discard = >> nir_intrinsic_instr_create(b.shader, >> diff --git a/src/compiler/nir/nir_lower_clip.c >> b/src/compiler/nir/nir_lower_clip.c >> index ea12f51a7bb..b9a91f7d40b 100644 >> --- a/src/compiler/nir/nir_lower_clip.c >> +++ b/src/compiler/nir/nir_lower_clip.c >> @@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned >> ucp_enables, >> for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { >> if (ucp_enables & (1 << plane)) { >> nir_ssa_def *ucp = >> - nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, >> plane); >> + nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, >> + plane, 32); >> >> /* calculate clipdist[plane] - dot(ucp, cv): */ >> clipdist[plane] = nir_fdot4(&b, ucp, cv); >> diff --git a/src/compiler/nir/nir_lower_subgroups.c >> b/src/compiler/nir/nir_lower_subgroups.c >> index 0d3c83b7951..7e910c013a9 100644 >> --- a/src/compiler/nir/nir_lower_subgroups.c >> +++ b/src/compiler/nir/nir_lower_subgroups.c >> @@ -190,7 +190,7 @@ static nir_ssa_def * >> lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin, >> bool lower_to_scalar) >> { >> - nir_ssa_def *index = nir_load_subgroup_invocation(b); >> + nir_ssa_def *index = nir_load_subgroup_invocation(b, 32); >> switch (intrin->intrinsic) { >> case nir_intrinsic_shuffle_xor: >> assert(intrin->src[1].is_ssa); >> @@ -300,7 +300,7 @@ lower_subgroups_intrin(nir_builder *b, >> nir_intrinsic_instr *intrin, >> assert(options->subgroup_size <= 64); >> uint64_t group_mask = ~0ull >> (64 - options->subgroup_size); >> >> - nir_ssa_def *count = nir_load_subgroup_invocation(b); >> + nir_ssa_def *count = nir_load_subgroup_invocation(b, 32); >> nir_ssa_def *val; >> switch (intrin->intrinsic) { >> case nir_intrinsic_load_subgroup_eq_mask: >> @@ -373,7 +373,7 @@ lower_subgroups_intrin(nir_builder *b, >> nir_intrinsic_instr *intrin, >> >> case nir_intrinsic_ballot_bit_count_exclusive: >> case nir_intrinsic_ballot_bit_count_inclusive: { >> - nir_ssa_def *count = nir_load_subgroup_invocation(b); >> + nir_ssa_def *count = nir_load_subgroup_invocation(b, 32); >> nir_ssa_def *mask = nir_imm_intN_t(b, ~0ull, >> options->ballot_bit_size); >> if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_inclusive) >> { >> const unsigned bits = options->ballot_bit_size; >> @@ -396,7 +396,7 @@ lower_subgroups_intrin(nir_builder *b, >> nir_intrinsic_instr *intrin, >> nir_ssa_dest_init(&first->instr, &first->dest, 1, 32, NULL); >> nir_builder_instr_insert(b, &first->instr); >> >> - return nir_ieq(b, nir_load_subgroup_invocation(b), >> &first->dest.ssa); >> + return nir_ieq(b, nir_load_subgroup_invocation(b, 32), >> &first->dest.ssa); >> } >> >> case nir_intrinsic_shuffle: >> diff --git a/src/compiler/nir/nir_lower_system_values.c >> b/src/compiler/nir/nir_lower_system_values.c >> index fb560ee21bb..d507c28f421 100644 >> --- a/src/compiler/nir/nir_lower_system_values.c >> +++ b/src/compiler/nir/nir_lower_system_values.c >> @@ -46,6 +46,7 @@ convert_block(nir_block *block, nir_builder *b) >> if (var->data.mode != nir_var_system_value) >> continue; >> >> + unsigned bit_size = load_var->dest.ssa.bit_size; >> b->cursor = nir_after_instr(&load_var->instr); >> >> nir_ssa_def *sysval = NULL; >> @@ -59,15 +60,15 @@ convert_block(nir_block *block, nir_builder *b) >> >> nir_const_value local_size; >> memset(&local_size, 0, sizeof(local_size)); >> - local_size.u32[0] = b->shader->info.cs.local_size[0]; >> - local_size.u32[1] = b->shader->info.cs.local_size[1]; >> - local_size.u32[2] = b->shader->info.cs.local_size[2]; >> + local_size.u64[0] = b->shader->info.cs.local_size[0]; >> + local_size.u64[1] = b->shader->info.cs.local_size[1]; >> + local_size.u64[2] = b->shader->info.cs.local_size[2]; >> >> - nir_ssa_def *group_id = nir_load_work_group_id(b); >> - nir_ssa_def *local_id = nir_load_local_invocation_id(b); >> + nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size); >> + nir_ssa_def *local_id = nir_load_local_invocation_id(b, >> bit_size); >> >> sysval = nir_iadd(b, nir_imul(b, group_id, >> - nir_build_imm(b, 3, 32, >> local_size)), >> + nir_build_imm(b, 3, bit_size, >> local_size)), > > > This doesn't do what you think it does. Due to the way that the different > arrays in nir_const_value alias, you can't put 64-bit values in the > nir_const_value and then use 32 for nir_build_imm and expect it to work. We > can either make a smarter immediate builder or just insert a u2u64 > instruction which will get properly constant folded. > I see. >> >> local_id); >> break; >> } >> @@ -86,12 +87,12 @@ convert_block(nir_block *block, nir_builder *b) >> * gl_WorkGroupSize.y + gl_LocalInvocationID.y * >> * gl_WorkGroupSize.x + gl_LocalInvocationID.x" >> */ >> - nir_ssa_def *local_id = nir_load_local_invocation_id(b); >> + nir_ssa_def *local_id = nir_load_local_invocation_id(b, >> bit_size); >> >> nir_ssa_def *size_x = >> - nir_imm_int(b, b->shader->info.cs.local_size[0]); >> + nir_imm_intN_t(b, b->shader->info.cs.local_size[0], >> bit_size); >> nir_ssa_def *size_y = >> - nir_imm_int(b, b->shader->info.cs.local_size[1]); >> + nir_imm_intN_t(b, b->shader->info.cs.local_size[1], >> bit_size); >> >> sysval = nir_imul(b, nir_channel(b, local_id, 2), >> nir_imul(b, size_x, size_y)); >> @@ -104,17 +105,17 @@ convert_block(nir_block *block, nir_builder *b) >> case SYSTEM_VALUE_VERTEX_ID: >> if (b->shader->options->vertex_id_zero_based) { >> sysval = nir_iadd(b, >> - nir_load_vertex_id_zero_base(b), >> - nir_load_base_vertex(b)); >> + nir_load_vertex_id_zero_base(b, bit_size), >> + nir_load_base_vertex(b, bit_size)); >> } else { >> - sysval = nir_load_vertex_id(b); >> + sysval = nir_load_vertex_id(b, bit_size); >> } >> break; >> >> case SYSTEM_VALUE_INSTANCE_INDEX: >> sysval = nir_iadd(b, >> - nir_load_instance_id(b), >> - nir_load_base_instance(b)); >> + nir_load_instance_id(b, bit_size), >> + nir_load_base_instance(b, bit_size)); >> break; >> >> case SYSTEM_VALUE_SUBGROUP_EQ_MASK: >> @@ -145,7 +146,7 @@ convert_block(nir_block *block, nir_builder *b) >> if (sysval == NULL) { >> nir_intrinsic_op sysval_op = >> nir_intrinsic_from_system_value(var->data.location); >> - sysval = nir_load_system_value(b, sysval_op, 0); >> + sysval = nir_load_system_value(b, sysval_op, 0, bit_size); >> } >> >> nir_ssa_def_rewrite_uses(&load_var->dest.ssa, >> nir_src_for_ssa(sysval)); >> diff --git a/src/compiler/nir/nir_lower_two_sided_color.c >> b/src/compiler/nir/nir_lower_two_sided_color.c >> index b6742ab2462..20af88b6aec 100644 >> --- a/src/compiler/nir/nir_lower_two_sided_color.c >> +++ b/src/compiler/nir/nir_lower_two_sided_color.c >> @@ -158,7 +158,7 @@ nir_lower_two_sided_color_block(nir_block *block, >> * bcsel(load_system_value(FACE), load_input(COLn), >> load_input(BFCn)) >> */ >> b->cursor = nir_before_instr(&intr->instr); >> - nir_ssa_def *face = nir_load_front_face(b); >> + nir_ssa_def *face = nir_load_front_face(b, 32); >> nir_ssa_def *front = load_input(b, state->colors[idx].front); >> nir_ssa_def *back = load_input(b, state->colors[idx].back); >> nir_ssa_def *color = nir_bcsel(b, face, front, back); >> diff --git a/src/compiler/nir/nir_lower_wpos_center.c >> b/src/compiler/nir/nir_lower_wpos_center.c >> index dca810d735e..a0d9719e270 100644 >> --- a/src/compiler/nir/nir_lower_wpos_center.c >> +++ b/src/compiler/nir/nir_lower_wpos_center.c >> @@ -58,7 +58,7 @@ update_fragcoord(nir_builder *b, nir_intrinsic_instr >> *intr, >> wpos = nir_fadd(b, wpos, nir_imm_vec4(b, 0.5f, 0.5f, 0.0f, 0.0f)); >> } else { >> nir_ssa_def *spos = >> - nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0); >> + nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0, 32); >> >> wpos = nir_fadd(b, wpos, >> nir_vec4(b, >> diff --git a/src/compiler/spirv/vtn_subgroup.c >> b/src/compiler/spirv/vtn_subgroup.c >> index bd3143962be..50a4ecc2dcc 100644 >> --- a/src/compiler/spirv/vtn_subgroup.c >> +++ b/src/compiler/spirv/vtn_subgroup.c >> @@ -110,7 +110,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp >> opcode, >> >> nir_intrinsic_ballot_bitfield_extract); >> >> intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def); >> - intrin->src[1] = >> nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb)); >> + intrin->src[1] = >> nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb, 32)); >> >> nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 32, NULL); >> nir_builder_instr_insert(&b->nb, &intrin->instr); >> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c >> b/src/gallium/auxiliary/nir/tgsi_to_nir.c >> index f8df4c10137..852b24eaaf1 100644 >> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c >> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c >> @@ -610,7 +610,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, >> unsigned file, unsigned index, >> nir_ssa_def *tgsi_frontface[4] = { >> nir_bcsel(&c->build, >> nir_load_system_value(&c->build, >> - >> nir_intrinsic_load_front_face, 0), >> + >> nir_intrinsic_load_front_face, >> + 0, 32), >> nir_imm_float(&c->build, 1.0), >> nir_imm_float(&c->build, -1.0)), >> nir_imm_float(&c->build, 0.0), >> diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c >> index 0757db0d04b..ca70734981a 100644 >> --- a/src/intel/blorp/blorp_blit.c >> +++ b/src/intel/blorp/blorp_blit.c >> @@ -114,7 +114,7 @@ blorp_blit_get_frag_coords(nir_builder *b, >> >> if (key->persample_msaa_dispatch) { >> return nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, >> 1), >> - nir_load_sample_id(b)); >> + nir_load_sample_id(b, 32)); >> } else { >> return nir_vec2(b, nir_channel(b, coord, 0), nir_channel(b, coord, >> 1)); >> } >> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c >> index 832e8ee26f9..c0207d8fa0c 100644 >> --- a/src/intel/blorp/blorp_clear.c >> +++ b/src/intel/blorp/blorp_clear.c >> @@ -880,7 +880,7 @@ blorp_params_get_mcs_partial_resolve_kernel(struct >> blorp_context *blorp, >> /* Do an MCS fetch and check if it is equal to the magic clear value >> */ >> nir_ssa_def *mcs = >> blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, blorp_nir_frag_coord(&b)), >> - nir_load_layer_id(&b)); >> + nir_load_layer_id(&b, 32)); >> nir_ssa_def *is_clear = >> blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples); >> >> diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> index bfbdea0e8fa..846e82ffdf9 100644 >> --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c >> @@ -61,11 +61,11 @@ lower_cs_intrinsics_convert_block(struct >> lower_intrinsics_state *state, >> if (state->local_workgroup_size <= state->dispatch_width) >> subgroup_id = nir_imm_int(b, 0); >> else >> - subgroup_id = nir_load_subgroup_id(b); >> + subgroup_id = nir_load_subgroup_id(b, 32); >> >> nir_ssa_def *thread_local_id = >> nir_imul(b, subgroup_id, nir_imm_int(b, >> state->dispatch_width)); >> - nir_ssa_def *channel = nir_load_subgroup_invocation(b); >> + nir_ssa_def *channel = nir_load_subgroup_invocation(b, 32); >> sysval = nir_iadd(b, channel, thread_local_id); >> break; >> } >> @@ -86,7 +86,7 @@ lower_cs_intrinsics_convert_block(struct >> lower_intrinsics_state *state, >> */ >> unsigned *size = nir->info.cs.local_size; >> >> - nir_ssa_def *local_index = nir_load_local_invocation_index(b); >> + nir_ssa_def *local_index = nir_load_local_invocation_index(b, >> 32); >> >> nir_const_value uvec3; >> memset(&uvec3, 0, sizeof(uvec3)); >> diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c >> b/src/mesa/drivers/dri/i965/brw_tcs.c >> index 931ef64166c..dda6431108d 100644 >> --- a/src/mesa/drivers/dri/i965/brw_tcs.c >> +++ b/src/mesa/drivers/dri/i965/brw_tcs.c >> @@ -48,7 +48,7 @@ create_passthrough_tcs(void *mem_ctx, const struct >> brw_compiler *compiler, >> nir_intrinsic_instr *store; >> nir_ssa_def *zero = nir_imm_int(&b, 0); >> nir_ssa_def *invoc_id = >> - nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0); >> + nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0, 32); >> >> nir->info.inputs_read = key->outputs_written & >> ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); >> -- >> 2.14.3 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev