Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> On 03/29/2017 04:14 PM, Dave Airlie wrote: > From: Dave Airlie <airl...@redhat.com> > > Doing this before tessellation makes doing some bits of > tessellation a bit cleaner. It also cleans up a bit of the > llvm generator code. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/amd/common/ac_nir_to_llvm.c | 144 > ++++++++++------------------------------ > src/amd/vulkan/radv_pipeline.c | 1 + > 2 files changed, 36 insertions(+), 109 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index f164d8f..78602fd 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -144,8 +144,6 @@ struct nir_to_llvm_context { > int num_locals; > LLVMValueRef *locals; > bool has_ddxy; > - uint8_t num_input_clips; > - uint8_t num_input_culls; > uint8_t num_output_clips; > uint8_t num_output_culls; > > @@ -170,12 +168,9 @@ static unsigned > shader_io_get_unique_index(gl_varying_slot slot) > return 0; > if (slot == VARYING_SLOT_PSIZ) > return 1; > - if (slot == VARYING_SLOT_CLIP_DIST0 || > - slot == VARYING_SLOT_CULL_DIST0) > + if (slot == VARYING_SLOT_CLIP_DIST0) > return 2; > - if (slot == VARYING_SLOT_CLIP_DIST1 || > - slot == VARYING_SLOT_CULL_DIST1) > - return 3; > + /* 3 is reserved for clip dist as well */ > if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) > return 4 + (slot - VARYING_SLOT_VAR0); > unreachable("illegal slot in get unique index\n"); > @@ -2195,7 +2190,6 @@ load_gs_input(struct nir_to_llvm_context *ctx, > unsigned param, vtx_offset_param; > LLVMValueRef value[4], result; > unsigned vertex_index; > - unsigned cull_offset = 0; > radv_get_deref_offset(ctx, &instr->variables[0]->deref, > false, &vertex_index, > &const_index, &indir_index); > @@ -2205,13 +2199,11 @@ load_gs_input(struct nir_to_llvm_context *ctx, > LLVMConstInt(ctx->i32, 4, false), ""); > > param = > 
shader_io_get_unique_index(instr->variables[0]->var->data.location); > - if (instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0) > - cull_offset += ctx->num_input_clips; > for (unsigned i = 0; i < instr->num_components; i++) { > > args[0] = ctx->esgs_ring; > args[1] = vtx_offset; > - args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index + > cull_offset) * 256, false); > + args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) > * 256, false); > args[3] = ctx->i32zero; > args[4] = ctx->i32one; /* OFFEN */ > args[5] = ctx->i32zero; /* IDXEN */ > @@ -2366,8 +2358,7 @@ visit_store_var(struct nir_to_llvm_context *ctx, > > value = llvm_extract_elem(ctx, src, chan); > > - if (instr->variables[0]->var->data.location == > VARYING_SLOT_CLIP_DIST0 || > - instr->variables[0]->var->data.location == > VARYING_SLOT_CULL_DIST0) > + if (instr->variables[0]->var->data.compact) > stride = 1; > if (indir_index) { > unsigned count = glsl_count_attribute_slots( > @@ -3143,7 +3134,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, > LLVMValueRef gs_next_vertex; > LLVMValueRef can_emit, kill; > int idx; > - int clip_cull_slot = -1; > + > assert(instr->const_index[0] == 0); > /* Write vertex attribute values to GSVS ring */ > gs_next_vertex = LLVMBuildLoad(ctx->builder, > @@ -3175,27 +3166,11 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, > if (!(ctx->output_mask & (1ull << i))) > continue; > > - if (i == VARYING_SLOT_CLIP_DIST1 || > - i == VARYING_SLOT_CULL_DIST1) > - continue; > - > - if (i == VARYING_SLOT_CLIP_DIST0 || > - i == VARYING_SLOT_CULL_DIST0) { > + if (i == VARYING_SLOT_CLIP_DIST0) { > /* pack clip and cull into a single set of slots */ > - if (clip_cull_slot == -1) { > - clip_cull_slot = idx; > - if (ctx->num_output_clips + > ctx->num_output_culls > 4) > - slot_inc = 2; > - } else { > - slot = clip_cull_slot; > - slot_inc = 0; > - } > - if (i == VARYING_SLOT_CLIP_DIST0) > - length = ctx->num_output_clips; > - if (i == 
VARYING_SLOT_CULL_DIST0) { > - start = ctx->num_output_clips; > - length = ctx->num_output_culls; > - } > + length = ctx->num_output_clips + ctx->num_output_culls; > + if (length > 4) > + slot_inc = 2; > } > for (unsigned j = 0; j < length; j++) { > LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, > @@ -4083,22 +4058,6 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, > } > } > > -static void > -handle_gs_input_decl(struct nir_to_llvm_context *ctx, > - struct nir_variable *variable) > -{ > - int idx = variable->data.location; > - > - if (idx == VARYING_SLOT_CLIP_DIST0 || > - idx == VARYING_SLOT_CULL_DIST0) { > - int length = > glsl_get_length(glsl_get_array_element(variable->type)); > - if (idx == VARYING_SLOT_CLIP_DIST0) > - ctx->num_input_clips = length; > - else > - ctx->num_input_culls = length; > - } > -} > - > static void interp_fs_input(struct nir_to_llvm_context *ctx, > unsigned attr, > LLVMValueRef interp_param, > @@ -4191,9 +4150,6 @@ handle_shader_input_decl(struct nir_to_llvm_context > *ctx, > case MESA_SHADER_FRAGMENT: > handle_fs_input_decl(ctx, variable); > break; > - case MESA_SHADER_GEOMETRY: > - handle_gs_input_decl(ctx, variable); > - break; > default: > break; > } > @@ -4276,33 +4232,33 @@ static LLVMValueRef si_build_alloca_undef(struct > nir_to_llvm_context *ctx, > > static void > handle_shader_output_decl(struct nir_to_llvm_context *ctx, > + struct nir_shader *nir, > struct nir_variable *variable) > { > int idx = variable->data.location + variable->data.index; > unsigned attrib_count = glsl_count_attribute_slots(variable->type, > false); > - > + unsigned mask_attribs; > variable->data.driver_location = idx * 4; > > if (ctx->stage == MESA_SHADER_VERTEX || > ctx->stage == MESA_SHADER_GEOMETRY) { > - if (idx == VARYING_SLOT_CLIP_DIST0 || > - idx == VARYING_SLOT_CULL_DIST0) { > + if (idx == VARYING_SLOT_CLIP_DIST0) { > int length = glsl_get_length(variable->type); > - if (idx == VARYING_SLOT_CLIP_DIST0) { > - if (ctx->stage == 
MESA_SHADER_VERTEX) > - > ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1; > - ctx->num_output_clips = length; > - } else if (idx == VARYING_SLOT_CULL_DIST0) { > - if (ctx->stage == MESA_SHADER_VERTEX) > - > ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1; > - ctx->num_output_culls = length; > + if (ctx->stage == MESA_SHADER_VERTEX) { > + ctx->shader_info->vs.outinfo.clip_dist_mask = > (1 << nir->info->clip_distance_array_size) - 1; > + ctx->shader_info->vs.outinfo.cull_dist_mask = > (1 << nir->info->cull_distance_array_size) - 1; > } > + ctx->num_output_clips = > nir->info->clip_distance_array_size; > + ctx->num_output_culls = > nir->info->cull_distance_array_size; > + > if (length > 4) > attrib_count = 2; > else > attrib_count = 1; > } > - } > + mask_attribs = ((1ull << 1) - 1) << idx; > + } else > + mask_attribs = ((1ull << attrib_count) - 1) << idx; > > for (unsigned i = 0; i < attrib_count; ++i) { > for (unsigned chan = 0; chan < 4; chan++) { > @@ -4310,7 +4266,7 @@ handle_shader_output_decl(struct nir_to_llvm_context > *ctx, > si_build_alloca_undef(ctx, ctx->f32, ""); > } > } > - ctx->output_mask |= ((1ull << attrib_count) - 1) << idx; > + ctx->output_mask |= mask_attribs; > } > > static void > @@ -4519,14 +4475,10 @@ handle_vs_outputs_post(struct nir_to_llvm_context > *ctx, > struct ac_export_args args, pos_args[4] = {}; > LLVMValueRef psize_value = NULL, layer_value = NULL, > viewport_index_value = NULL; > int i; > - const uint64_t clip_mask = ctx->output_mask & ((1ull << > VARYING_SLOT_CLIP_DIST0) | > - (1ull << > VARYING_SLOT_CLIP_DIST1) | > - (1ull << > VARYING_SLOT_CULL_DIST0) | > - (1ull << > VARYING_SLOT_CULL_DIST1)); > > outinfo->prim_id_output = 0xffffffff; > outinfo->layer_output = 0xffffffff; > - if (clip_mask) { > + if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { > LLVMValueRef slots[8]; > unsigned j; > > @@ -4534,13 +4486,9 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, > 
outinfo->cull_dist_mask <<= ctx->num_output_clips; > > i = VARYING_SLOT_CLIP_DIST0; > - for (j = 0; j < ctx->num_output_clips; j++) > + for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; > j++) > slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, > > ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); > - i = VARYING_SLOT_CULL_DIST0; > - for (j = 0; j < ctx->num_output_culls; j++) > - slots[ctx->num_output_clips + j] = to_float(ctx, > LLVMBuildLoad(ctx->builder, > - > ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); > > for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; > i++) > slots[i] = LLVMGetUndef(ctx->f32); > @@ -4570,10 +4518,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, > > if (i == VARYING_SLOT_POS) { > target = V_008DFC_SQ_EXP_POS; > - } else if (i == VARYING_SLOT_CLIP_DIST0 || > - i == VARYING_SLOT_CLIP_DIST1 || > - i == VARYING_SLOT_CULL_DIST0 || > - i == VARYING_SLOT_CULL_DIST1) { > + } else if (i == VARYING_SLOT_CLIP_DIST0) { > continue; > } else if (i == VARYING_SLOT_PSIZ) { > outinfo->writes_pointsize = true; > @@ -4679,12 +4624,9 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx, > if (!(ctx->output_mask & (1ull << i))) > continue; > > - if (i == VARYING_SLOT_CLIP_DIST0) { > - length = ctx->num_output_clips; > - } else if (i == VARYING_SLOT_CULL_DIST0) { > - start = ctx->num_output_clips; > - length = ctx->num_output_culls; > - } > + if (i == VARYING_SLOT_CLIP_DIST0) > + length = ctx->num_output_clips + ctx->num_output_culls; > + > param_index = shader_io_get_unique_index(i); > > if (param_index > max_output_written) > @@ -4980,7 +4922,7 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > handle_fs_inputs_pre(&ctx, nir); > > nir_foreach_variable(variable, &nir->outputs) > - handle_shader_output_decl(&ctx, variable); > + handle_shader_output_decl(&ctx, nir, variable); > > ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, > _mesa_key_pointer_equal); > @@ -5185,7 
+5127,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) > args[8] = ctx->i32zero; /* TFE */ > > int idx = 0; > - int clip_cull_slot = -1; > + > for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { > int length = 4; > int start = 0; > @@ -5194,27 +5136,11 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context > *ctx) > if (!(ctx->output_mask & (1ull << i))) > continue; > > - if (i == VARYING_SLOT_CLIP_DIST1 || > - i == VARYING_SLOT_CULL_DIST1) > - continue; > - > - if (i == VARYING_SLOT_CLIP_DIST0 || > - i == VARYING_SLOT_CULL_DIST0) { > + if (i == VARYING_SLOT_CLIP_DIST0) { > /* unpack clip and cull from a single set of slots */ > - if (clip_cull_slot == -1) { > - clip_cull_slot = idx; > - if (ctx->num_output_clips + > ctx->num_output_culls > 4) > - slot_inc = 2; > - } else { > - slot = clip_cull_slot; > - slot_inc = 0; > - } > - if (i == VARYING_SLOT_CLIP_DIST0) > - length = ctx->num_output_clips; > - if (i == VARYING_SLOT_CULL_DIST0) { > - start = ctx->num_output_clips; > - length = ctx->num_output_culls; > - } > + length = ctx->num_output_clips + ctx->num_output_culls; > + if (length > 4) > + slot_inc = 2; > } > > for (unsigned j = 0; j < length; j++) { > @@ -5268,7 +5194,7 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, > ac_setup_rings(&ctx); > > nir_foreach_variable(variable, &geom_shader->outputs) > - handle_shader_output_decl(&ctx, variable); > + handle_shader_output_decl(&ctx, geom_shader, variable); > > ac_gs_copy_shader_emit(&ctx); > > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index 07020e8..df46276 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -246,6 +246,7 @@ radv_shader_compile_to_nir(struct radv_device *device, > */ > NIR_PASS_V(nir, nir_lower_constant_initializers, ~0); > NIR_PASS_V(nir, nir_lower_system_values); > + NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); > } > > /* Vulkan uses the separate-shader linking model */ >
signature.asc
Description: OpenPGP digital signature
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev