On 05/05/17 21:24, Jason Ekstrand wrote: > On Fri, May 5, 2017 at 1:04 AM, Alejandro Piñeiro > <apinhe...@igalia.com <mailto:apinhe...@igalia.com>> wrote: > > On 05/05/17 04:11, Jason Ekstrand wrote: > > We're already doing this in the FS back-end. This just does the > same > > thing in the vec4 back-end. > > --- > > src/intel/compiler/brw_nir.c | 3 -- > > src/intel/compiler/brw_vec4.cpp | 44 ------------------- > > src/intel/compiler/brw_vec4.h | 2 - > > src/intel/compiler/brw_vec4_gs_nir.cpp | 10 ++--- > > src/intel/compiler/brw_vec4_gs_visitor.cpp | 70 > +++++++++++++++++++++--------- > > src/intel/compiler/brw_vec4_gs_visitor.h | 4 +- > > src/intel/compiler/gen6_gs_visitor.cpp | 4 +- > > 7 files changed, 56 insertions(+), 81 deletions(-) > > > > diff --git a/src/intel/compiler/brw_nir.c > b/src/intel/compiler/brw_nir.c > > index d92e8fa..8a9f89f 100644 > > --- a/src/intel/compiler/brw_nir.c > > +++ b/src/intel/compiler/brw_nir.c > > @@ -342,9 +342,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir, > bool is_scalar, > > /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ > > nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0); > > > > - if (nir->stage == MESA_SHADER_GEOMETRY && !is_scalar) > > - return; > > - > > /* This pass needs actual constants */ > > nir_opt_constant_folding(nir); > > > > diff --git a/src/intel/compiler/brw_vec4.cpp > b/src/intel/compiler/brw_vec4.cpp > > index b387321..ad92951 100644 > > --- a/src/intel/compiler/brw_vec4.cpp > > +++ b/src/intel/compiler/brw_vec4.cpp > > @@ -1693,50 +1693,6 @@ attribute_to_hw_reg(int attr, > brw_reg_type type, bool interleaved) > > return reg; > > } > > > > - > > -/** > > - * Replace each register of type ATTR in this->instructions > with a reference > > - * to a fixed HW register. > > - * > > - * If interleaved is true, then each attribute takes up half a > register, with > > - * register N containing attribute 2*N in its first half and > attribute 2*N+1 > > - * in its second half (this corresponds to the payload setup > used by geometry > > - * shaders in "single" or "dual instanced" dispatch mode). If > interleaved is > > - * false, then each attribute takes up a whole register, with > register N > > - * containing attribute N (this corresponds to the payload > setup used by > > - * vertex shaders, and by geometry shaders in "dual object" > dispatch mode). > > - */ > > -void > > -vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map, > > - bool interleaved) > > -{ > > - foreach_block_and_inst(block, vec4_instruction, inst, cfg) { > > - for (int i = 0; i < 3; i++) { > > - if (inst->src[i].file != ATTR) > > - continue; > > - > > - int grf = attribute_map[inst->src[i].nr + > > - inst->src[i].offset / REG_SIZE]; > > - assert(inst->src[i].offset % REG_SIZE == 0); > > - > > - /* All attributes used in the shader need to have been > assigned a > > - * hardware register by the caller > > - */ > > - assert(grf != 0); > > - > > - struct brw_reg reg = > > - attribute_to_hw_reg(grf, inst->src[i].type, > interleaved); > > - reg.swizzle = inst->src[i].swizzle; > > - if (inst->src[i].abs) > > - reg = brw_abs(reg); > > - if (inst->src[i].negate) > > - reg = negate(reg); > > - > > - inst->src[i] = reg; > > - } > > - } > > -} > > - > > int > > vec4_vs_visitor::setup_attributes(int payload_reg) > > { > > diff --git a/src/intel/compiler/brw_vec4.h > b/src/intel/compiler/brw_vec4.h > > index 89adfaa..0f92f34 100644 > > --- a/src/intel/compiler/brw_vec4.h > > +++ b/src/intel/compiler/brw_vec4.h > > @@ -367,8 +367,6 @@ public: > > > > protected: > > void emit_vertex(); > > - void lower_attributes_to_hw_regs(const int *attribute_map, > > - bool interleaved); > > void setup_payload_interference(struct ra_graph *g, int > first_payload_node, > > int reg_node_count); > > virtual void setup_payload() = 0; > > diff --git a/src/intel/compiler/brw_vec4_gs_nir.cpp > b/src/intel/compiler/brw_vec4_gs_nir.cpp > > index ed8c03b..577f587 100644 > > --- a/src/intel/compiler/brw_vec4_gs_nir.cpp > > +++ b/src/intel/compiler/brw_vec4_gs_nir.cpp > > @@ -66,8 +66,10 @@ > vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) > > nir_const_value *vertex = > nir_src_as_const_value(instr->src[0]); > > nir_const_value *offset_reg = > nir_src_as_const_value(instr->src[1]); > > > > + const unsigned input_array_stride = > prog_data->urb_read_length * 2; > > The change of using the input_array_stride instead of > BRW_VARYING_SLOT_COUNT is not evident (at least to me). There is a > comment explaining the stride purpose on > vec4_gs_visitor::setup_varying_inputs, but perhaps a note here > would be > welcomed. You can drop this suggestion if you prefer. > > > We were using BRW_VARYING_SLOT_COUNT before because we were doing a > generic thing here that we would later remap using > input_array_stride. Now we're just using input_array_stride > directly. I think if this weren't a change, you would probably not > notice. :-) How about: > > All of the inputs for the first vertex come in first followed by all > inputs for the second vertex etc. The stride between vertices is > governed by the URB read length.
Sounds good to me. > > --Jason > > > Other than that: > Reviewed-by: Alejandro Piñeiro <apinhe...@igalia.com > <mailto:apinhe...@igalia.com>> > This still applies (in case you missed). > > > + > > if (nir_dest_bit_size(instr->dest) == 64) { > > - src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * > vertex->u32[0] + > > + src = src_reg(ATTR, input_array_stride * vertex->u32[0] + > > instr->const_index[0] + offset_reg->u32[0], > > glsl_type::dvec4_type); > > > > @@ -85,15 +87,11 @@ > vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) > > /* Make up a type...we have no way of knowing... */ > > const glsl_type *const type = > glsl_type::ivec(instr->num_components); > > > > - src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * > vertex->u32[0] + > > + src = src_reg(ATTR, input_array_stride * vertex->u32[0] + > > instr->const_index[0] + offset_reg->u32[0], > > type); > > src.swizzle = > BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); > > > > - /* gl_PointSize is passed in the .w component of the > VUE header */ > > - if (instr->const_index[0] == VARYING_SLOT_PSIZ) > > - src.swizzle = BRW_SWIZZLE_WWWW; > > - > > dest = get_nir_dest(instr->dest, src.type); > > dest.writemask = > brw_writemask_for_size(instr->num_components); > > emit(MOV(dest, src)); > > diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp > b/src/intel/compiler/brw_vec4_gs_visitor.cpp > > index 4a8b5be..516ab0b 100644 > > --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp > > +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp > > @@ -29,6 +29,7 @@ > > > > #include "brw_vec4_gs_visitor.h" > > #include "gen6_gs_visitor.h" > > +#include "brw_cfg.h" > > #include "brw_fs.h" > > #include "brw_nir.h" > > #include "common/gen_debug.h" > > @@ -72,9 +73,36 @@ > vec4_gs_visitor::make_reg_for_system_value(int location) > > return reg; > > } > > > > +static inline struct brw_reg > > +attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved) > > +{ > > + struct brw_reg reg; > > > > + unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type)); > > + if (interleaved) { > > + reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * > 4), 0, width, 1); > > + } else { > > + reg = brw_vecn_grf(width, attr, 0); > > + } > > + > > + reg.type = type; > > + return reg; > > +} > > + > > +/** > > + * Replace each register of type ATTR in this->instructions > with a reference > > + * to a fixed HW register. > > + * > > + * If interleaved is true, then each attribute takes up half a > register, with > > + * register N containing attribute 2*N in its first half and > attribute 2*N+1 > > + * in its second half (this corresponds to the payload setup > used by geometry > > + * shaders in "single" or "dual instanced" dispatch mode). If > interleaved is > > + * false, then each attribute takes up a whole register, with > register N > > + * containing attribute N (this corresponds to the payload > setup used by > > + * vertex shaders, and by geometry shaders in "dual object" > dispatch mode). > > + */ > > int > > -vec4_gs_visitor::setup_varying_inputs(int payload_reg, int > *attribute_map, > > +vec4_gs_visitor::setup_varying_inputs(int payload_reg, > > int attributes_per_reg) > > { > > /* For geometry shaders there are N copies of the input > attributes, where N > > @@ -89,12 +117,24 @@ vec4_gs_visitor::setup_varying_inputs(int > payload_reg, int *attribute_map, > > assert(num_input_vertices <= MAX_GS_INPUT_VERTICES); > > unsigned input_array_stride = prog_data->urb_read_length * 2; > > > > - for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) { > > - int varying = c->input_vue_map.slot_to_varying[slot]; > > - for (unsigned vertex = 0; vertex < num_input_vertices; > vertex++) { > > - attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] = > > - attributes_per_reg * payload_reg + > input_array_stride * vertex + > > - slot; > > + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { > > + for (int i = 0; i < 3; i++) { > > + if (inst->src[i].file != ATTR) > > + continue; > > + > > + assert(inst->src[i].offset % REG_SIZE == 0); > > + int grf = payload_reg * attributes_per_reg + > > + inst->src[i].nr + inst->src[i].offset / > REG_SIZE; > > + > > + struct brw_reg reg = > > + attribute_to_hw_reg(grf, inst->src[i].type, > attributes_per_reg > 1); > > + reg.swizzle = inst->src[i].swizzle; > > + if (inst->src[i].abs) > > + reg = brw_abs(reg); > > + if (inst->src[i].negate) > > + reg = negate(reg); > > + > > + inst->src[i] = reg; > > } > > } > > > > @@ -103,25 +143,15 @@ vec4_gs_visitor::setup_varying_inputs(int > payload_reg, int *attribute_map, > > return payload_reg + regs_used; > > } > > > > - > > void > > vec4_gs_visitor::setup_payload() > > { > > - int attribute_map[BRW_VARYING_SLOT_COUNT * > MAX_GS_INPUT_VERTICES]; > > - > > /* If we are in dual instanced or single mode, then > attributes are going > > * to be interleaved, so one register contains two attribute > slots. > > */ > > int attributes_per_reg = > > prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT > ? 1 : 2; > > > > - /* If a geometry shader tries to read from an input that > wasn't written by > > - * the vertex shader, that produces undefined results, but > it shouldn't > > - * crash anything. So initialize attribute_map to > zeros--that ensures that > > - * these undefined results are read from r0. > > - */ > > - memset(attribute_map, 0, sizeof(attribute_map)); > > - > > int reg = 0; > > > > /* The payload always contains important data in r0, which > contains > > @@ -132,13 +162,11 @@ vec4_gs_visitor::setup_payload() > > > > /* If the shader uses gl_PrimitiveIDIn, that goes in r1. */ > > if (gs_prog_data->include_primitive_id) > > - attribute_map[VARYING_SLOT_PRIMITIVE_ID] = > attributes_per_reg * reg++; > > + reg++; > > > > reg = setup_uniforms(reg); > > > > - reg = setup_varying_inputs(reg, attribute_map, > attributes_per_reg); > > - > > - lower_attributes_to_hw_regs(attribute_map, > attributes_per_reg > 1); > > + reg = setup_varying_inputs(reg, attributes_per_reg); > > > > this->first_non_payload_grf = reg; > > } > > diff --git a/src/intel/compiler/brw_vec4_gs_visitor.h > b/src/intel/compiler/brw_vec4_gs_visitor.h > > index 09221f9..6b21a33 100644 > > --- a/src/intel/compiler/brw_vec4_gs_visitor.h > > +++ b/src/intel/compiler/brw_vec4_gs_visitor.h > > @@ -33,6 +33,7 @@ > > #include "brw_vec4.h" > > > > #define MAX_GS_INPUT_VERTICES 6 > > +#define MAX_GS_INPUT_VARYINGS 32 > > > > #ifdef __cplusplus > > namespace brw { > > @@ -64,8 +65,7 @@ protected: > > virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); > > > > protected: > > - int setup_varying_inputs(int payload_reg, int *attribute_map, > > - int attributes_per_reg); > > + int setup_varying_inputs(int payload_reg, int > attributes_per_reg); > > void emit_control_data_bits(); > > void set_stream_control_data_bits(unsigned stream_id); > > > > diff --git a/src/intel/compiler/gen6_gs_visitor.cpp > b/src/intel/compiler/gen6_gs_visitor.cpp > > index 075bc4a..afc6056 100644 > > --- a/src/intel/compiler/gen6_gs_visitor.cpp > > +++ b/src/intel/compiler/gen6_gs_visitor.cpp > > @@ -516,9 +516,7 @@ gen6_gs_visitor::setup_payload() > > > > reg = setup_uniforms(reg); > > > > - reg = setup_varying_inputs(reg, attribute_map, > attributes_per_reg); > > - > > - lower_attributes_to_hw_regs(attribute_map, true); > > + reg = setup_varying_inputs(reg, attributes_per_reg); > > > > this->first_non_payload_grf = reg; > > } > > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev