On Fri, Nov 14, 2014 at 4:08 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > On Thursday, November 13, 2014 04:28:20 PM Kristian Høgsberg wrote: >> This patch uses the previous refactoring to add a new run_vs() method >> that generates vertex shader code using the scalar visitor and >> optimizer. >> >> Signed-off-by: Kristian Høgsberg <k...@bitplanet.net> >> --- >> src/mesa/drivers/dri/i965/brw_fs.cpp | 99 ++++++++- >> src/mesa/drivers/dri/i965/brw_fs.h | 21 +- >> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 303 >> ++++++++++++++++++++++++++- >> 3 files changed, 412 insertions(+), 11 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >> b/src/mesa/drivers/dri/i965/brw_fs.cpp >> index 4dce0a2..8007977 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> @@ -1828,6 +1828,56 @@ fs_visitor::assign_urb_setup() >> urb_start + prog_data->num_varying_inputs * 2; >> } >> >> +void >> +fs_visitor::assign_vs_urb_setup() >> +{ >> + brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; >> + int grf, count, slot, channel, attr; >> + >> + assert(stage == MESA_SHADER_VERTEX); >> + count = _mesa_bitcount_64(vs_prog_data->inputs_read); >> + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) >> + count++; >> + >> + /* Each attribute is 4 regs. */ >> + this->first_non_payload_grf = >> + payload.num_regs + prog_data->curb_read_length + count * 4; >> + >> + unsigned vue_entries = >> + MAX2(count, vs_prog_data->base.vue_map.num_slots); >> + >> + vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4; >> + vs_prog_data->base.urb_read_length = (count + 1) / 2; >> + >> + assert(vs_prog_data->base.urb_read_length <= 15); >> + >> + /* Rewrite all ATTR file references to the hw grf that they land in. 
*/ >> + foreach_block_and_inst(block, fs_inst, inst, cfg) { >> + for (int i = 0; i < inst->sources; i++) { >> + if (inst->src[i].file == ATTR) { >> + >> + if (inst->src[i].reg == VERT_ATTRIB_MAX) { >> + slot = count - 1; >> + } else { >> + attr = inst->src[i].reg + inst->src[i].reg_offset / 4; >> + slot = _mesa_bitcount_64(vs_prog_data->inputs_read & >> + BITFIELD64_MASK(attr)); > > I'm having trouble understanding this code - can you explain? > > Reading ir_set_program_inouts.cpp:98 I see that incoming vertex attributes > are always vec4 slots, except for matrices and arrays, which use multiple > vec4 slots. > > I expected your ATTR registers to always be size 4, so reg_offset would have > valid values of 0..3. But I must be mistaken, since you're doing > reg_offset / 4, which would always be 0. Are ATTRs 4*N where N == the # of > matrix columns or array length?
There were cases where reg_offset was > 3, which is why I did it this way. It may be that this is the real problem and I shouldn't work around it here... let me assert reg_offset < 4 there and find the piglit cases that triggered this. > Even still - I don't see how applying BITFIELD64_MASK to a potentially > non-power-of-two number and then doing a bitcount will give you a single > accurate slot value. The slot computation is functionally the same as attribute_map[attr]. vec4_vs_visitor::setup_attributes computes the number of enabled attributes lower than attr in attribute_map[attr]. That's the number of enabled bits in inputs_read that are lower than 1 << attr. We can mask out those bits using BITFIELD64_MASK(attr) and count them using bitcount. > Adding a comment would also be nice to future maintainers. Yeah, fair point. > This was the main spot where I got confused - otherwise most of the code > looks good to me. > >> + } >> + >> + channel = inst->src[i].reg_offset & 3; >> + >> + grf = payload.num_regs + >> + prog_data->curb_read_length + >> + slot * 4 + channel; >> + >> + inst->src[i].file = HW_REG; >> + inst->src[i].fixed_hw_reg = >> + retype(brw_vec8_grf(grf, 0), inst->src[i].type); >> + } >> + } >> + } >> +} >> + >> /** >> * Split large virtual GRFs into separate components if we can. >> * >> @@ -3405,6 +3455,13 @@ fs_visitor::setup_payload_gen6() >> } >> >> void >> +fs_visitor::setup_vs_payload() >> +{ >> + /* R0: thread header, R1: urb handles */ >> + payload.num_regs = 2; >> +} >> + >> +void >> fs_visitor::assign_binding_table_offsets() >> { >> assert(stage == MESA_SHADER_FRAGMENT); >> @@ -3471,6 +3528,8 @@ fs_visitor::opt_drop_redundant_mov_to_flags() >> void >> fs_visitor::optimize() >> { >> + const char *stage_name = stage == MESA_SHADER_VERTEX ? 
"vs" : "fs"; >> + >> calculate_cfg(); >> >> split_virtual_grfs(); >> @@ -3487,8 +3546,8 @@ fs_visitor::optimize() >> \ >> if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \ >> char filename[64]; \ >> - snprintf(filename, 64, "fs%d-%04d-%02d-%02d-" #pass, \ >> - dispatch_width, shader_prog ? shader_prog->Name : 0, >> iteration, pass_num); \ >> + snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \ >> + stage_name, dispatch_width, shader_prog ? >> shader_prog->Name : 0, iteration, pass_num); \ >> \ >> backend_visitor::dump_instructions(filename); \ >> } \ >> @@ -3498,8 +3557,8 @@ fs_visitor::optimize() >> >> if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { >> char filename[64]; >> - snprintf(filename, 64, "fs%d-%04d-00-start", >> - dispatch_width, shader_prog ? shader_prog->Name : 0); >> + snprintf(filename, 64, "%s%d-%04d-00-start", >> + stage_name, dispatch_width, shader_prog ? shader_prog->Name >> : 0); >> >> backend_visitor::dump_instructions(filename); >> } >> @@ -3608,6 +3667,38 @@ fs_visitor::allocate_registers() >> } >> >> bool >> +fs_visitor::run_vs() >> +{ >> + assert(stage == MESA_SHADER_VERTEX); >> + >> + assign_common_binding_table_offsets(0); >> + setup_vs_payload(); >> + >> + if (INTEL_DEBUG & DEBUG_SHADER_TIME) >> + emit_shader_time_begin(); >> + >> + foreach_in_list(ir_instruction, ir, shader->base.ir) { >> + base_ir = ir; >> + this->result = reg_undef; >> + ir->accept(this); >> + } >> + base_ir = NULL; >> + if (failed) >> + return false; >> + >> + emit_urb_writes(); >> + >> + optimize(); >> + >> + assign_curb_setup(); >> + assign_vs_urb_setup(); >> + >> + allocate_registers(); >> + >> + return !failed; >> +} >> + >> +bool >> fs_visitor::run() >> { >> sanity_param_count = prog->Parameters->NumParameters; >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h >> b/src/mesa/drivers/dri/i965/brw_fs.h >> index bb6f767..6888cdd 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.h >> +++ b/src/mesa/drivers/dri/i965/brw_fs.h >> @@ -310,12 +310,23 
@@ public: >> struct gl_shader_program *shader_prog, >> struct gl_fragment_program *fp, >> unsigned dispatch_width); >> + >> + fs_visitor(struct brw_context *brw, >> + void *mem_ctx, >> + const struct brw_vs_prog_key *key, >> + struct brw_vs_prog_data *prog_data, >> + struct gl_shader_program *shader_prog, >> + struct gl_vertex_program *cp, >> + unsigned dispatch_width); >> + >> ~fs_visitor(); >> void init(); >> >> fs_reg *variable_storage(ir_variable *var); >> int virtual_grf_alloc(int size); >> void import_uniforms(fs_visitor *v); >> + void setup_uniform_clipplane_values(); >> + void compute_clip_distance(); >> >> void visit(ir_variable *ir); >> void visit(ir_assignment *ir); >> @@ -406,14 +417,17 @@ public: >> uint32_t const_offset); >> >> bool run(); >> + bool run_vs(); >> void optimize(); >> void allocate_registers(); >> void assign_binding_table_offsets(); >> void setup_payload_gen4(); >> void setup_payload_gen6(); >> + void setup_vs_payload(); >> void assign_curb_setup(); >> void calculate_urb_setup(); >> void assign_urb_setup(); >> + void assign_vs_urb_setup(); >> bool assign_regs(bool allow_spilling); >> void assign_regs_trivial(); >> void get_used_mrfs(bool *mrf_used); >> @@ -471,6 +485,7 @@ public: >> fs_reg *emit_samplepos_setup(); >> fs_reg *emit_sampleid_setup(); >> fs_reg *emit_general_interpolation(ir_variable *ir); >> + fs_reg *emit_vs_system_value(enum brw_reg_type type, int location); >> void emit_interpolation_setup_gen4(); >> void emit_interpolation_setup_gen6(); >> void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); >> @@ -557,6 +572,7 @@ public: >> fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2, >> fs_reg src0_alpha, unsigned components); >> void emit_fb_writes(); >> + void emit_urb_writes(); >> >> void emit_shader_time_begin(); >> void emit_shader_time_end(); >> @@ -632,8 +648,8 @@ public: >> struct hash_table *variable_ht; >> fs_reg frag_depth; >> fs_reg sample_mask; >> - fs_reg outputs[BRW_MAX_DRAW_BUFFERS]; >> - 
unsigned output_components[BRW_MAX_DRAW_BUFFERS]; >> + fs_reg outputs[VARYING_SLOT_MAX]; >> + unsigned output_components[VARYING_SLOT_MAX]; >> fs_reg dual_src_output; >> bool do_dual_src; >> int first_non_payload_grf; >> @@ -680,6 +696,7 @@ public: >> fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; >> fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; >> fs_reg shader_start_time; >> + fs_reg userplane[MAX_CLIP_PLANES]; >> >> int grf_used; >> bool spilled_any_registers; >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> index 0cc51f3..df70340 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> @@ -43,11 +43,40 @@ extern "C" { >> #include "brw_eu.h" >> #include "brw_wm.h" >> } >> +#include "brw_vec4.h" >> #include "brw_fs.h" >> #include "main/uniforms.h" >> #include "glsl/glsl_types.h" >> #include "glsl/ir_optimization.h" >> >> +fs_reg * >> +fs_visitor::emit_vs_system_value(enum brw_reg_type type, int location) >> +{ >> + fs_reg *reg = new(this->mem_ctx) >> + fs_reg(ATTR, VERT_ATTRIB_MAX, type); >> + brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; >> + >> + switch (location) { >> + case SYSTEM_VALUE_BASE_VERTEX: >> + reg->reg_offset = 0; >> + vs_prog_data->uses_vertexid = true; >> + break; >> + case SYSTEM_VALUE_VERTEX_ID: >> + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: >> + reg->reg_offset = 2; >> + vs_prog_data->uses_vertexid = true; >> + break; >> + case SYSTEM_VALUE_INSTANCE_ID: >> + reg->reg_offset = 3; >> + vs_prog_data->uses_instanceid = true; >> + break; >> + default: >> + unreachable("not reached"); >> + } >> + >> + return reg; >> +} >> + >> void >> fs_visitor::visit(ir_variable *ir) >> { >> @@ -57,7 +86,11 @@ fs_visitor::visit(ir_variable *ir) >> return; >> >> if (ir->data.mode == ir_var_shader_in) { >> - if (!strcmp(ir->name, "gl_FragCoord")) { >> + if (stage == MESA_SHADER_VERTEX) { >> + reg = 
new(this->mem_ctx) >> + fs_reg(ATTR, ir->data.location, >> + brw_type_for_base_type(ir->type->get_scalar_type())); >> + } else if (!strcmp(ir->name, "gl_FragCoord")) { >> reg = emit_fragcoord_interpolation(ir); >> } else if (!strcmp(ir->name, "gl_FrontFacing")) { >> reg = emit_frontfacing_interpolation(); >> @@ -70,7 +103,19 @@ fs_visitor::visit(ir_variable *ir) >> } else if (ir->data.mode == ir_var_shader_out) { >> reg = new(this->mem_ctx) fs_reg(this, ir->type); >> >> - if (ir->data.index > 0) { >> + if (stage == MESA_SHADER_VERTEX) { >> + int vector_elements = >> + ir->type->is_array() ? ir->type->fields.array->vector_elements >> + : ir->type->vector_elements; >> + >> + for (int i = 0; i < (type_size(ir->type) + 3) / 4; i++) { >> + int output = ir->data.location + i; >> + this->outputs[output] = *reg; >> + this->outputs[output].reg_offset = i * 4; >> + this->output_components[output] = vector_elements; >> + } >> + >> + } else if (ir->data.index > 0) { >> assert(ir->data.location == FRAG_RESULT_DATA0); >> assert(ir->data.index == 1); >> this->dual_src_output = *reg; >> @@ -134,15 +179,26 @@ fs_visitor::visit(ir_variable *ir) >> reg->type = brw_type_for_base_type(ir->type); >> >> } else if (ir->data.mode == ir_var_system_value) { >> - if (ir->data.location == SYSTEM_VALUE_SAMPLE_POS) { >> + switch (ir->data.location) { >> + case SYSTEM_VALUE_BASE_VERTEX: >> + case SYSTEM_VALUE_VERTEX_ID: >> + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: >> + case SYSTEM_VALUE_INSTANCE_ID: >> + reg = emit_vs_system_value(brw_type_for_base_type(ir->type), >> + ir->data.location); >> + break; >> + case SYSTEM_VALUE_SAMPLE_POS: >> reg = emit_samplepos_setup(); >> - } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_ID) { >> + break; >> + case SYSTEM_VALUE_SAMPLE_ID: >> reg = emit_sampleid_setup(); >> - } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN) { >> + break; >> + case SYSTEM_VALUE_SAMPLE_MASK_IN: >> assert(brw->gen >= 7); >> reg = new(mem_ctx) >> 
fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0), >> BRW_REGISTER_TYPE_D)); >> + break; >> } >> } >> >> @@ -1709,6 +1765,8 @@ get_tex(gl_shader_stage stage, const void *key) >> switch (stage) { >> case MESA_SHADER_FRAGMENT: >> return &((brw_wm_prog_key*) key)->tex; >> + case MESA_SHADER_VERTEX: >> + return &((brw_vec4_prog_key*) key)->tex; > > Doesn't compile. &((brw_vue_prog_key *) key)->tex; > >> default: >> unreachable("unhandled shader stage"); >> } >> @@ -3394,6 +3452,222 @@ fs_visitor::emit_fb_writes() >> } >> >> void >> +fs_visitor::setup_uniform_clipplane_values() >> +{ >> + gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); >> + const struct brw_vec4_prog_key *key = >> + (const struct brw_vec4_prog_key *) this->key; >> + >> + for (int i = 0; i < key->nr_userclip_plane_consts; i++) { >> + this->userplane[i] = fs_reg(UNIFORM, uniforms); >> + for (int j = 0; j < 4; ++j) { >> + stage_prog_data->param[uniforms + j] = >> + (gl_constant_value *) &clip_planes[i][j]; >> + } >> + uniforms += 4; >> + } >> +} >> + >> +void fs_visitor::compute_clip_distance() >> +{ >> + struct brw_vue_prog_data *vue_prog_data = >> + (struct brw_vue_prog_data *) prog_data; >> + const struct brw_vec4_prog_key *key = >> + (const struct brw_vec4_prog_key *) this->key; >> + > > brw_vue_prog_key. > > Please also copy and paste Paul's comment: > > /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables): > * > * "If a linked set of shaders forming the vertex stage contains no > * static write to gl_ClipVertex or gl_ClipDistance, but the > * application has requested clipping against user clip planes through > * the API, then the coordinate written to gl_Position is used for > * comparison against the user clip planes." > * > * This function is only called if the shader didn't write to > * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping > * if the user wrote to it; otherwise we use gl_Position. > */ Right, done. 
>> + gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX; >> + if (!(vec4_prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) >> + clip_vertex = VARYING_SLOT_POS; >> + >> + /* If the clip vertex isn't written, skip this. Typically this means >> + * the GS will set up clipping. */ > > */ goes on its own line (same feedback applies in many places). > >> + if (outputs[clip_vertex].file == BAD_FILE) >> + return; >> + >> + setup_uniform_clipplane_values(); >> + >> + current_annotation = "user clip distances"; >> + >> + this->outputs[VARYING_SLOT_CLIP_DIST0] = fs_reg(this, >> glsl_type::vec4_type); >> + this->outputs[VARYING_SLOT_CLIP_DIST1] = fs_reg(this, >> glsl_type::vec4_type); >> + >> + for (int i = 0; i < key->nr_userclip_plane_consts; i++) { >> + fs_reg u = userplane[i]; >> + fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4]; >> + output.reg_offset = i & 3; >> + >> + emit(MUL(output, outputs[clip_vertex], u)); >> + for (int j = 1; j < 4; j++) { >> + u.reg = userplane[i].reg + j; >> + emit(MAD(output, output, offset(outputs[clip_vertex], j), u)); >> + } >> + } >> +} >> + >> +void >> +fs_visitor::emit_urb_writes() >> +{ >> + int slot, urb_offset, length; >> + struct brw_vue_prog_data *vue_prog_data = >> + (struct brw_vue_prog_data *) prog_data; >> + const struct brw_vec4_prog_key *key = >> + (const struct brw_vec4_prog_key *) this->key; >> + const GLbitfield64 psiz_mask = >> + VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ; >> + bool flush; >> + fs_reg sources[8]; >> + >> + /* Lower legacy ff and ClipVertex clipping to clip distances */ >> + if (key->userclip_active && !prog->UsesClipDistanceOut) >> + compute_clip_distance(); >> + >> + /* If we don't have any valid slots to write, just do a minimal urb write >> + * send to terminate the shader. 
*/ >> + if (vec4_prog_data->vue_map.slots_valid == 0) { >> + >> + fs_reg payload = fs_reg(GRF, virtual_grf_alloc(1), >> BRW_REGISTER_TYPE_UD); >> + fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0), >> + >> BRW_REGISTER_TYPE_UD)))); >> + inst->force_writemask_all = true; >> + >> + inst = emit(VS_OPCODE_URB_WRITE, reg_undef, payload); >> + inst->eot = true; >> + inst->mlen = 1; >> + inst->offset = 1; >> + return; >> + } >> + >> + length = 0; >> + urb_offset = 0; >> + flush = false; >> + for (slot = 0; slot < vec4_prog_data->vue_map.num_slots; slot++) { >> + fs_reg reg, src, zero; >> + >> + int varying = vec4_prog_data->vue_map.slot_to_varying[slot]; >> + switch (varying) { >> + case VARYING_SLOT_PSIZ: >> + >> + /* The point size varying slot is the vue header and is always in >> the >> + * vue map. But often none of the special varyings that live there >> + * are written and in that case we can skip writing to the vue >> + * header, provided the corresponding state properly clamps the >> + * values further down the pipeline. 
*/ >> + if ((vec4_prog_data->vue_map.slots_valid & psiz_mask) == 0) { >> + assert(length == 0); >> + urb_offset++; >> + break; >> + } >> + >> + zero = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); >> + emit(MOV(zero, fs_reg(0u))); >> + >> + sources[length++] = zero; >> + if (vec4_prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) >> + sources[length++] = this->outputs[VARYING_SLOT_LAYER]; >> + else >> + sources[length++] = zero; >> + >> + if (vec4_prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) >> + sources[length++] = this->outputs[VARYING_SLOT_VIEWPORT]; >> + else >> + sources[length++] = zero; >> + >> + if (vec4_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) >> + sources[length++] = this->outputs[VARYING_SLOT_PSIZ]; >> + else >> + sources[length++] = zero; >> + break; >> + >> + case BRW_VARYING_SLOT_NDC: >> + case VARYING_SLOT_EDGE: >> + unreachable("unexpected scalar vs output"); >> + break; >> + >> + case BRW_VARYING_SLOT_PAD: >> + break; >> + >> + default: >> + /* gl_Position is always in the vue map, but isn't always written >> by >> + * the shader. Other varyings (clip distances) get added to the >> vue > > Really? I guess that's true, but it's a link error to not write it prior to > GLSL 1.40, so virtually all shaders do write it... I got this from a couple of piglit tests that use an empty vertex shader and generate the position in the geometry shader. > I suppose you're right, though, and we need this code for clip distance > regardless, so my point is moot. Nevermind. > >> + * map but doesn't always get written. In those cases, the > > "don't always get written" > >> + * corresponding this->output slot will be invalid we can skip the > > ", and" ^ > >> + * urb write for the varying. If we've already queued up a vue >> slot >> + * for writing we flush a mlen 5 urb write, otherwise we just >> advance >> + * the urb_offset. 
>> + */ >> + if (this->outputs[varying].file == BAD_FILE) { >> + if (length > 0) >> + flush = true; >> + else >> + urb_offset++; >> + break; >> + } >> + >> + for (int i = 0; i < 4; i++) { >> + if ((varying == VARYING_SLOT_COL0 || >> + varying == VARYING_SLOT_COL1 || >> + varying == VARYING_SLOT_BFC0 || >> + varying == VARYING_SLOT_BFC1) && >> + key->clamp_vertex_color) { >> + /* We need to clamp these guys, so do a saturating MOV into a >> + * temp register and use that for the payload. >> + */ >> + reg = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_F); >> + reg.type = this->outputs[varying].type; > > Could be written more succinctly as: > > reg = fs_reg(GRF, virtual_grf_alloc(1), outputs[varying].type); Heh, right... >> + src = offset(this->outputs[varying], i); >> + fs_inst *inst = emit(MOV(reg, src)); >> + inst->saturate = true; >> + sources[length++] = reg; >> + } else { >> + sources[length++] = offset(this->outputs[varying], i); >> + } >> + } > > I'd push the loop into the then/else blocks, i.e. > > if (key->clamp_vertex_color && (COL0 || COL1 || BFC0 || BFC1)) { > for (int i = 0; i < 4; i++) { > ... > } > } else { > for (int i = 0; i < 4; i++) > sources[length++] = offset(this->outputs[varying], i); > } Yeah, that feels a little less awkward. >> + break; >> + } >> + >> + current_annotation = "URB write"; >> + >> + /* If we've queued up 8 registers of payload (2 VUE slots), if this is >> + * the last slot or if we need to flush (see BAD_FILE varying case >> + * above), emit a URB write send now to flush out the data. 
>> + */ >> + int last = slot == vec4_prog_data->vue_map.num_slots - 1; >> + if (length == 8 || last) >> + flush = true; >> + if (flush) { >> + if (last && (INTEL_DEBUG & DEBUG_SHADER_TIME)) >> + emit_shader_time_end(); >> + >> + fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + >> 1); >> + fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1), >> + BRW_REGISTER_TYPE_F); >> + >> + /* We need WE_all on the MOV for the message header (the URB >> handles) >> + * so do a MOV to a dummy register and set force_writemask_all on >> the >> + * MOV. LOAD_PAYLOAD will preserve that. >> + */ >> + fs_reg dummy = fs_reg(GRF, virtual_grf_alloc(1), >> + BRW_REGISTER_TYPE_UD); >> + fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0), >> + >> BRW_REGISTER_TYPE_UD)))); >> + inst->force_writemask_all = true; >> + payload_sources[0] = dummy; >> + >> + memcpy(&payload_sources[1], sources, length * sizeof sources[0]); >> + emit(LOAD_PAYLOAD(payload, payload_sources, length + 1)); >> + >> + inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); >> + inst->eot = last; >> + inst->mlen = length + 1; >> + inst->offset = urb_offset; >> + urb_offset = slot + 1; >> + length = 0; >> + flush = false; >> + } >> + } >> +} >> + >> +void >> fs_visitor::resolve_ud_negate(fs_reg *reg) >> { >> if (reg->type != BRW_REGISTER_TYPE_UD || >> @@ -3437,6 +3711,25 @@ fs_visitor::fs_visitor(struct brw_context *brw, >> init(); >> } >> >> +fs_visitor::fs_visitor(struct brw_context *brw, >> + void *mem_ctx, >> + const struct brw_vs_prog_key *key, >> + struct brw_vs_prog_data *prog_data, >> + struct gl_shader_program *shader_prog, >> + struct gl_vertex_program *cp, >> + unsigned dispatch_width) >> + : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base.base, >> + MESA_SHADER_VERTEX), >> + reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)), >> + reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)), >> + 
reg_null_ud(retype(brw_null_vec(dispatch_width), >> BRW_REGISTER_TYPE_UD)), >> + key(key), prog_data(&prog_data->base.base), >> + dispatch_width(dispatch_width) >> +{ >> + this->mem_ctx = mem_ctx; >> + init(); >> +} >> + >> void >> fs_visitor::init() >> { >> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev