Neil Roberts <n...@linux.intel.com> writes: > If a non-const sample number is given to interpolateAtSample it will > now generate an indirect send message with the sample ID similar to > how non-const sampler array indexing works. Previously non-const > values were ignored and instead it ended up using a constant 0 value. > > The generator will try to determine if the sample ID is dynamically > uniform via nir_src_is_dynamically_uniform. If not it will query the > pixel interpolator in a loop, once for each different live sample > number. The next live sample number is found using emit_uniformize. If > multiple live channels have the same sample number then they will be > handled in a single iteration of the loop. The loop is necessary > because the indirect send message doesn't seem to have a way to > specify a different value for each fragment. > > This fixes the following two Piglit tests: > > arb_gpu_shader5-interpolateAtSample-nonconst > arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform > > v2: Handle dynamically non-uniform sample ids. > v3: Remove the BREAK instruction and predicate the WHILE directly. > Make the tokens arrays const. (Matt Turner) > v4: Iterate over the live channels instead of each possible sample > number. > v5: Don't special case immediate values in > brw_pixel_interpolator_query. Make a better wrapper for the > function to set up the PI send instruction. Ensure that the SHL > instructions are scalar. (Francisco Jerez). > --- > > Many thanks to Francisco and Matt for the all the helpful feedback. >
Thank you, looks pretty good to me: Reviewed-by: Francisco Jerez <curroje...@riseup.net> > src/mesa/drivers/dri/i965/brw_eu.h | 2 +- > src/mesa/drivers/dri/i965/brw_eu_emit.c | 27 ++--- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 139 > ++++++++++++++++++++----- > 4 files changed, 130 insertions(+), 43 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index 761aa0e..0ac1ad9 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, > struct brw_reg mrf, > bool noperspective, > unsigned mode, > - unsigned data, > + struct brw_reg data, > unsigned msg_length, > unsigned response_length); > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index dc699bb..bf2fee9 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -3212,26 +3212,29 @@ brw_pixel_interpolator_query(struct brw_codegen *p, > struct brw_reg mrf, > bool noperspective, > unsigned mode, > - unsigned data, > + struct brw_reg data, > unsigned msg_length, > unsigned response_length) > { > const struct brw_device_info *devinfo = p->devinfo; > - struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); > - > - brw_set_dest(p, insn, dest); > - brw_set_src0(p, insn, mrf); > - brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, > - msg_length, response_length, > - false /* header is never present for PI */, > - false); > + struct brw_inst *insn; > + const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current); > > - brw_inst_set_pi_simd_mode( > - devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16); > + /* brw_send_indirect_message will automatically use a direct send message > + * if data is actually immediate. > + */ > + insn = brw_send_indirect_message(p, > + GEN7_SFID_PIXEL_INTERPOLATOR, > + dest, > + mrf, > + vec1(data)); > + brw_inst_set_mlen(devinfo, insn, msg_length); > + brw_inst_set_rlen(devinfo, insn, response_length); > + > + brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); > brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px > dispatch */ > brw_inst_set_pi_nopersp(devinfo, insn, noperspective); > brw_inst_set_pi_message_type(devinfo, insn, mode); > - brw_inst_set_pi_message_data(devinfo, insn, data); > } > > void > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 6f8b75e..17e19cf 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1377,15 +1377,14 @@ > fs_generator::generate_pixel_interpolator_query(fs_inst *inst, > struct brw_reg msg_data, > unsigned msg_type) > { > - assert(msg_data.file == BRW_IMMEDIATE_VALUE && > - msg_data.type == BRW_REGISTER_TYPE_UD); > + assert(msg_data.type == BRW_REGISTER_TYPE_UD); > > brw_pixel_interpolator_query(p, > retype(dst, BRW_REGISTER_TYPE_UW), > src, > inst->pi_noperspective, > msg_type, > - msg_data.dw1.ud, > + msg_data, > inst->mlen, > inst->regs_written); > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 03fe680..bc0df68 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -1180,6 +1180,36 @@ get_image_atomic_op(nir_intrinsic_op op, const > glsl_type *type) > } > } > > +static fs_inst * > +emit_pixel_interpolater_send(const fs_builder &bld, > + enum opcode opcode, > + const fs_reg &dst, > + const fs_reg &src, > + const fs_reg &desc, > + glsl_interp_qualifier interpolation) > +{ > + fs_inst *inst; > + fs_reg payload; > + int mlen; > + > + if (src.file == BAD_FILE) { > + /* Dummy payload */ > + payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1); > + mlen = 1; > + } else { > + payload = src; > + mlen = 2 * bld.dispatch_width() / 8; > + } > + > + inst = bld.emit(opcode, dst, payload, desc); > + inst->mlen = mlen; > + /* 2 floats per slot returned */ > + inst->regs_written = 2 * bld.dispatch_width() / 8; > + inst->pi_noperspective = interpolation == INTERP_QUALIFIER_NOPERSPECTIVE; > + > + return inst; > +} > + > void > fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr > *instr) > { > @@ -1583,28 +1613,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true; > > fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); > - > - /* For most messages, we need one reg of ignored data; the hardware > - * requires mlen==1 even when there is no payload. in the per-slot > - * offset case, we'll replace this with the proper source data. > - */ > - fs_reg src = vgrf(glsl_type::float_type); > - int mlen = 1; /* one reg unless overriden */ > - fs_inst *inst; > + const glsl_interp_qualifier interpolation = > + (glsl_interp_qualifier) > instr->variables[0]->var->data.interpolation; > > switch (instr->intrinsic) { > case nir_intrinsic_interp_var_at_centroid: > - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, > - dst_xy, src, fs_reg(0u)); > + emit_pixel_interpolater_send(bld, > + FS_OPCODE_INTERPOLATE_AT_CENTROID, > + dst_xy, > + fs_reg(), /* src */ > + fs_reg(0u), > + interpolation); > break; > > case nir_intrinsic_interp_var_at_sample: { > - /* XXX: We should probably handle non-constant sample id's */ > nir_const_value *const_sample = > nir_src_as_const_value(instr->src[0]); > - assert(const_sample); > - unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0; > - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, > - fs_reg(msg_data)); > + > + if (const_sample) { > + unsigned msg_data = const_sample->i[0] << 4; > + > + emit_pixel_interpolater_send(bld, > + FS_OPCODE_INTERPOLATE_AT_SAMPLE, > + dst_xy, > + fs_reg(), /* src */ > + fs_reg(msg_data), > + interpolation); > + } else { > + const fs_reg sample_src = retype(get_nir_src(instr->src[0]), > + BRW_REGISTER_TYPE_UD); > + > + if (nir_src_is_dynamically_uniform(instr->src[0])) { > + const fs_reg sample_id = bld.emit_uniformize(sample_src); > + const fs_reg msg_data = vgrf(glsl_type::uint_type); > + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, > fs_reg(4u)); > + emit_pixel_interpolater_send(bld, > + FS_OPCODE_INTERPOLATE_AT_SAMPLE, > + dst_xy, > + fs_reg(), /* src */ > + msg_data, > + interpolation); > + } else { > + /* Make a loop that sends a message to the pixel interpolater > + * for the sample number in each live channel. If there are > + * multiple channels with the same sample number then these > + * will be handled simultaneously with a single interation of > + * the loop. > + */ > + bld.emit(BRW_OPCODE_DO); > + > + /* Get the next live sample number into sample_id_reg */ > + const fs_reg sample_id = bld.emit_uniformize(sample_src); > + > + /* Set the flag register so that we can perform the send > + * message on all channels that have the same sample number > + */ > + bld.CMP(bld.null_reg_ud(), > + sample_src, sample_id, > + BRW_CONDITIONAL_EQ); > + const fs_reg msg_data = vgrf(glsl_type::uint_type); > + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, > fs_reg(4u)); > + fs_inst *inst = > + emit_pixel_interpolater_send(bld, > + > FS_OPCODE_INTERPOLATE_AT_SAMPLE, > + dst_xy, > + fs_reg(), /* src */ > + msg_data, > + interpolation); > + set_predicate(BRW_PREDICATE_NORMAL, inst); > + > + /* Continue the loop if there are any live channels left */ > + set_predicate_inv(BRW_PREDICATE_NORMAL, > + true, /* inverse */ > + bld.emit(BRW_OPCODE_WHILE)); > + } > + } > + > break; > } > > @@ -1615,10 +1698,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; > unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; > > - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, > src, > - fs_reg(off_x | (off_y << 4))); > + emit_pixel_interpolater_send(bld, > + > FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, > + dst_xy, > + fs_reg(), /* src */ > + fs_reg(off_x | (off_y << 4)), > + interpolation); > } else { > - src = vgrf(glsl_type::ivec2_type); > + fs_reg src = vgrf(glsl_type::ivec2_type); > fs_reg offset_src = retype(get_nir_src(instr->src[0]), > BRW_REGISTER_TYPE_F); > for (int i = 0; i < 2; i++) { > @@ -1646,9 +1733,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); > } > > - mlen = 2 * dispatch_width / 8; > - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, > dst_xy, src, > - fs_reg(0u)); > + const enum opcode opcode = > FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; > + emit_pixel_interpolater_send(bld, > + opcode, > + dst_xy, > + src, > + fs_reg(0u), > + interpolation); > } > break; > } > @@ -1657,12 +1748,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > unreachable("Invalid intrinsic"); > } > > - inst->mlen = mlen; > - /* 2 floats per slot returned */ > - inst->regs_written = 2 * dispatch_width / 8; > - inst->pi_noperspective = instr->variables[0]->var->data.interpolation > == > - INTERP_QUALIFIER_NOPERSPECTIVE; > - > for (unsigned j = 0; j < instr->num_components; j++) { > fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); > src.type = dest.type; > -- > 1.9.3
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev