If a non-const sample number is given to interpolateAtSample, an indirect send message carrying the sample ID is now generated, similar to how non-const sampler array indexing works. Previously non-const values were ignored and a constant value of 0 ended up being used instead.
The generator will try to determine whether the sample ID is dynamically uniform via nir_src_is_dynamically_uniform. If it is not, the generator will query the pixel interpolator in a loop, once for each different live sample number. The next live sample number is found using emit_uniformize. If multiple live channels have the same sample number then they will be handled in a single iteration of the loop. The loop is necessary because the indirect send message doesn't seem to have a way to specify a different value for each fragment. This fixes the following two Piglit tests: arb_gpu_shader5-interpolateAtSample-nonconst and arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform. v2: Handle dynamically non-uniform sample ids. v3: Remove the BREAK instruction and predicate the WHILE directly. Make the tokens arrays const. v4: Iterate over the live channels instead of each possible sample number. --- This version of the patch iterates over each live channel instead of each possible sample number. It doesn't need to access STATE_NUM_SAMPLES, so it avoids the problem that Francisco mentioned. Note that if most of the fragments turn out to be using the same sample number, then all of the fragments sharing that number will be handled at once and the loop will bail out early, so it should be more efficient. It also makes the patch much simpler. 
src/mesa/drivers/dri/i965/brw_eu.h | 2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 34 +++++++--- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 93 +++++++++++++++++++++----- 4 files changed, 102 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 761aa0e..0ac1ad9 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index dc699bb..9c38e99 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3212,26 +3212,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length) { const struct brw_device_info *devinfo = p->devinfo; - struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); + struct brw_inst *insn; + uint16_t exec_size; - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, mrf); - brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, - msg_length, response_length, - false /* header is never present for PI */, - false); + if (data.file == BRW_IMMEDIATE_VALUE) { + insn = next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, mrf); + brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, + msg_length, response_length, + false /* header is never present for PI */, + false); + brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud); + } else { + insn = brw_send_indirect_message(p, + GEN7_SFID_PIXEL_INTERPOLATOR, + dest, + mrf, + vec1(data)); + brw_inst_set_mlen(devinfo, 
insn, msg_length); + brw_inst_set_rlen(devinfo, insn, response_length); + } - brw_inst_set_pi_simd_mode( - devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16); + exec_size = brw_inst_exec_size(devinfo, p->current); + brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */ brw_inst_set_pi_nopersp(devinfo, insn, noperspective); brw_inst_set_pi_message_type(devinfo, insn, mode); - brw_inst_set_pi_message_data(devinfo, insn, data); } void diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 6f8b75e..17e19cf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1377,15 +1377,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg msg_data, unsigned msg_type) { - assert(msg_data.file == BRW_IMMEDIATE_VALUE && - msg_data.type == BRW_REGISTER_TYPE_UD); + assert(msg_data.type == BRW_REGISTER_TYPE_UD); brw_pixel_interpolator_query(p, retype(dst, BRW_REGISTER_TYPE_UW), src, inst->pi_noperspective, msg_type, - msg_data.dw1.ud, + msg_data, inst->mlen, inst->regs_written); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 03fe680..8a73bda 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1180,6 +1180,23 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type) } } +/* For most messages, we need one reg of ignored data; the hardware requires + * mlen==1 even when there is no payload. in the per-slot offset case, we'll + * replace this with the proper source data. 
+ */ +static void +setup_pixel_interpolater_instruction(fs_visitor *v, + nir_intrinsic_instr *instr, + fs_inst *inst, + int mlen = 1) +{ + inst->mlen = mlen; + /* 2 floats per slot returned */ + inst->regs_written = 2 * v->dispatch_width / 8; + inst->pi_noperspective = instr->variables[0]->var->data.interpolation == + INTERP_QUALIFIER_NOPERSPECTIVE; +} + void fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -1584,27 +1601,71 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - /* For most messages, we need one reg of ignored data; the hardware - * requires mlen==1 even when there is no payload. in the per-slot - * offset case, we'll replace this with the proper source data. - */ fs_reg src = vgrf(glsl_type::float_type); - int mlen = 1; /* one reg unless overriden */ fs_inst *inst; switch (instr->intrinsic) { case nir_intrinsic_interp_var_at_centroid: inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); + setup_pixel_interpolater_instruction(this, instr, inst); break; case nir_intrinsic_interp_var_at_sample: { - /* XXX: We should probably handle non-constant sample id's */ nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - assert(const_sample); - unsigned msg_data = const_sample ? 
const_sample->i[0] << 4 : 0; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, - fs_reg(msg_data)); + + if (const_sample) { + unsigned msg_data = const_sample->i[0] << 4; + + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + fs_reg(msg_data)); + + setup_pixel_interpolater_instruction(this, instr, inst); + } else { + fs_reg sample_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); + fs_reg sample_id_reg; + + if (nir_src_is_dynamically_uniform(instr->src[0])) { + sample_id_reg = vgrf(glsl_type::uint_type); + bld.SHL(sample_id_reg, sample_src, fs_reg(4u)); + sample_id_reg = bld.emit_uniformize(sample_id_reg); + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + sample_id_reg); + setup_pixel_interpolater_instruction(this, instr, inst); + } else { + /* Make a loop that sends a message to the pixel interpolator + * for the sample number in each live channel. If there are + * multiple channels with the same sample number then these + * will be handled simultaneously with a single interation of + * the loop. 
+ */ + bld.emit(BRW_OPCODE_DO); + + /* Get the next live sample number into sample_id_reg */ + sample_id_reg = bld.emit_uniformize(sample_src); + + /* Set the flag register so that we can perform the send + * message on all channels that have the same sample number + */ + bld.CMP(bld.null_reg_ud(), + sample_src, sample_id_reg, + BRW_CONDITIONAL_EQ); + fs_reg msg_data = component(vgrf(glsl_type::uint_type), 0); + bld.SHL(msg_data, sample_id_reg, fs_reg(4u)) + ->force_writemask_all = true; + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, + msg_data); + setup_pixel_interpolater_instruction(this, instr, inst); + set_predicate(BRW_PREDICATE_NORMAL, inst); + + /* Continue the loop if there are any live channels left */ + set_predicate_inv(BRW_PREDICATE_NORMAL, + true, /* inverse */ + bld.emit(BRW_OPCODE_WHILE)); + } + } + break; } @@ -1617,6 +1678,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, fs_reg(off_x | (off_y << 4))); + setup_pixel_interpolater_instruction(this, instr, inst); } else { src = vgrf(glsl_type::ivec2_type); fs_reg offset_src = retype(get_nir_src(instr->src[0]), @@ -1646,9 +1708,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); } - mlen = 2 * dispatch_width / 8; inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, fs_reg(0u)); + setup_pixel_interpolater_instruction(this, + instr, + inst, + 2 * dispatch_width / 8); } break; } @@ -1657,12 +1722,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unreachable("Invalid intrinsic"); } - inst->mlen = mlen; - /* 2 floats per slot returned */ - inst->regs_written = 2 * dispatch_width / 8; - inst->pi_noperspective = instr->variables[0]->var->data.interpolation == - INTERP_QUALIFIER_NOPERSPECTIVE; - for (unsigned j = 0; j < instr->num_components; j++) { 
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); src.type = dest.type; -- 1.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev