Neil Roberts <[email protected]> writes: > If a non-const sample number is given to interpolateAtSample it will > now generate an indirect send message with the sample ID similar to > how non-const sampler array indexing works. Previously non-const > values were ignored and instead it ended up using a constant 0 value. > > The generator will try to determine if the sample ID is dynamically > uniform via nir_src_is_dynamically_uniform. If not it will query the > pixel interpolator in a loop, once for each possible sample number. > This is necessary because the indirect send message doesn't seem to > have a way to specify a different value for each fragment. > > The range of possible sample numbers is determined using > STATE_NUM_SAMPLES. When linking the shader it will now add a reference > to this state if any dynamically non-uniform calls to > interpolateAtSample are found. > > This fixes the following two Piglit tests: > > arb_gpu_shader5-interpolateAtSample-nonconst > arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform > > v2: Handle dynamically non-uniform sample ids. > v3: Remove the BREAK instruction and predicate the WHILE directly. > Make the tokens arrays const. > --- > > All three patches are also available in a branch here: > > https://github.com/bpeel/mesa/tree/wip/nonconst-interpolate-at-sample > > src/mesa/drivers/dri/i965/brw_eu.h | 2 +- > src/mesa/drivers/dri/i965/brw_eu_emit.c | 34 ++++--- > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +- > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 127 > +++++++++++++++++++++---- > src/mesa/drivers/dri/i965/brw_program.c | 54 +++++++++++ > src/mesa/drivers/dri/i965/brw_program.h | 1 + > src/mesa/drivers/dri/i965/brw_shader.cpp | 2 + > 7 files changed, 193 insertions(+), 32 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index 761aa0e..0ac1ad9 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, > struct brw_reg mrf, > bool noperspective, > unsigned mode, > - unsigned data, > + struct brw_reg data, > unsigned msg_length, > unsigned response_length); > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c > b/src/mesa/drivers/dri/i965/brw_eu_emit.c > index dc699bb..9c38e99 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c > @@ -3212,26 +3212,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p, > struct brw_reg mrf, > bool noperspective, > unsigned mode, > - unsigned data, > + struct brw_reg data, > unsigned msg_length, > unsigned response_length) > { > const struct brw_device_info *devinfo = p->devinfo; > - struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); > + struct brw_inst *insn; > + uint16_t exec_size; > > - brw_set_dest(p, insn, dest); > - brw_set_src0(p, insn, mrf); > - brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, > - msg_length, response_length, > - false /* header is never present for PI */, > - false); > + if (data.file == BRW_IMMEDIATE_VALUE) { > + insn = next_insn(p, BRW_OPCODE_SEND); > + brw_set_dest(p, insn, dest); > + brw_set_src0(p, insn, mrf); > + brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, > + msg_length, response_length, > + false /* header is never present for PI */, > + false); > + brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud); > + } else { > + insn = brw_send_indirect_message(p, > + GEN7_SFID_PIXEL_INTERPOLATOR, > + dest, > + mrf, > + vec1(data)); > + brw_inst_set_mlen(devinfo, insn, msg_length); > + brw_inst_set_rlen(devinfo, insn, response_length); > + } > > - brw_inst_set_pi_simd_mode( > - devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16); > + exec_size = brw_inst_exec_size(devinfo, p->current); > + brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); > brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px > dispatch */ > brw_inst_set_pi_nopersp(devinfo, insn, noperspective); > brw_inst_set_pi_message_type(devinfo, insn, mode); > - brw_inst_set_pi_message_data(devinfo, insn, data); > } > > void > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 6f8b75e..17e19cf 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -1377,15 +1377,14 @@ > fs_generator::generate_pixel_interpolator_query(fs_inst *inst, > struct brw_reg msg_data, > unsigned msg_type) > { > - assert(msg_data.file == BRW_IMMEDIATE_VALUE && > - msg_data.type == BRW_REGISTER_TYPE_UD); > + assert(msg_data.type == BRW_REGISTER_TYPE_UD); > > brw_pixel_interpolator_query(p, > retype(dst, BRW_REGISTER_TYPE_UW), > src, > inst->pi_noperspective, > msg_type, > - msg_data.dw1.ud, > + msg_data, > inst->mlen, > inst->regs_written); > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 03fe680..0625e44 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -1180,6 +1180,47 @@ get_image_atomic_op(nir_intrinsic_op op, const > glsl_type *type) > } > } > > +/* For most messages, we need one reg of ignored data; the hardware requires > + * mlen==1 even when there is no payload. in the per-slot offset case, we'll > + * replace this with the proper source data. > + */ > +static void > +setup_pixel_interpolater_instruction(fs_visitor *v, > + nir_intrinsic_instr *instr, > + fs_inst *inst, > + int mlen = 1) > +{ > + inst->mlen = mlen; > + /* 2 floats per slot returned */ > + inst->regs_written = 2 * v->dispatch_width / 8; > + inst->pi_noperspective = instr->variables[0]->var->data.interpolation == > + INTERP_QUALIFIER_NOPERSPECTIVE; > +} > + > +static fs_reg > +get_num_samples_reg(fs_visitor *v) > +{ > + struct gl_program_parameter_list *params = v->prog->Parameters; > + static const gl_state_index tokens[STATE_LENGTH] = { > + STATE_NUM_SAMPLES > + }; > + GLuint index = _mesa_add_state_reference(params, tokens); > + unsigned i; > + > + /* Try to find an existing copy of the uniform */ > + for (i = 0; i < v->uniforms; i++) { > + if (v->stage_prog_data->param[i] == > + &v->prog->Parameters->ParameterValues[index][0]) > + goto found; > + } > + > + v->stage_prog_data->param[v->uniforms++] = > + &v->prog->Parameters->ParameterValues[index][0]; > + > +found: > + return retype(fs_reg(UNIFORM, i), BRW_REGISTER_TYPE_UD); > +} > +
I think this (or rather our uniform handling) is flawed in the same way
as fs_visitor::rescale_texcoord(). The UNIFORM file maps one-to-one to
the brw_stage_prog_data::param array during the SIMD8 compilation,
however before the SIMD16 compilation is run uniforms are split into
pull and push params and re-packed. After that point the UNIFORM file
maps to either ::param or ::pull_param according to either the push_ or
pull_constant_loc[] remapping tables depending on which one of the two
is non-negative... Sigh...
> void
> fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr
> *instr)
> {
> @@ -1584,27 +1625,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr
>
> fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
>
> - /* For most messages, we need one reg of ignored data; the hardware
> - * requires mlen==1 even when there is no payload. in the per-slot
> - * offset case, we'll replace this with the proper source data.
> - */
> fs_reg src = vgrf(glsl_type::float_type);
> - int mlen = 1; /* one reg unless overriden */
> fs_inst *inst;
>
> switch (instr->intrinsic) {
> case nir_intrinsic_interp_var_at_centroid:
> inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
> dst_xy, src, fs_reg(0u));
> + setup_pixel_interpolater_instruction(this, instr, inst);
> break;
>
> case nir_intrinsic_interp_var_at_sample: {
> - /* XXX: We should probably handle non-constant sample id's */
> nir_const_value *const_sample =
> nir_src_as_const_value(instr->src[0]);
> - assert(const_sample);
> - unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
> - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> - fs_reg(msg_data));
> +
> + if (const_sample) {
> + unsigned msg_data = const_sample->i[0] << 4;
> +
> + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> + fs_reg(msg_data));
> +
> + setup_pixel_interpolater_instruction(this, instr, inst);
> + } else {
> + fs_reg sample_src = retype(get_nir_src(instr->src[0]),
> + BRW_REGISTER_TYPE_UD);
> + fs_reg sample_id_reg;
> +
> + if (nir_src_is_dynamically_uniform(instr->src[0])) {
> + sample_id_reg = vgrf(glsl_type::uint_type);
> + bld.SHL(sample_id_reg, sample_src, fs_reg(4u));
> + sample_id_reg = bld.emit_uniformize(sample_id_reg);
> + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> + sample_id_reg);
> + setup_pixel_interpolater_instruction(this, instr, inst);
> + } else {
> + /* Make a loop that sends a message to the pixel interpolator
> + * for each possible sample number so that each individual
> + * message will be dynamically uniform. The number of samples
> + * is determined by accessing the STATE_NUM_SAMPLES state var.
> + */
> + fs_reg i_reg = vgrf(glsl_type::uint_type);
> + fs_reg sample_id_reg = vgrf(glsl_type::uint_type);
> + fs_reg num_samples_reg = get_num_samples_reg(this);
> +
> + bld.ADD(retype(i_reg, BRW_REGISTER_TYPE_D),
> + retype(num_samples_reg, BRW_REGISTER_TYPE_D),
> + fs_reg(-1));
> +
> + bld.emit(BRW_OPCODE_DO);
> +
> + bld.CMP(bld.null_reg_ud(),
> + sample_src, i_reg,
> + BRW_CONDITIONAL_EQ);
> + bld.IF(BRW_PREDICATE_NORMAL);
> + bld.SHL(sample_id_reg, i_reg, fs_reg(4u));
> + sample_id_reg = bld.emit_uniformize(sample_id_reg);
> + inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
> + sample_id_reg);
> + setup_pixel_interpolater_instruction(this, instr, inst);
> + bld.emit(BRW_OPCODE_ENDIF);
> +
> + /* The conditional flag on instructions other than CMP*
> compare
> + * the *result* of the operation to zero rather than comparing
> + * the two sources.
> + */
> + set_condmod(BRW_CONDITIONAL_GE,
> + bld.ADD(retype(i_reg, BRW_REGISTER_TYPE_D),
> + retype(i_reg, BRW_REGISTER_TYPE_D),
> + fs_reg(-1)));
> +
> + /* The predicate means that the loop will continue if i is now
> + * >= 0
> + */
> + set_predicate(BRW_PREDICATE_NORMAL,
> bld.emit(BRW_OPCODE_WHILE));
> + }
> + }
> +
> break;
> }
>
> @@ -1617,6 +1712,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr
>
> inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy,
> src,
> fs_reg(off_x | (off_y << 4)));
> + setup_pixel_interpolater_instruction(this, instr, inst);
> } else {
> src = vgrf(glsl_type::ivec2_type);
> fs_reg offset_src = retype(get_nir_src(instr->src[0]),
> @@ -1646,9 +1742,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr
> bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
> }
>
> - mlen = 2 * dispatch_width / 8;
> inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
> dst_xy, src,
> fs_reg(0u));
> + setup_pixel_interpolater_instruction(this,
> + instr,
> + inst,
> + 2 * dispatch_width / 8);
> }
> break;
> }
> @@ -1657,12 +1756,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr
> unreachable("Invalid intrinsic");
> }
>
> - inst->mlen = mlen;
> - /* 2 floats per slot returned */
> - inst->regs_written = 2 * dispatch_width / 8;
> - inst->pi_noperspective = instr->variables[0]->var->data.interpolation
> ==
> - INTERP_QUALIFIER_NOPERSPECTIVE;
> -
> for (unsigned j = 0; j < instr->num_components; j++) {
> fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
> src.type = dest.type;
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c
> b/src/mesa/drivers/dri/i965/brw_program.c
> index a034dac..2d7966c 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -144,6 +144,8 @@ brwProgramStringNotify(struct gl_context *ctx,
>
> prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT,
> true);
>
> + brw_add_interpolate_at_sample_params(prog);
> +
> brw_fs_precompile(ctx, NULL, prog);
> break;
> }
> @@ -242,6 +244,58 @@ brw_add_texrect_params(struct gl_program *prog)
> }
> }
>
> +static bool
> +find_interpolate_at_sample_in_block(nir_block *block,
> + void *data)
> +{
> + nir_foreach_instr(block, instr) {
> + if (instr->type != nir_instr_type_intrinsic)
> + continue;
> +
> + nir_intrinsic_instr *intrinsic_instr = nir_instr_as_intrinsic(instr);
> +
> + if (intrinsic_instr->intrinsic != nir_intrinsic_interp_var_at_sample)
> + continue;
> +
> + /* If the sample number is known to be dynamically uniform then
> + * the generator won't need the num_samples state.
> + */
> + if (nir_src_is_dynamically_uniform(intrinsic_instr->src[0]))
> + continue;
> +
> + return false;
> + }
> +
> + return true;
> +}
> +
> +void
> +brw_add_interpolate_at_sample_params(struct gl_program *prog)
> +{
> + static const gl_state_index tokens[STATE_LENGTH] = {
> + STATE_NUM_SAMPLES
> + };
> +
> + if (!prog->nir)
> + return;
> +
> + /* If anything calls interpolateAtSample with a dynamically non-uniform
> + * sample ID then we need STATE_NUM_SAMPLES to be able to iterate over
> + * each possible value.
> + */
> + nir_foreach_overload(prog->nir, overload) {
> + if (overload->impl) {
> + bool found = !nir_foreach_block(overload->impl,
> + find_interpolate_at_sample_in_block,
> + ralloc_parent(overload->impl));
> + if (found) {
> + _mesa_add_state_reference(prog->Parameters, tokens);
> + break;
> + }
> + }
> + }
> +}
> +
> /* Per-thread scratch space is a power-of-two multiple of 1KB. */
> int
> brw_get_scratch_size(int size)
> diff --git a/src/mesa/drivers/dri/i965/brw_program.h
> b/src/mesa/drivers/dri/i965/brw_program.h
> index cf0522a..d68eecc 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.h
> +++ b/src/mesa/drivers/dri/i965/brw_program.h
> @@ -164,6 +164,7 @@ bool brw_debug_recompile_sampler_key(struct brw_context
> *brw,
> const struct brw_sampler_prog_key_data
> *old_key,
> const struct brw_sampler_prog_key_data
> *key);
> void brw_add_texrect_params(struct gl_program *prog);
> +void brw_add_interpolate_at_sample_params(struct gl_program *prog);
>
> void
> brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 7bc080b..1e98881 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -388,6 +388,8 @@ brw_link_shader(struct gl_context *ctx, struct
> gl_shader_program *shProg)
> prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
> is_scalar_shader_stage(compiler, stage));
>
> + brw_add_interpolate_at_sample_params(prog);
> +
> _mesa_reference_program(ctx, &prog, NULL);
> }
>
> --
> 1.9.3
>
> _______________________________________________
> mesa-dev mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
