On Tue, May 6, 2014 at 10:48 AM, Roland Scheidegger <srol...@vmware.com> wrote: > Looks good to me.
Thanks! > Does that mean if also the GATHER_SM5 cap is supported you have to > support 4 independent, non-constant offsets? Not 100% sure what you're asking... but yes, for ARB_gs5 to work, you have to support independent non-constant offsets. And if you have PIPE_CAP_TEXTURE_GATHER_OFFSETS enabled, you're making the claim that you can handle multiple independent offsets in a single texgather. Without the cap, the 4 offsets get lowered into 4 separate texgathers (with only one of the returned components used). With nvc0, the offsets are passed in via a register, so non-constant is never an issue. And with nv50, the offsets must be immediates (and there can be only 1 set of them), but it also has no hope of supporting all of ARB_gs5. > Would it make sense to reorder the caps so the gather stuff is all > together (now 5 cap bits just for this...)? The quantity of caps for texgather is a little ridiculous. I'm of the opinion that this should be the default behaviour, and it should be up to the driver to lower it into 4 texgathers if it can't handle them directly. Furthermore, this functionality is only available (via GL) with ARB_gs5, which in turn will require a whole bunch of stuff, so I don't know whether the GATHER_SM5 cap is really that useful. And for someone with a DX tracker, this functionality would again not be useful on its own, the rest of SM5 would have to be supported as well (I assume). But that's not what got implemented, and I don't care to modify radeon, which can only support 1 offset at a time. (Although I don't think the radeon impl got pushed...) I anticipate that llvmpipe doesn't care one way or another (perhaps with even a minor preference towards having it all in one instruction). If there's concensus, happy to switch this on by default and get rid of the cap :) [And also get rid of the GATHER_SM5 cap.] > > Roland > > Am 29.04.2014 01:30, schrieb Ilia Mirkin: >> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> >> --- >> >> The handling of the 4 offsets is less-than-pretty. I had an alternate version >> that created a new ir_dereference_array object and ran ->accept on that. This >> worked as well, but for each offset it would create a separate new array, and >> then deref just one item out of it. This seems incredibly wasteful. The >> slightly open-coded version of that seems reasonable and uses the same array. >> >> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 55 >> ++++++++++++++++++++++-------- >> 1 file changed, 41 insertions(+), 14 deletions(-) >> >> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >> index d1c3856..20d5e99 100644 >> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp >> @@ -87,8 +87,7 @@ extern "C" { >> */ >> #define MAX_ARRAYS 256 >> >> -/* if we support a native gallium TG4 with the ability to take 4 texoffsets >> then bump this */ >> -#define MAX_GLSL_TEXTURE_OFFSET 1 >> +#define MAX_GLSL_TEXTURE_OFFSET 4 >> >> class st_src_reg; >> class st_dst_reg; >> @@ -2728,12 +2727,13 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) >> void >> glsl_to_tgsi_visitor::visit(ir_texture *ir) >> { >> - st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, >> offset, sample_index, component; >> + st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, >> offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component; >> st_dst_reg result_dst, coord_dst, cube_sc_dst; >> glsl_to_tgsi_instruction *inst = NULL; >> unsigned opcode = TGSI_OPCODE_NOP; >> const glsl_type *sampler_type = ir->sampler->type; >> bool is_cube_array = false; >> + unsigned i; >> >> /* if we are a cube array sampler */ >> if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && >> @@ -2771,7 +2771,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) >> opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 >> : TGSI_OPCODE_TEX; >> if (ir->offset) { >> ir->offset->accept(this); >> - offset = this->result; >> + offset[0] = this->result; >> } >> break; >> case ir_txb: >> @@ -2780,7 +2780,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) >> lod_info = this->result; >> if (ir->offset) { >> ir->offset->accept(this); >> - offset = this->result; >> + offset[0] = this->result; >> } >> break; >> case ir_txl: >> @@ -2789,7 +2789,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) >> lod_info = this->result; >> if (ir->offset) { >> ir->offset->accept(this); >> - offset = this->result; >> + offset[0] = this->result; >> } >> break; >> case ir_txd: >> @@ -2800,7 +2800,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) >> dy = this->result; >> if (ir->offset) { >> ir->offset->accept(this); >> - offset = this->result; >> + offset[0] = this->result; >> } >> break; >> case ir_txs: >> @@ -2814,7 +2814,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) >> lod_info = this->result; >> if (ir->offset) { >> ir->offset->accept(this); >> - offset = this->result; >> + offset[0] = this->result; >> } >> break; >> case ir_txf_ms: >> @@ -2828,9 +2828,17 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) >> component = this->result; >> if (ir->offset) { >> ir->offset->accept(this); >> - /* this should have been lowered */ >> - assert(ir->offset->type->base_type != GLSL_TYPE_ARRAY); >> - offset = this->result; >> + if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) { >> + const glsl_type *elt_type = ir->offset->type->fields.array; >> + for (i = 0; i < ir->offset->type->length; i++) { >> + offset[i] = this->result; >> + offset[i].index += i * type_size(elt_type); >> + offset[i].type = elt_type->base_type; >> + offset[i].swizzle = >> swizzle_for_size(elt_type->vector_elements); >> + } >> + } else { >> + offset[0] = this->result; >> + } >> } >> break; >> case ir_lod: >> @@ -2960,8 +2968,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) >> this->prog); >> >> if (ir->offset) { >> - inst->tex_offset_num_offset = 1; >> - inst->tex_offsets[0] = offset; >> + for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != >> PROGRAM_UNDEFINED; i++) >> + inst->tex_offsets[i] = offset[i]; >> + inst->tex_offset_num_offset = i; >> } >> >> switch (sampler_type->sampler_dimensionality) { >> @@ -4479,6 +4488,8 @@ translate_tex_offset(struct st_translate *t, >> { >> struct tgsi_texture_offset offset; >> struct ureg_src imm_src; >> + struct ureg_dst dst; >> + int array; >> >> switch (in_offset->file) { >> case PROGRAM_IMMEDIATE: >> @@ -4500,6 +4511,20 @@ translate_tex_offset(struct st_translate *t, >> offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); >> offset.Padding = 0; >> break; >> + case PROGRAM_ARRAY: >> + array = in_offset->index >> 16; >> + >> + assert(array >= 0); >> + assert(array < (int) Elements(t->arrays)); >> + >> + dst = t->arrays[array]; >> + offset.File = dst.File; >> + offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000; >> + offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0); >> + offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1); >> + offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2); >> + offset.Padding = 0; >> + break; >> default: >> break; >> } >> @@ -5350,6 +5375,7 @@ st_new_shader_program(struct gl_context *ctx, GLuint >> name) >> GLboolean >> st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) >> { >> + struct pipe_screen *pscreen = ctx->st->pipe->screen; >> assert(prog->LinkStatus); >> >> for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { >> @@ -5388,7 +5414,8 @@ st_link_shader(struct gl_context *ctx, struct >> gl_shader_program *prog) >> lower_packing_builtins(ir, lower_inst); >> } >> >> - lower_offset_arrays(ir); >> + if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) >> + lower_offset_arrays(ir); >> do_mat_op_to_vec(ir); >> lower_instructions(ir, >> MOD_TO_FRACT | >> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev