On Thu, Feb 19, 2015 at 6:09 PM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > v2 : add double to int/unsigned conversion > v3: handle fp64 consts better > v4: use DRSQ > v4.1: add d2b > v4.2: drop DDIV > > v5: split out some prep patches. > v5.1: add some comments. > v5.2: more comments > > v6: simplify down the double instruction > generation loop. > > v7: Merge Ilia's two cleanup patches. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/mesa/state_tracker/st_extensions.c | 6 + > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 578 > ++++++++++++++++++++++------- > 2 files changed, 458 insertions(+), 126 deletions(-) > > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 56502fb..003d280 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -464,7 +478,6 @@ public: > static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, > GLSL_TYPE_ERROR); > > static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, > GLSL_TYPE_ERROR); > - > static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, > GLSL_TYPE_FLOAT, 0); > static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, > GLSL_TYPE_FLOAT, 1); > static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, > GLSL_TYPE_FLOAT, 2);
Drop this hunk. > @@ -597,22 +616,129 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, > unsigned op, > > this->instructions.push_tail(inst); > > + /* > + * This section contains the double processing. > + * GLSL just represents doubles as single channel values, > + * however most HW and TGSI represent doubles as pairs of register > channels. > + * > + * so we have to fixup destination writemask/index and src > swizzle/indexes. > + * dest writemasks need to translate from single channel write mask > + * to a dual-channel writemask, but also need to modify the index, > + * if we are touching the Z,W fields in the pre-translated writemask. > + * > + * src channels have similiar index modifications along with swizzle > + * changes to we pick the XY, ZW pairs from the correct index. > + * > + * GLSL [0].x -> TGSI [0].xy > + * GLSL [0].y -> TGSI [0].zw > + * GLSL [0].z -> TGSI [1].xy > + * GLSL [0].w -> TGSI [1].zw > + */ > + if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == > GLSL_TYPE_DOUBLE || > + inst->src[0].type == GLSL_TYPE_DOUBLE) { > + glsl_to_tgsi_instruction *dinst = NULL; > + int initial_src_swz[4], initial_src_idx[4]; > + int initial_dst_idx[2], initial_dst_writemask[2]; > + /* select the writemask for dst0 or dst1 */ > + unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? > inst->dst[1].writemask : inst->dst[0].writemask; > + > + /* copy out the writemask, index and swizzles for all src/dsts. */ > + for (j = 0; j < 2; j++) { > + initial_dst_writemask[j] = inst->dst[j].writemask; > + initial_dst_idx[j] = inst->dst[j].index; > + } > + > + for (j = 0; j < 4; j++) { > + initial_src_swz[j] = inst->src[j].swizzle; > + initial_src_idx[j] = inst->src[j].index; > + } > + > + /* > + * scan all the components in the dst writemask > + * generate an instruction for each of them if required. 
> + */ > + while (writemask) { > + > + int i = u_bit_scan(&writemask); > + > + /* first time use previous instruction */ > + if (dinst == NULL) { > + dinst = inst; > + } else { > + /* create a new instructions for subsequent attempts */ > + dinst = new(mem_ctx) glsl_to_tgsi_instruction(); > + *dinst = *inst; > + dinst->next = NULL; > + dinst->prev = NULL; > + this->instructions.push_tail(dinst); > + } > + > + /* modify the destination if we are splitting */ > + for (j = 0; j < 2; j++) { > + if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) { > + dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : > WRITEMASK_XY; > + dinst->dst[j].index = initial_dst_idx[j]; > + if (i > 1) > + dinst->dst[j].index++; > + } else { > + /* if we aren't writing to a double, just get the bit of the > initial writemask > + for this channel */ > + dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i); > + } > + } > + > + /* modify the src registers */ > + for (j = 0; j < 4; j++) { > + int swz = GET_SWZ(initial_src_swz[j], i); > + > + if (dinst->src[j].type == GLSL_TYPE_DOUBLE) { > + dinst->src[j].index = initial_src_idx[j]; > + if (swz > 1) > + dinst->src[j].index++; > + > + if (swz & 1) > + dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, > SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); > + else > + dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, > SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); > + > + } else { > + /* some opcodes are special case in what they use as sources > + - F2D is a float src0, DLDEXP is integer src1 */ > + if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_DLDEXP || (op > == TGSI_OPCODE_UCMP && dinst->dst[0].type == GLSL_TYPE_DOUBLE)) { This line is well over the 80-character limit. I know some of the other lines probably don't fit either, but... this one is really egregious. Please wrap it.
> + dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz); > + } > + } > + } > + } > + inst = dinst; > + } > + > + > return inst; > } > @@ -909,23 +1067,40 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file > file, > > int index = 0; > immediate_storage *entry; > + int size32 = size * (datatype == GL_DOUBLE ? 2 : 1); > + int i; > > /* Search immediate storage to see if we already have an identical > * immediate that we can use instead of adding a duplicate entry. > */ > foreach_in_list(immediate_storage, entry, &this->immediates) { > - if (entry->size == size && > - entry->type == datatype && > - !memcmp(entry->values, values, size * sizeof(gl_constant_value))) { > - return index; > + immediate_storage *tmp = entry; > + > + for (i = 0; i * 4 < size32; i++) { > + int slot_size = MIN2(size32 - (i * 4), 4); > + if (tmp->type != datatype || tmp->size32 != slot_size) > + break; > + if (memcmp(tmp->values, &values[i * 4], > + slot_size * sizeof(gl_constant_value))) > + break; > + > + /* Everything matches, keep going until the full size is matched */ OK, this one is my fault, since it came from one of my 'cleanup' patches. But a "tmp = tmp->next;" (or something along those lines) is missing here. As written, tmp is never advanced, so every iteration of the loop compares against the same immediate entry instead of the following slot. > } > + > + /* The full value matched */ > + if (i * 4 >= size32) > + return index; > + > index++; > } With those fixed, this is Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev