On Tuesday, May 3, 2016 2:22:00 PM PDT Samuel Iglesias Gonsálvez wrote: > From: Iago Toral Quiroga <ito...@igalia.com> > > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 49 ++++++++++++++++++++++++++++++ ++++-- > 1 file changed, 47 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/ i965/brw_fs.cpp > index bc81a80..0e69be8 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -193,8 +193,15 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, > else > op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD; > > + /* The pull load message will load a vec4 (16 bytes). If we are loading > + * a double this means we are only loading 2 elements worth of data. > + * We also want to use a 32-bit data type for the dst of the load operation > + * so other parts of the driver don't get confused about the size of the > + * result. > + */ > int regs_written = 4 * (bld.dispatch_width() / 8) * scale; > - fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), dst.type); > + fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), > + BRW_REGISTER_TYPE_F); > fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset); > inst->regs_written = regs_written; > > @@ -207,7 +214,45 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, > inst->mlen = 1 + bld.dispatch_width() / 8; > } > > - bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale)); > + vec4_result.type = dst.type; > + > + /* Our VARYING_PULL_CONSTANT_LOAD reads a vector of 32-bit elements. If we > + * are reading doubles this means that we get this: > + * > + * r0: x0 x0 x0 x0 x0 x0 x0 x0 > + * r1: x1 x1 x1 x1 x1 x1 x1 x1 > + * r2: y0 y0 y0 y0 y0 y0 y0 y0 > + * r3: y1 y1 y1 y1 y1 y1 y1 y1 > + * > + * Fix this up so we return valid double elements: > + * > + * r0: x0 x1 x0 x1 x0 x1 x0 x1 > + * r1: x0 x1 x0 x1 x0 x1 x0 x1 > + * r2: y0 y1 y0 y1 y0 y1 y0 y1 > + * r3: y0 y1 y0 y1 y0 y1 y0 y1 > + */
I think this could be simplified a little... > + if (type_sz(dst.type) == 8) { > + int multiplier = bld.dispatch_width() / 8; > + fs_reg fixed_res = > + fs_reg(VGRF, alloc.allocate(2 * multiplier), BRW_REGISTER_TYPE_F); > + /* We only have 2 doubles in a 32-bit vec4 */ > + for (int i = 0; i < 2; i++) { > + fs_reg vec4_float = > + horiz_offset(retype(vec4_result, BRW_REGISTER_TYPE_F), > + multiplier * 16 * i); > + > + bld.MOV(stride(fixed_res, 2), vec4_float); ^^^ copies x0 or y0 into fixed_res > + bld.MOV(stride(horiz_offset(fixed_res, 1), 2), > + horiz_offset(vec4_float, 8 * multiplier)); > + ^^^ copies x1 or y1 into fixed_res > + bld.MOV(horiz_offset(vec4_result, multiplier * 8 * i), > + retype(fixed_res, BRW_REGISTER_TYPE_DF)); This just copies fixed_res back over vec4_result? I don't think we need to do this - vec4_result is just a temporary. We really want the final result in dst. > + } > + } > + > + int type_slots = MAX2(type_sz(dst.type) / 4, 1); > + bld.MOV(dst, offset(vec4_result, bld, > + ((const_offset & 0xf) / (4 * type_slots)) * scale)); > } How about we simplify this to: if (type_sz(dst.type) == 8) { int multiplier = bld.dispatch_width() / 8; fs_reg fixed_res = fs_reg(VGRF, alloc.allocate(2 * multiplier), BRW_REGISTER_TYPE_F); /* We only have 2 doubles in a 32-bit vec4 */ for (int i = 0; i < 2; i++) { fs_reg vec4_float = horiz_offset(retype(vec4_result, BRW_REGISTER_TYPE_F), multiplier * 16 * i); bld.MOV(stride(fixed_res, 2), vec4_float); bld.MOV(stride(horiz_offset(fixed_res, 1), 2), horiz_offset(vec4_float, 8 * multiplier)); // Differences start here: no extra MOV, simplify the end a bit } assert(scale == 1); bld.MOV(dst, offset(fixed_res, bld, (const_offset & 0xf) / 8)); } else { bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale)); } With that change, Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev