This patch is Reviewed-by: Ian Romanick <ian.d.roman...@intel.com>
We may be able to eliminate some of this after I do int64 support. It might be cleaner to do unpackInt2x32(doubleBitsToInt64(x)) at a higher level of the compiler instead. On 10/11/2016 02:01 AM, Iago Toral Quiroga wrote: > These opcodes will pick the low/high 32-bit in each 64-bit data element > using Align1 mode. We will use this, for example, to do things like > unpackDouble2x32. > > We use Align1 mode because in order to implement this in Align16 mode > we would need to use 32-bit logical swizzles (XZ for low, YW for high), > but the IR works in terms of 64-bit logical swizzles for DF operands > all the way up to codegen. > > v2: > - use suboffset() instead of get_element_ud() > - no need to set the width on the dst > --- > src/mesa/drivers/dri/i965/brw_defines.h | 2 ++ > src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++ > src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 ++++ > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 25 > ++++++++++++++++++++++++ > 4 files changed, 35 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 79b96a4..8ffb50c 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1100,6 +1100,8 @@ enum opcode { > VEC4_OPCODE_UNPACK_UNIFORM, > VEC4_OPCODE_DOUBLE_TO_FLOAT, > VEC4_OPCODE_FLOAT_TO_DOUBLE, > + VEC4_OPCODE_PICK_LOW_32BIT, > + VEC4_OPCODE_PICK_HIGH_32BIT, > > FS_OPCODE_DDX_COARSE, > FS_OPCODE_DDX_FINE, > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index b063f77..b2f3a56 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -321,6 +321,10 @@ brw_instruction_name(const struct gen_device_info > *devinfo, enum opcode op) > return "double_to_float"; > case VEC4_OPCODE_FLOAT_TO_DOUBLE: > return "float_to_double"; > + case VEC4_OPCODE_PICK_LOW_32BIT: > + return "pick_low_32bit"; > + case VEC4_OPCODE_PICK_HIGH_32BIT: > + return "pick_high_32bit"; > > case FS_OPCODE_DDX_COARSE: > return "ddx_coarse"; > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index 40f8702..4fd04f1 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -255,6 +255,8 @@ vec4_instruction::can_do_writemask(const struct > gen_device_info *devinfo) > case SHADER_OPCODE_GEN4_SCRATCH_READ: > case VEC4_OPCODE_DOUBLE_TO_FLOAT: > case VEC4_OPCODE_FLOAT_TO_DOUBLE: > + case VEC4_OPCODE_PICK_LOW_32BIT: > + case VEC4_OPCODE_PICK_HIGH_32BIT: > case VS_OPCODE_PULL_CONSTANT_LOAD: > case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: > case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: > @@ -510,6 +512,8 @@ vec4_visitor::opt_reduce_swizzle() > > case VEC4_OPCODE_FLOAT_TO_DOUBLE: > case VEC4_OPCODE_DOUBLE_TO_FLOAT: > + case VEC4_OPCODE_PICK_LOW_32BIT: > + case VEC4_OPCODE_PICK_HIGH_32BIT: > swizzle = brw_swizzle_for_size(4); > break; > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index 6f4c438..b8778c4 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1940,6 +1940,31 @@ generate_code(struct brw_codegen *p, > break; > } > > + case VEC4_OPCODE_PICK_LOW_32BIT: > + case VEC4_OPCODE_PICK_HIGH_32BIT: { > + /* Stores the low/high 32-bit of each 64-bit element in src[0] into > + * dst using ALIGN1 mode and a <8,4,2>:UD region on the source. > + */ > + assert(type_sz(src[0].type) == 8); > + assert(type_sz(dst.type) == 4); > + > + brw_set_default_access_mode(p, BRW_ALIGN_1); > + > + dst = retype(dst, BRW_REGISTER_TYPE_UD); > + dst.hstride = BRW_HORIZONTAL_STRIDE_1; > + > + src[0] = retype(src[0], BRW_REGISTER_TYPE_UD); > + if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT) > + src[0] = suboffset(src[0], 1); > + src[0].vstride = BRW_VERTICAL_STRIDE_8; > + src[0].width = BRW_WIDTH_4; > + src[0].hstride = BRW_HORIZONTAL_STRIDE_2; > + brw_MOV(p, dst, src[0]); > + > + brw_set_default_access_mode(p, BRW_ALIGN_16); > + break; > + } > + > case VEC4_OPCODE_PACK_BYTES: { > /* Is effectively: > * > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev