Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_fs.h | 3 +++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 32 ++++++++++++++++++++++++++ 3 files changed, 36 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 88097b7..186d09a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -913,6 +913,7 @@ enum opcode { FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, + FS_OPCODE_UNIFORM_DOUBLE_LOAD, FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_OMASK, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0f47464..c51aae4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -735,6 +735,9 @@ private: struct brw_reg dst, struct brw_reg index, struct brw_reg offset); + void generate_uniform_double_float_load(const fs_inst *inst, + struct brw_reg dst, + struct brw_reg src); void generate_mov_dispatch_to_flags(fs_inst *inst); void generate_pixel_interpolator_query(fs_inst *inst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 4bcb074..39dc563 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1508,6 +1508,34 @@ fs_generator::generate_pack_double_2x32(fs_inst *inst, } void +fs_generator::generate_uniform_double_float_load(const fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + assert(p->brw->gen == 7); + + dst.type = BRW_REGISTER_TYPE_UD; + dst.width = BRW_WIDTH_8; + dst.hstride = BRW_HORIZONTAL_STRIDE_1; + dst.vstride = BRW_VERTICAL_STRIDE_8; + + /* Treat the source as a packed pair of 32-bit elements. */ + src.type = BRW_REGISTER_TYPE_UD; + src.width = BRW_WIDTH_2; + src.hstride = BRW_HORIZONTAL_STRIDE_1; + src.vstride = BRW_VERTICAL_STRIDE_0; + + /* Issue two instructions: a single move copies only execution-width many + * 32-bit elements. 
In other words, each instruction writes only + * (execution width / 2) double-precision channels, so a second copy, + * starting (exec_size / 8) registers later, writes the remaining ones. + */ + brw_copy_double_float_scalar(p, dst, src); + dst.nr += (inst->exec_size / 8); + brw_copy_double_float_scalar(p, dst, src); +} + +void fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, struct brw_reg dst, struct brw_reg src) @@ -1982,6 +2010,10 @@ fs_generator::generate_code(const cfg_t *cfg) generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; + case FS_OPCODE_UNIFORM_DOUBLE_LOAD: + generate_uniform_double_float_load(inst, dst, src[0]); + break; + case FS_OPCODE_REP_FB_WRITE: case FS_OPCODE_FB_WRITE: generate_fb_write(inst, src[0]); -- 1.8.3.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev