This patch is split out for review. It will be squashed before pushing. --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++- src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 44 ++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 2 ++ 5 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index c67728b..2c5cd7a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -915,6 +915,7 @@ enum opcode { */ FS_OPCODE_FB_WRITE_LOGICAL, + FS_OPCODE_PACK_STENCIL_REF, FS_OPCODE_BLORP_FB_WRITE, FS_OPCODE_REP_FB_WRITE, SHADER_OPCODE_RCP, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 560eb91..c962043 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3440,7 +3440,11 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, if (src_stencil.file != BAD_FILE) { assert(devinfo->gen >= 9); assert(bld.dispatch_width() != 16); - sources[length] = src_stencil; + + sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.exec_all().annotate("FB write OS") + .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], + retype(src_stencil, BRW_REGISTER_TYPE_UB)); length++; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4f59d4b..e2bc469 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -419,6 +419,8 @@ private: void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); + void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst, + struct brw_reg src); void generate_barrier(fs_inst *inst, struct brw_reg src); void generate_blorp_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 1a893c9..80cc6ad 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -415,6 +415,46 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) } void +fs_generator::generate_stencil_ref_packing(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + assert(dispatch_width == 8); + assert(devinfo->gen >= 9); + + /* Stencil value updates are provided in 8 slots of 1 byte per slot. + * Presumably, in order to save memory bandwidth, the stencil reference + * values written from the FS need to be packed into 2 dwords (this makes + * sense because the stencil values are limited to 1 byte each and a SIMD8 + * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.) + * + * The spec is confusing here because in the payload definition of MDP_RTW_S8 + * (Message Data Payload for Render Target Writes with Stencil 8b) the + * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of + * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the + * packed values specified above and diagrammed below: + * + * 31 0 + * -------------------------------- + * DW | | + * 2-7 | IGNORED | + * | | + * -------------------------------- + * DW1 | STC | STC | STC | STC | + * | slot7 | slot6 | slot5 | slot4| + * -------------------------------- + * DW0 | STC | STC | STC | STC | + * | slot3 | slot2 | slot1 | slot0| + * -------------------------------- + */ + + src.width = BRW_WIDTH_1; + src.hstride = BRW_HORIZONTAL_STRIDE_0; + src.vstride = BRW_VERTICAL_STRIDE_4; + brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), retype(src, BRW_REGISTER_TYPE_UB)); +} + +void fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src) { brw_barrier(p, src); @@ -2153,6 +2193,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_barrier(inst, src[0]); break; + case FS_OPCODE_PACK_STENCIL_REF: + generate_stencil_ref_packing(inst, dst, src[0]); + break; + default: unreachable("Unsupported opcode"); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2324b56..c38e34e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -290,6 +290,8 @@ brw_instruction_name(enum opcode op) return "fb_write"; case FS_OPCODE_FB_WRITE_LOGICAL: return "fb_write_logical"; + case FS_OPCODE_PACK_STENCIL_REF: + return "fb write output stencil"; case FS_OPCODE_BLORP_FB_WRITE: return "blorp_fb_write"; case FS_OPCODE_REP_FB_WRITE: -- 2.6.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev