Align16 is no more. We previously generated an align16 ADD instruction to calculate DDY:
add(8) g11<1>F -g10<4>.xyxyF g10<4>.zwzwF { align16 1Q }; Without align16, we now implement it as two align1 instructions: add(4) g11<2>F -g10<4,2,0>F g10.2<4,2,0>F { align1 1N }; add(4) g11.1<2>F -g10.1<4,2,0>F g10.3<4,2,0>F { align1 1N }; --- src/intel/compiler/brw_fs_generator.cpp | 70 ++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 013d2c820a0..ffc46972420 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst, { if (inst->opcode == FS_OPCODE_DDY_FINE) { /* produce accurate derivatives */ - struct brw_reg src0 = src; - struct brw_reg src1 = src; + if (devinfo->gen >= 11) { + struct brw_reg x = src; + struct brw_reg y = src; + struct brw_reg z = src; + struct brw_reg w = src; + struct brw_reg dst_e = dst; + struct brw_reg dst_o = dst; + + x.vstride = BRW_VERTICAL_STRIDE_4; + y.vstride = BRW_VERTICAL_STRIDE_4; + z.vstride = BRW_VERTICAL_STRIDE_4; + w.vstride = BRW_VERTICAL_STRIDE_4; + + x.width = BRW_WIDTH_2; + y.width = BRW_WIDTH_2; + z.width = BRW_WIDTH_2; + w.width = BRW_WIDTH_2; + + x.hstride = BRW_HORIZONTAL_STRIDE_0; + y.hstride = BRW_HORIZONTAL_STRIDE_0; + z.hstride = BRW_HORIZONTAL_STRIDE_0; + w.hstride = BRW_HORIZONTAL_STRIDE_0; + + x.subnr = 0 * sizeof(float); + y.subnr = 1 * sizeof(float); + z.subnr = 2 * sizeof(float); + w.subnr = 3 * sizeof(float); + + dst_e.hstride = BRW_HORIZONTAL_STRIDE_2; + dst_o.hstride = BRW_HORIZONTAL_STRIDE_2; + dst_o.subnr = sizeof(float); - src0.swizzle = BRW_SWIZZLE_XYXY; - src0.vstride = BRW_VERTICAL_STRIDE_4; - src0.width = BRW_WIDTH_4; - src0.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_push_insn_state(p); + if (inst->exec_size == 8) + brw_set_default_exec_size(p, BRW_EXECUTE_4); + else + brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_ADD(p, dst_e, negate(x), z); + brw_ADD(p, dst_o, negate(y), w); + brw_pop_insn_state(p); + } else { + struct brw_reg src0 = src; + struct brw_reg src1 = src; - src1.swizzle = BRW_SWIZZLE_ZWZW; - src1.vstride = BRW_VERTICAL_STRIDE_4; - src1.width = BRW_WIDTH_4; - src1.hstride = BRW_HORIZONTAL_STRIDE_1; + src0.swizzle = BRW_SWIZZLE_XYXY; + src0.vstride = BRW_VERTICAL_STRIDE_4; + src0.width = BRW_WIDTH_4; + src0.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, dst, negate(src0), src1); - brw_pop_insn_state(p); + src1.swizzle = BRW_SWIZZLE_ZWZW; + src1.vstride = BRW_VERTICAL_STRIDE_4; + src1.width = BRW_WIDTH_4; + src1.hstride = BRW_HORIZONTAL_STRIDE_1; + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, dst, negate(src0), src1); + brw_pop_insn_state(p); + } } else { /* replicate the derivative at the top-left pixel to other pixels */ struct brw_reg src0 = src; -- 2.16.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev