On Fri, Oct 20, 2017 at 1:42 PM, Scott D Phillips <scott.d.phill...@intel.com> wrote: > Matt Turner <matts...@gmail.com> writes: > >> --- >> src/intel/compiler/brw_eu_emit.c | 219 >> +++++++++++++++++++++++++++++---------- >> 1 file changed, 166 insertions(+), 53 deletions(-) >> >> diff --git a/src/intel/compiler/brw_eu_emit.c >> b/src/intel/compiler/brw_eu_emit.c >> index 7495a19fd8..4f98860044 100644 >> --- a/src/intel/compiler/brw_eu_emit.c >> +++ b/src/intel/compiler/brw_eu_emit.c >> @@ -678,64 +678,177 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, >> struct brw_reg dest, >> >> gen7_convert_mrf_to_grf(p, &dest); >> >> - assert(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16); >> - >> - assert(dest.file == BRW_GENERAL_REGISTER_FILE || >> - dest.file == BRW_MESSAGE_REGISTER_FILE); >> assert(dest.nr < 128); >> + assert(src0.nr < 128); >> + assert(src1.nr < 128); >> + assert(src2.nr < 128); >> assert(dest.address_mode == BRW_ADDRESS_DIRECT); >> - assert(dest.type == BRW_REGISTER_TYPE_F || >> - dest.type == BRW_REGISTER_TYPE_DF || >> - dest.type == BRW_REGISTER_TYPE_D || >> - dest.type == BRW_REGISTER_TYPE_UD); >> - if (devinfo->gen == 6) { >> - brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, >> - dest.file == >> BRW_MESSAGE_REGISTER_FILE); >> - } >> - brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); >> - brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16); >> - brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); >> - >> - assert(src0.file == BRW_GENERAL_REGISTER_FILE); >> assert(src0.address_mode == BRW_ADDRESS_DIRECT); >> - assert(src0.nr < 128); >> - brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle); >> - brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, >> get_3src_subreg_nr(src0)); >> - brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); >> - brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); >> - brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); >> - brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, >> - src0.vstride == >> BRW_VERTICAL_STRIDE_0); >> - >> - assert(src1.file == BRW_GENERAL_REGISTER_FILE); >> assert(src1.address_mode == BRW_ADDRESS_DIRECT); >> - assert(src1.nr < 128); >> - brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); >> - brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); >> - brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); >> - brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst, >> - src1.vstride == >> BRW_VERTICAL_STRIDE_0); >> - >> - assert(src2.file == BRW_GENERAL_REGISTER_FILE); >> assert(src2.address_mode == BRW_ADDRESS_DIRECT); >> - assert(src2.nr < 128); >> - brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle); >> - brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, >> get_3src_subreg_nr(src2)); >> - brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); >> - brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); >> - brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); >> - brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst, >> - src2.vstride == >> BRW_VERTICAL_STRIDE_0); >> - >> - if (devinfo->gen >= 7) { >> - /* Set both the source and destination types based on dest.type, >> - * ignoring the source register types. The MAD and LRP emitters >> ensure >> - * that all four types are float. The BFE and BFI2 emitters, however, >> - * may send us mixed D and UD types and want us to ignore that and use >> - * the destination type. >> - */ >> - brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); >> - brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); >> + >> + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { >> + assert(dest.file == BRW_GENERAL_REGISTER_FILE || >> + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); >> + >> + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) { >> + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, >> + BRW_ALIGN1_3SRC_ACCUMULATOR); >> + brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); >> + } else { >> + brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, >> + >> BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); >> + brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); >> + } >> + brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8); >> + >> + brw_inst_set_3src_a1_dst_hstride(devinfo, inst, >> BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1); >> + >> + if (brw_reg_type_is_floating_point(dest.type)) { >> + brw_inst_set_3src_a1_exec_type(devinfo, inst, >> + BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); >> + } else { >> + brw_inst_set_3src_a1_exec_type(devinfo, inst, >> + BRW_ALIGN1_3SRC_EXEC_TYPE_INT); >> + } >> + >> + brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type); >> + brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type); >> + brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type); >> + brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type); >> + >> + assert((src0.vstride == BRW_VERTICAL_STRIDE_0 && >> + src0.hstride == BRW_HORIZONTAL_STRIDE_0) || >> + (src0.vstride == BRW_VERTICAL_STRIDE_8 && >> + src0.hstride == BRW_HORIZONTAL_STRIDE_1) || >> + (src0.vstride == BRW_VERTICAL_STRIDE_16 && >> + src0.hstride == BRW_HORIZONTAL_STRIDE_1)); > > I don't follow why VERTICAL_STRIDE_16 is ok here, but it gets written to > the instruction as VERTICAL_STRIDE_8 below.
In a version of the patch before I sent anything for review, I had the asserts you asked for. I thought I remembered needing 16, but it doesn't seem to be required, at least according to the little bit of touch testing I just did. I'll remove them. >> + assert((src1.vstride == BRW_VERTICAL_STRIDE_0 && >> + src1.hstride == BRW_HORIZONTAL_STRIDE_0) || >> + (src1.vstride == BRW_VERTICAL_STRIDE_8 && >> + src1.hstride == BRW_HORIZONTAL_STRIDE_1) || >> + (src1.vstride == BRW_VERTICAL_STRIDE_16 && >> + src1.hstride == BRW_HORIZONTAL_STRIDE_1)); >> + assert((src2.vstride == BRW_VERTICAL_STRIDE_0 && >> + src2.hstride == BRW_HORIZONTAL_STRIDE_0) || >> + (src2.vstride == BRW_VERTICAL_STRIDE_8 && >> + src2.hstride == BRW_HORIZONTAL_STRIDE_1) || >> + (src2.vstride == BRW_VERTICAL_STRIDE_16 && >> + src2.hstride == BRW_HORIZONTAL_STRIDE_1)); >> + >> + brw_inst_set_3src_a1_src0_vstride(devinfo, inst, >> + src0.vstride == >> BRW_VERTICAL_STRIDE_0 ? >> + BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0 : >> + BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8); >> + brw_inst_set_3src_a1_src1_vstride(devinfo, inst, >> + src1.vstride == >> BRW_VERTICAL_STRIDE_0 ? >> + BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0 : >> + BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8); >> + /* no vstride on src2 */ >> + >> + brw_inst_set_3src_a1_src0_hstride(devinfo, inst, >> + src0.hstride == >> BRW_HORIZONTAL_STRIDE_0 ? >> + >> BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0 : >> + >> BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1); >> + brw_inst_set_3src_a1_src1_hstride(devinfo, inst, >> + src1.hstride == >> BRW_HORIZONTAL_STRIDE_0 ? >> + >> BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0 : >> + >> BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1); >> + brw_inst_set_3src_a1_src2_hstride(devinfo, inst, >> + src2.hstride == >> BRW_HORIZONTAL_STRIDE_0 ? >> + >> BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0 : >> + >> BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1); >> + >> + brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr); >> + brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); >> + brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); >> + brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); >> + >> + brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr); >> + if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) { >> + brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); >> + } else { >> + brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); >> + } >> + brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); >> + brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); >> + >> + brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr); >> + brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); >> + brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); >> + brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); >> + >> + assert(src0.file == BRW_GENERAL_REGISTER_FILE || >> + src0.file == BRW_IMMEDIATE_VALUE); >> + assert(src1.file == BRW_GENERAL_REGISTER_FILE || >> + src1.file == BRW_ARCHITECTURE_REGISTER_FILE); >> + assert(src2.file == BRW_GENERAL_REGISTER_FILE || >> + src2.file == BRW_IMMEDIATE_VALUE); >> + >> + brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, >> + src0.file == >> BRW_GENERAL_REGISTER_FILE ? >> + >> BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : >> + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); >> + brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, >> + src1.file == >> BRW_GENERAL_REGISTER_FILE ? >> + >> BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : >> + BRW_ALIGN1_3SRC_ACCUMULATOR); >> + brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, >> + src2.file == >> BRW_GENERAL_REGISTER_FILE ? >> + >> BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : >> + BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); >> + } else { >> + assert(dest.file == BRW_GENERAL_REGISTER_FILE || >> + dest.file == BRW_MESSAGE_REGISTER_FILE); >> + assert(dest.type == BRW_REGISTER_TYPE_F || >> + dest.type == BRW_REGISTER_TYPE_DF || >> + dest.type == BRW_REGISTER_TYPE_D || >> + dest.type == BRW_REGISTER_TYPE_UD); >> + if (devinfo->gen == 6) { >> + brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, >> + dest.file == >> BRW_MESSAGE_REGISTER_FILE); >> + } >> + brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); >> + brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16); >> + brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); >> + >> + assert(src0.file == BRW_GENERAL_REGISTER_FILE); >> + brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle); >> + brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, >> get_3src_subreg_nr(src0)); >> + brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); >> + brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); >> + brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); >> + brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, >> + src0.vstride == >> BRW_VERTICAL_STRIDE_0); >> + >> + assert(src1.file == BRW_GENERAL_REGISTER_FILE); >> + brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle); >> + brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, >> get_3src_subreg_nr(src1)); > > The two lines above got added by this patch. They look good to me, maybe > mention the fix in the commit message. Oh! Nice catch. I think this was a rebase error. I see that in my current tree they were deleted from "i965: Rename brw_inst 3src functions in preparation for align1" which obviously meant to just rename them. Fixed locally. > Provided the vertical stride thing above is right then patches 11 & 13 are: > > Reviewed-by: Scott D Phillips <scott.d.phill...@intel.com> Thanks a ton! _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev