On Thu, Feb 14, 2019 at 9:33 PM Francisco Jerez <curroje...@riseup.net> wrote:
> Jason Ekstrand <ja...@jlekstrand.net> writes: > > > On Fri, Jan 18, 2019 at 6:09 PM Francisco Jerez <curroje...@riseup.net> > > wrote: > > > >> Currently the execution type calculation will return a bogus value in > >> cases like: > >> > >> mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u > >> > >> Which will be considered to have a 32-bit integer execution type even > >> though the actual indirect move operation will be carried out with > >> 16-bit precision. > >> > >> Similarly there's no need to apply the CHV/BXT double-precision region > >> alignment restrictions to such control sources, since they aren't > >> directly involved in the double-precision arithmetic operations > >> emitted by these virtual instructions. Applying the CHV/BXT > >> restrictions to control sources was expected to be harmless if mildly > >> inefficient, but unfortunately it exposed problems at codegen level > >> for virtual instructions (namely the SHUFFLE instruction used for the > >> Vulkan 1.1 subgroup feature) that weren't prepared to accept control > >> sources with an arbitrary strided region. > >> > >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109328 > >> Reported-by: Mark Janes <mark.a.ja...@intel.com> > >> Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass." > >> --- > >> src/intel/compiler/brw_fs.cpp | 54 +++++++++++++++++++ > >> src/intel/compiler/brw_fs_lower_regioning.cpp | 6 +-- > >> src/intel/compiler/brw_ir_fs.h | 10 +++- > >> 3 files changed, 66 insertions(+), 4 deletions(-) > >> > >> diff --git a/src/intel/compiler/brw_fs.cpp > b/src/intel/compiler/brw_fs.cpp > >> index 0359eb079f7..f475b617df2 100644 > >> --- a/src/intel/compiler/brw_fs.cpp > >> +++ b/src/intel/compiler/brw_fs.cpp > >> @@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const > >> } > >> } > >> > >> +bool > >> +fs_inst::is_control_source(unsigned arg) const > >> +{ > >> + switch (opcode) { > >> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: > >> + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: > >> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: > >> + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: > >> + return arg == 0; > >> + > >> + case SHADER_OPCODE_BROADCAST: > >> + case SHADER_OPCODE_SHUFFLE: > >> + case SHADER_OPCODE_QUAD_SWIZZLE: > >> + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: > >> + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: > >> + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: > >> + case SHADER_OPCODE_IMAGE_SIZE: > >> + case SHADER_OPCODE_GET_BUFFER_SIZE: > >> + return arg == 1; > >> + > >> + case SHADER_OPCODE_MOV_INDIRECT: > >> + case SHADER_OPCODE_CLUSTER_BROADCAST: > >> + case SHADER_OPCODE_TEX: > >> + case FS_OPCODE_TXB: > >> + case SHADER_OPCODE_TXD: > >> + case SHADER_OPCODE_TXF: > >> + case SHADER_OPCODE_TXF_LZ: > >> + case SHADER_OPCODE_TXF_CMS: > >> + case SHADER_OPCODE_TXF_CMS_W: > >> + case SHADER_OPCODE_TXF_UMS: > >> + case SHADER_OPCODE_TXF_MCS: > >> + case SHADER_OPCODE_TXL: > >> + case SHADER_OPCODE_TXL_LZ: > >> + case SHADER_OPCODE_TXS: > >> + case SHADER_OPCODE_LOD: > >> + case SHADER_OPCODE_TG4: > >> + case SHADER_OPCODE_TG4_OFFSET: > >> + case SHADER_OPCODE_SAMPLEINFO: > >> + case SHADER_OPCODE_UNTYPED_ATOMIC: > >> + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: > >> + case SHADER_OPCODE_UNTYPED_SURFACE_READ: > >> + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: > >> + case SHADER_OPCODE_BYTE_SCATTERED_READ: > >> + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > >> + case SHADER_OPCODE_TYPED_ATOMIC: > >> + case SHADER_OPCODE_TYPED_SURFACE_READ: > >> + case SHADER_OPCODE_TYPED_SURFACE_WRITE: > >> > > > > As of b284d222db, we are no longer using many of the opcodes in this list > > (gen7 pull constant loads, [un]typed surface reads/writes, etc.) It will > > need to be rebased and we need to add SHADER_OPCODE_SEND to the list. > > Fortunately, the changes to add SHADER_OPCODE_SEND landed before the 19.0 > > cut-off so there is no need to make two versions for backporting. > > > > Yes, that's roughly what I had done during one of my previous rebases of > this series, see: > > > https://cgit.freedesktop.org/~currojerez/mesa/commit/?h=jenkins&id=30f8f3ff48b02ead688705e0679a98c0d6c9c87e > Looks good. Some of those opcodes are no longer used. I sent out a MR to clean some of it up but I think it's better if this lands first so that it can be more cleanly back-ported Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > > Other than that, this patch seems perfectly reasonable to me > > > > Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > > > > If you want me to hand-review the new list of opcodes, feel free to send > a > > v2 and cc me. > > > > > >> + return arg == 1 || arg == 2; > >> + > >> + default: > >> + return false; > >> + } > >> +} > >> + > >> /** > >> * Returns true if this instruction's sources and destinations cannot > >> * safely be the same register. > >> diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp > >> b/src/intel/compiler/brw_fs_lower_regioning.cpp > >> index df50993dee6..6a3c23892b4 100644 > >> --- a/src/intel/compiler/brw_fs_lower_regioning.cpp > >> +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp > >> @@ -74,7 +74,7 @@ namespace { > >> unsigned stride = inst->dst.stride * type_sz(inst->dst.type); > >> > >> for (unsigned i = 0; i < inst->sources; i++) { > >> - if (!is_uniform(inst->src[i])) > >> + if (!is_uniform(inst->src[i]) && > !inst->is_control_source(i)) > >> stride = MAX2(stride, inst->src[i].stride * > >> type_sz(inst->src[i].type)); > >> } > >> @@ -92,7 +92,7 @@ namespace { > >> required_dst_byte_offset(const fs_inst *inst) > >> { > >> for (unsigned i = 0; i < inst->sources; i++) { > >> - if (!is_uniform(inst->src[i])) > >> + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) > >> if (reg_offset(inst->src[i]) % REG_SIZE != > >> reg_offset(inst->dst) % REG_SIZE) > >> return 0; > >> @@ -109,7 +109,7 @@ namespace { > >> has_invalid_src_region(const gen_device_info *devinfo, const fs_inst > >> *inst, > >> unsigned i) > >> { > >> - if (is_unordered(inst)) { > >> + if (is_unordered(inst) || inst->is_control_source(i)) { > >> return false; > >> } else { > >> const unsigned dst_byte_stride = inst->dst.stride * > >> type_sz(inst->dst.type); > >> diff --git a/src/intel/compiler/brw_ir_fs.h > >> b/src/intel/compiler/brw_ir_fs.h > >> index 08e3d83d910..0a0ba1d363a 100644 > >> --- a/src/intel/compiler/brw_ir_fs.h > >> +++ b/src/intel/compiler/brw_ir_fs.h > >> @@ -358,6 +358,13 @@ public: > >> bool can_change_types() const; > >> bool has_source_and_destination_hazard() const; > >> > >> + /** > >> + * Return whether \p arg is a control source of a virtual > instruction > >> which > >> + * shouldn't contribute to the execution type and usual regioning > >> + * restriction calculations of arithmetic instructions. > >> + */ > >> + bool is_control_source(unsigned arg) const; > >> + > >> /** > >> * Return the subset of flag registers read by the instruction as a > >> bitset > >> * with byte granularity. > >> @@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst) > >> brw_reg_type exec_type = BRW_REGISTER_TYPE_B; > >> > >> for (int i = 0; i < inst->sources; i++) { > >> - if (inst->src[i].file != BAD_FILE) { > >> + if (inst->src[i].file != BAD_FILE && > >> + !inst->is_control_source(i)) { > >> const brw_reg_type t = get_exec_type(inst->src[i].type); > >> if (type_sz(t) > type_sz(exec_type)) > >> exec_type = t; > >> -- > >> 2.19.2 > >> > >> _______________________________________________ > >> mesa-dev mailing list > >> mesa-dev@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > >> >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev