On Fri, Jan 18, 2019 at 6:09 PM Francisco Jerez <curroje...@riseup.net> wrote:
> Currently the execution type calculation will return a bogus value in > cases like: > > mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u > > Which will be considered to have a 32-bit integer execution type even > though the actual indirect move operation will be carried out with > 16-bit precision. > > Similarly there's no need to apply the CHV/BXT double-precision region > alignment restrictions to such control sources, since they aren't > directly involved in the double-precision arithmetic operations > emitted by these virtual instructions. Applying the CHV/BXT > restrictions to control sources was expected to be harmless if mildly > inefficient, but unfortunately it exposed problems at codegen level > for virtual instructions (namely the SHUFFLE instruction used for the > Vulkan 1.1 subgroup feature) that weren't prepared to accept control > sources with an arbitrary strided region. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109328 > Reported-by: Mark Janes <mark.a.ja...@intel.com> > Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass." > --- > src/intel/compiler/brw_fs.cpp | 54 +++++++++++++++++++ > src/intel/compiler/brw_fs_lower_regioning.cpp | 6 +-- > src/intel/compiler/brw_ir_fs.h | 10 +++- > 3 files changed, 66 insertions(+), 4 deletions(-) > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index 0359eb079f7..f475b617df2 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -271,6 +271,60 @@ fs_inst::is_send_from_grf() const > } > } > > +bool > +fs_inst::is_control_source(unsigned arg) const > +{ > + switch (opcode) { > + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: > + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: > + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: > + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: > + return arg == 0; > + > + case SHADER_OPCODE_BROADCAST: > + case SHADER_OPCODE_SHUFFLE: > + case SHADER_OPCODE_QUAD_SWIZZLE: > + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: > + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: > + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: > + case SHADER_OPCODE_IMAGE_SIZE: > + case SHADER_OPCODE_GET_BUFFER_SIZE: > + return arg == 1; > + > + case SHADER_OPCODE_MOV_INDIRECT: > + case SHADER_OPCODE_CLUSTER_BROADCAST: > + case SHADER_OPCODE_TEX: > + case FS_OPCODE_TXB: > + case SHADER_OPCODE_TXD: > + case SHADER_OPCODE_TXF: > + case SHADER_OPCODE_TXF_LZ: > + case SHADER_OPCODE_TXF_CMS: > + case SHADER_OPCODE_TXF_CMS_W: > + case SHADER_OPCODE_TXF_UMS: > + case SHADER_OPCODE_TXF_MCS: > + case SHADER_OPCODE_TXL: > + case SHADER_OPCODE_TXL_LZ: > + case SHADER_OPCODE_TXS: > + case SHADER_OPCODE_LOD: > + case SHADER_OPCODE_TG4: > + case SHADER_OPCODE_TG4_OFFSET: > + case SHADER_OPCODE_SAMPLEINFO: > + case SHADER_OPCODE_UNTYPED_ATOMIC: > + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: > + case SHADER_OPCODE_UNTYPED_SURFACE_READ: > + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: > + case SHADER_OPCODE_BYTE_SCATTERED_READ: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > + case SHADER_OPCODE_TYPED_ATOMIC: > + case SHADER_OPCODE_TYPED_SURFACE_READ: > + case SHADER_OPCODE_TYPED_SURFACE_WRITE: > As of b284d222db, we are no longer using many of the opcodes in this list (gen7 pull constant loads, [un]typed surface reads/writes, etc.) It will need to be rebased and we need to add SHADER_OPCODE_SEND to the list. Fortunately, the changes to add SHADER_OPCODE_SEND landed before the 19.0 cut-off so there is no need to make two versions for backporting. Other than that, this patch seems perfectly reasonable to me Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> If you want me to hand-review the new list of opcodes, feel free to send a v2 and cc me. > + return arg == 1 || arg == 2; > + > + default: > + return false; > + } > +} > + > /** > * Returns true if this instruction's sources and destinations cannot > * safely be the same register. > diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp > b/src/intel/compiler/brw_fs_lower_regioning.cpp > index df50993dee6..6a3c23892b4 100644 > --- a/src/intel/compiler/brw_fs_lower_regioning.cpp > +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp > @@ -74,7 +74,7 @@ namespace { > unsigned stride = inst->dst.stride * type_sz(inst->dst.type); > > for (unsigned i = 0; i < inst->sources; i++) { > - if (!is_uniform(inst->src[i])) > + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) > stride = MAX2(stride, inst->src[i].stride * > type_sz(inst->src[i].type)); > } > @@ -92,7 +92,7 @@ namespace { > required_dst_byte_offset(const fs_inst *inst) > { > for (unsigned i = 0; i < inst->sources; i++) { > - if (!is_uniform(inst->src[i])) > + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) > if (reg_offset(inst->src[i]) % REG_SIZE != > reg_offset(inst->dst) % REG_SIZE) > return 0; > @@ -109,7 +109,7 @@ namespace { > has_invalid_src_region(const gen_device_info *devinfo, const fs_inst > *inst, > unsigned i) > { > - if (is_unordered(inst)) { > + if (is_unordered(inst) || inst->is_control_source(i)) { > return false; > } else { > const unsigned dst_byte_stride = inst->dst.stride * > type_sz(inst->dst.type); > diff --git a/src/intel/compiler/brw_ir_fs.h > b/src/intel/compiler/brw_ir_fs.h > index 08e3d83d910..0a0ba1d363a 100644 > --- a/src/intel/compiler/brw_ir_fs.h > +++ b/src/intel/compiler/brw_ir_fs.h > @@ -358,6 +358,13 @@ public: > bool can_change_types() const; > bool has_source_and_destination_hazard() const; > > + /** > + * Return whether \p arg is a control source of a virtual instruction > which > + * shouldn't contribute to the execution type and usual regioning > + * restriction calculations of arithmetic instructions. > + */ > + bool is_control_source(unsigned arg) const; > + > /** > * Return the subset of flag registers read by the instruction as a > bitset > * with byte granularity. > @@ -462,7 +469,8 @@ get_exec_type(const fs_inst *inst) > brw_reg_type exec_type = BRW_REGISTER_TYPE_B; > > for (int i = 0; i < inst->sources; i++) { > - if (inst->src[i].file != BAD_FILE) { > + if (inst->src[i].file != BAD_FILE && > + !inst->is_control_source(i)) { > const brw_reg_type t = get_exec_type(inst->src[i].type); > if (type_sz(t) > type_sz(exec_type)) > exec_type = t; > -- > 2.19.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev