On Friday, August 12, 2016 10:06:29 PM PDT Francisco Jerez wrote: > The current logic used to determine the execution size of sampler > messages was based on special-casing several argument and opcode > combinations, which unsurprisingly missed the possibility that some > messages could exceed the payload size limit or not depending on the > number of coordinate components present. In particular: > > - The TXL, TXB and TEX messages (the latter on non-FS stages only) > would attempt to use SIMD16 on Gen7+ hardware even if a shadow > reference was present and the texture was a cubemap array, causing > it to overflow the maximum supported sampler payload size and > crash. > > - The TG4_OFFSET message with shadow comparison was falling back to > SIMD8 regardless of the number of coordinate components, which is > unnecessary when two coordinates or less are present. > > Both cases have been handled incorrectly ever since cubemap arrays and > texture gather were respectively enabled (the current logic used by > the SIMD lowering pass is almost unchanged from the previous no16 > fall-back logic used pre-SIMD lowering times). > > Fixes the following GL4.5 conformance test on Gen7-8 (the bug also > affects Gen9+ in principle, but SKL passes the test by luck because it > manages to use the TXL_LZ message instead of TXL): > > GL45-CTS.texture_cube_map_array.sampling > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97267 > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 108 > ++++++++++++++++++++++++----------- > 1 file changed, 74 insertions(+), 34 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 1842d56..b816d24 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -4687,6 +4687,68 @@ get_fpu_lowered_simd_width(const struct > brw_device_info *devinfo, > } > > /** > + * Get the maximum allowed SIMD width for instruction \p inst accounting for > + * various payload size restrictions that apply to sampler message > + * instructions. > + * > + * This is only intended to provide a maximum theoretical bound for the > + * execution size of the message based on the number of argument components > + * alone, which in most cases will determine whether the SIMD8 or SIMD16 > + * variant of the message can be used, though some messages may have > + * additional restrictions not accounted for here (e.g. pre-ILK hardware uses > + * the message length to determine the exact SIMD width and argument count, > + * which makes a number of sampler message combinations impossible to > + * represent). > + */ > +static unsigned > +get_sampler_lowered_simd_width(const struct brw_device_info *devinfo, > + const fs_inst *inst) > +{ > + /* Calculate the number of coordinate components that have to be present > + * assuming that additional arguments follow the texel coordinates in the > + * message payload. On Gen7+ there is no need for padding, on Gen5-6 we > + * need to pad to four or three components depending on the message, on > + * Gen4 we need to pad to at most three components. > + */ > + const unsigned req_coord_components = > + (devinfo->gen >= 7 || > + !inst->components_read(TEX_LOGICAL_SRC_COORDINATE)) ? 0 : > + (devinfo->gen >= 5 && inst->opcode != SHADER_OPCODE_TXF_LOGICAL && > + inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL) ? > 4 : > + 3; > + > + /* On Gen9+ the LOD argument is for free if we're able to use the LZ > + * variant of the TXL or TXF message. > + */ > + const bool implicit_lod = devinfo->gen >= 9 && > + (inst->opcode == SHADER_OPCODE_TXL || > + inst->opcode == SHADER_OPCODE_TXF) && > + inst->src[TEX_LOGICAL_SRC_LOD].is_zero(); > + > + /* Calculate the total number of argument components that need to be > passed > + * to the sampler unit. > + */ > + const unsigned num_payload_components = > + MAX2(inst->components_read(TEX_LOGICAL_SRC_COORDINATE), > + req_coord_components) + > + inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) + > + (implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) + > + inst->components_read(TEX_LOGICAL_SRC_LOD2) + > + inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) + > + (inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ? > + inst->components_read(TEX_LOGICAL_SRC_OFFSET_VALUE) : 0) + > + inst->components_read(TEX_LOGICAL_SRC_MCS); > + > + /*
Bonus line here ^ can be dropped. > + * SIMD16 messages with more than five arguments exceed the maximum > message > + * size supported by the sampler, regardless of whether a header is > + * provided or not. > + */ > + return MIN2(inst->exec_size, > + num_payload_components > MAX_SAMPLER_MESSAGE_SIZE / 2 ? 8 : > 16); Why not do: const bool simd16_exceeds_max_mlen = 2 * num_payload_components > MAX_SAMPLER_MESSAGE_SIZE; return MIN2(inst->exec_size, simd16_exceeds_max_mlen ? 8 : 16); I find the left-hand multiply a little more obvious than the right-hand divide, because logically each component is occupying two registers in SIMD16 mode, and there's a max message length of 11. > +} > + > +/** > * Get the closest native SIMD width supported by the hardware for > instruction > * \p inst. The instruction will be left untouched by > * fs_visitor::lower_simd_width() if the returned value is equal to the > @@ -4861,31 +4923,25 @@ get_lowered_simd_width(const struct brw_device_info > *devinfo, > case SHADER_OPCODE_LOD_LOGICAL: > case SHADER_OPCODE_TG4_LOGICAL: > case SHADER_OPCODE_SAMPLEINFO_LOGICAL: > - return MIN2(16, inst->exec_size); > + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: > + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: > + return get_sampler_lowered_simd_width(devinfo, inst); > > case SHADER_OPCODE_TXD_LOGICAL: > /* TXD is unsupported in SIMD16 mode. */ > return 8; > > - case SHADER_OPCODE_TG4_OFFSET_LOGICAL: { > - /* gather4_po_c is unsupported in SIMD16 mode. */ > - const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C]; > - return (shadow_c.file != BAD_FILE ? 8 : MIN2(16, inst->exec_size)); > - } > case SHADER_OPCODE_TXL_LOGICAL: > - case FS_OPCODE_TXB_LOGICAL: { > - /* Gen4 doesn't have SIMD8 non-shadow-compare bias/LOD instructions, > and > - * Gen4-6 can't support TXL and TXB with shadow comparison in SIMD16 > - * mode because the message exceeds the maximum length of 11. > + case FS_OPCODE_TXB_LOGICAL: > + /* Only one execution size is representable pre-ILK depending on > whether > + * the shadow reference argument is present. > */ > - const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C]; > - if (devinfo->gen == 4 && shadow_c.file == BAD_FILE) > - return 16; > - else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE) > - return 8; > + if (devinfo->gen == 4) > + return (inst->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE ? > + 16 : 8); If you drop the unnecessary parenthesis, this will fit on one line. Otherwise, our coding style would be to use braces. > else > - return MIN2(16, inst->exec_size); > - } > + return get_sampler_lowered_simd_width(devinfo, inst); > + > case SHADER_OPCODE_TXF_LOGICAL: > case SHADER_OPCODE_TXS_LOGICAL: > /* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD > @@ -4894,23 +4950,7 @@ get_lowered_simd_width(const struct brw_device_info > *devinfo, > if (devinfo->gen == 4) > return 16; > else > - return MIN2(16, inst->exec_size); > - > - case SHADER_OPCODE_TXF_CMS_W_LOGICAL: { > - /* This opcode can take up to 6 arguments which means that in some > - * circumstances it can end up with a message that is too long in > SIMD16 > - * mode. > - */ > - const unsigned coord_components = > - inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; > - /* First three arguments are the sample index and the two arguments for > - * the MCS data. > - */ > - if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE) > - return 8; > - else > - return MIN2(16, inst->exec_size); > - } > + return get_sampler_lowered_simd_width(devinfo, inst); > > case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: > case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: > Thanks for taking care of this! Series is: Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev