On Friday, August 12, 2016 10:06:29 PM PDT Francisco Jerez wrote: > The current logic used to determine the execution size of sampler > messages was based on special-casing several argument and opcode > combinations, which unsurprisingly missed the possibility that some > messages could exceed the payload size limit or not depending on the > number of coordinate components present. In particular: > > - The TXL, TXB and TEX messages (the latter on non-FS stages only) > would attempt to use SIMD16 on Gen7+ hardware even if a shadow > reference was present and the texture was a cubemap array, causing > it to overflow the maximum supported sampler payload size and > crash. > > - The TG4_OFFSET message with shadow comparison was falling back to > SIMD8 regardless of the number of coordinate components, which is > unnecessary when two coordinates or less are present. > > Both cases have been handled incorrectly ever since cubemap arrays and > texture gather were respectively enabled (the current logic used by > the SIMD lowering pass is almost unchanged from the previous no16 > fall-back logic used pre-SIMD lowering times). > > Fixes the following GL4.5 conformance test on Gen7-8 (the bug also > affects Gen9+ in principle, but SKL passes the test by luck because it > manages to use the TXL_LZ message instead of TXL): > > GL45-CTS.texture_cube_map_array.sampling > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97267 > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 108 > ++++++++++++++++++++++++----------- > 1 file changed, 74 insertions(+), 34 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 1842d56..b816d24 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -4687,6 +4687,68 @@ get_fpu_lowered_simd_width(const struct > brw_device_info *devinfo, > } > > /** > + * Get the maximum allowed SIMD width for instruction \p inst accounting for > + * various payload size restrictions that apply to sampler message > + * instructions. > + * > + * This is only intended to provide a maximum theoretical bound for the > + * execution size of the message based on the number of argument components > + * alone, which in most cases will determine whether the SIMD8 or SIMD16 > + * variant of the message can be used, though some messages may have > + * additional restrictions not accounted for here (e.g. pre-ILK hardware uses > + * the message length to determine the exact SIMD width and argument count, > + * which makes a number of sampler message combinations impossible to > + * represent). > + */ > +static unsigned > +get_sampler_lowered_simd_width(const struct brw_device_info *devinfo, > + const fs_inst *inst) > +{ > + /* Calculate the number of coordinate components that have to be present > + * assuming that additional arguments follow the texel coordinates in the > + * message payload. On Gen7+ there is no need for padding, on Gen5-6 we > + * need to pad to four or three components depending on the message, on > + * Gen4 we need to pad to at most three components. > + */ > + const unsigned req_coord_components = > + (devinfo->gen >= 7 || > + !inst->components_read(TEX_LOGICAL_SRC_COORDINATE)) ? 0 : > + (devinfo->gen >= 5 && inst->opcode != SHADER_OPCODE_TXF_LOGICAL && > + inst->opcode != SHADER_OPCODE_TXF_CMS_LOGICAL) ? > 4 : > + 3; > + > + /* On Gen9+ the LOD argument is for free if we're able to use the LZ > + * variant of the TXL or TXF message. > + */ > + const bool implicit_lod = devinfo->gen >= 9 && > + (inst->opcode == SHADER_OPCODE_TXL || > + inst->opcode == SHADER_OPCODE_TXF) && > + inst->src[TEX_LOGICAL_SRC_LOD].is_zero(); > + > + /* Calculate the total number of argument components that need to be > passed > + * to the sampler unit. > + */ > + const unsigned num_payload_components = > + MAX2(inst->components_read(TEX_LOGICAL_SRC_COORDINATE), > + req_coord_components) + > + inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) + > + (implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) + > + inst->components_read(TEX_LOGICAL_SRC_LOD2) + > + inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) + > + (inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ? > + inst->components_read(TEX_LOGICAL_SRC_OFFSET_VALUE) : 0) + > + inst->components_read(TEX_LOGICAL_SRC_MCS); > + > + /*
Bonus line here ^ can be dropped.
> + * SIMD16 messages with more than five arguments exceed the maximum
> message
> + * size supported by the sampler, regardless of whether a header is
> + * provided or not.
> + */
> + return MIN2(inst->exec_size,
> + num_payload_components > MAX_SAMPLER_MESSAGE_SIZE / 2 ? 8 :
> 16);
Why not do:
const bool simd16_exceeds_max_mlen =
2 * num_payload_components > MAX_SAMPLER_MESSAGE_SIZE;
return MIN2(inst->exec_size, simd16_exceeds_max_mlen ? 8 : 16);
I find the left-hand multiply a little more obvious than the right-hand
divide, because logically each component is occupying two registers in
SIMD16 mode, and there's a max message length of 11.
> +}
> +
> +/**
> * Get the closest native SIMD width supported by the hardware for
> instruction
> * \p inst. The instruction will be left untouched by
> * fs_visitor::lower_simd_width() if the returned value is equal to the
> @@ -4861,31 +4923,25 @@ get_lowered_simd_width(const struct brw_device_info
> *devinfo,
> case SHADER_OPCODE_LOD_LOGICAL:
> case SHADER_OPCODE_TG4_LOGICAL:
> case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
> - return MIN2(16, inst->exec_size);
> + case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
> + case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
> + return get_sampler_lowered_simd_width(devinfo, inst);
>
> case SHADER_OPCODE_TXD_LOGICAL:
> /* TXD is unsupported in SIMD16 mode. */
> return 8;
>
> - case SHADER_OPCODE_TG4_OFFSET_LOGICAL: {
> - /* gather4_po_c is unsupported in SIMD16 mode. */
> - const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
> - return (shadow_c.file != BAD_FILE ? 8 : MIN2(16, inst->exec_size));
> - }
> case SHADER_OPCODE_TXL_LOGICAL:
> - case FS_OPCODE_TXB_LOGICAL: {
> - /* Gen4 doesn't have SIMD8 non-shadow-compare bias/LOD instructions,
> and
> - * Gen4-6 can't support TXL and TXB with shadow comparison in SIMD16
> - * mode because the message exceeds the maximum length of 11.
> + case FS_OPCODE_TXB_LOGICAL:
> + /* Only one execution size is representable pre-ILK depending on
> whether
> + * the shadow reference argument is present.
> */
> - const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
> - if (devinfo->gen == 4 && shadow_c.file == BAD_FILE)
> - return 16;
> - else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE)
> - return 8;
> + if (devinfo->gen == 4)
> + return (inst->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE ?
> + 16 : 8);
If you drop the unnecessary parenthesis, this will fit on one line.
Otherwise, our coding style would be to use braces.
> else
> - return MIN2(16, inst->exec_size);
> - }
> + return get_sampler_lowered_simd_width(devinfo, inst);
> +
> case SHADER_OPCODE_TXF_LOGICAL:
> case SHADER_OPCODE_TXS_LOGICAL:
> /* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
> @@ -4894,23 +4950,7 @@ get_lowered_simd_width(const struct brw_device_info
> *devinfo,
> if (devinfo->gen == 4)
> return 16;
> else
> - return MIN2(16, inst->exec_size);
> -
> - case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
> - /* This opcode can take up to 6 arguments which means that in some
> - * circumstances it can end up with a message that is too long in
> SIMD16
> - * mode.
> - */
> - const unsigned coord_components =
> - inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
> - /* First three arguments are the sample index and the two arguments for
> - * the MCS data.
> - */
> - if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
> - return 8;
> - else
> - return MIN2(16, inst->exec_size);
> - }
> + return get_sampler_lowered_simd_width(devinfo, inst);
>
> case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
> case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
>
Thanks for taking care of this!
Series is:
Reviewed-by: Kenneth Graunke <[email protected]>
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
