On Thu, May 31, 2018 at 11:13 AM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 05/31/2018 01:55 AM, Timothy Arceri wrote: >> >> On 31/05/18 03:06, Samuel Pitoiset wrote: >>> >>> Similar for max(). >>> >>> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> >>> --- >>> src/compiler/nir/nir.h | 3 +++ >>> src/compiler/nir/nir_opt_algebraic.py | 8 ++++++++ >>> 2 files changed, 11 insertions(+) >>> >>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h >>> index f6086bd6c0..04991b7d04 100644 >>> --- a/src/compiler/nir/nir.h >>> +++ b/src/compiler/nir/nir.h >>> @@ -1897,6 +1897,9 @@ typedef struct nir_shader_compiler_options { >>> /* lower b2f to iand */ >>> bool lower_b2f; >>> + /* lower min(min(a, b), c) to min3(a, b, c) (same for max()). */ >>> + bool lower_minmax3; >>> + >>> /* Does the native fdot instruction replicate its result for four >>> * components? If so, then opt_algebraic_late will turn all fdotN >>> * instructions into fdot_replicatedN instructions. >>> diff --git a/src/compiler/nir/nir_opt_algebraic.py >>> b/src/compiler/nir/nir_opt_algebraic.py >>> index 909ea3daf4..1d67e2d88c 100644 >>> --- a/src/compiler/nir/nir_opt_algebraic.py >>> +++ b/src/compiler/nir/nir_opt_algebraic.py >>> @@ -224,6 +224,14 @@ optimizations = [ >>> (('imax', a, a), a), >>> (('umin', a, a), a), >>> (('umax', a, a), a), >>> + >>> + (('fmin', ('fmin', a, b), c), ('fmin3', a, b, c), >>> 'options->lower_minmax3'), >>> + (('imin', ('imin', a, b), c), ('imin3', a, b, c), >>> 'options->lower_minmax3'), >>> + (('umin', ('umin', a, b), c), ('umin3', a, b, c), >>> 'options->lower_minmax3'), >>> + (('fmax', ('fmax', a, b), c), ('fmax3', a, b, c), >>> 'options->lower_minmax3'), >>> + (('imax', ('imax', a, b), c), ('imax3', a, b, c), >>> 'options->lower_minmax3'), >>> + (('umax', ('umax', a, b), c), ('umax3', a, b, c), >>> 'options->lower_minmax3'), >> >> >> These look like they would be better suited in late_optimizations rather >> than optimizations? 
> > > Totals from affected shaders: > SGPRS: 104 -> 104 (0.00 %) > VGPRS: 64 -> 64 (0.00 %) > Spilled SGPRs: 0 -> 0 (0.00 %) > Spilled VGPRs: 0 -> 0 (0.00 %) > Private memory VGPRs: 0 -> 0 (0.00 %) > Scratch size: 0 -> 0 (0.00 %) dwords per thread > Code Size: 4652 -> 4644 (-0.17 %) bytes > LDS: 4 -> 4 (0.00 %) blocks > Max Waves: 24 -> 24 (0.00 %) > Wait states: 0 -> 0 (0.00 %) > > These are the totals when the min3/max3 patterns are moved to late_optimizations.
Is that compared to no min3/max3 lowering at all, or compared to applying the min3/max3 patterns in the regular (non-late) optimizations list? > > >> >> >>> + >>> (('fmin', a, ('fneg', a)), ('fneg', ('fabs', a))), >>> (('imin', a, ('ineg', a)), ('ineg', ('iabs', a))), >>> (('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))), >>> > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev