compiler: lower 16-bit integer extended math instructions

Iago Toral Wed, 25 Apr 2018 23:02:18 -0700

On Wed, 2018-04-25 at 07:05 -0700, Jason Ekstrand wrote:
> Some of these comments may be duplicates of ones I made the first
> time through.
> 
> On Wed, Apr 11, 2018 at 12:20 AM, Iago Toral Quiroga
> <itoral@igalia.com> wrote:
> > The hardware doesn't support 16-bit integer types, so we need to
> > implement
> > 
> > these using 32-bit integer instructions and then convert the result
> > back
> > 
> > to 16-bit.
> > 
> > ---
> > 
> >  src/intel/Makefile.sources                        |   1 +
> > 
> >  src/intel/compiler/brw_nir.c                      |   2 +
> > 
> >  src/intel/compiler/brw_nir.h                      |   2 +
> > 
> >  src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108
> > ++++++++++++++++++++++
> > 
> >  src/intel/compiler/meson.build                    |   1 +
> > 
> >  5 files changed, 114 insertions(+)
> > 
> >  create mode 100644
> > src/intel/compiler/brw_nir_lower_16bit_int_math.c
> > 
> > 
> > 
> > diff --git a/src/intel/Makefile.sources
> > b/src/intel/Makefile.sources
> > 
> > index 91c71a8dfaf..2cd76961ea4 100644
> > 
> > --- a/src/intel/Makefile.sources
> > 
> > +++ b/src/intel/Makefile.sources
> > 
> > @@ -79,6 +79,7 @@ COMPILER_FILES = \
> > 
> >         compiler/brw_nir_analyze_boolean_resolves.c \
> > 
> >         compiler/brw_nir_analyze_ubo_ranges.c \
> > 
> >         compiler/brw_nir_attribute_workarounds.c \
> > 
> > +       compiler/brw_nir_lower_16bit_int_math.c \
> > 
> >         compiler/brw_nir_lower_cs_intrinsics.c \
> > 
> >         compiler/brw_nir_opt_peephole_ffma.c \
> > 
> >         compiler/brw_nir_tcs_workarounds.c \
> > 
> > diff --git a/src/intel/compiler/brw_nir.c
> > b/src/intel/compiler/brw_nir.c
> > 
> > index 69ab162f888..2e5754076ed 100644
> > 
> > --- a/src/intel/compiler/brw_nir.c
> > 
> > +++ b/src/intel/compiler/brw_nir.c
> > 
> > @@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler
> > *compiler, nir_shader *nir)
> > 
> >                          nir_lower_isign64 |
> > 
> >                          nir_lower_divmod64);
> > 
> > 
> > 
> > +   brw_nir_lower_16bit_int_math(nir);
> > 
> > +
> > 
> >     nir = brw_nir_optimize(nir, compiler, is_scalar);
> > 
> > 
> > 
> >     if (is_scalar) {
> > 
> > diff --git a/src/intel/compiler/brw_nir.h
> > b/src/intel/compiler/brw_nir.h
> > 
> > index 03f52da08e5..6ba1a8bc654 100644
> > 
> > --- a/src/intel/compiler/brw_nir.h
> > 
> > +++ b/src/intel/compiler/brw_nir.h
> > 
> > @@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct
> > brw_compiler *compiler,
> > 
> > 
> > 
> >  bool brw_nir_opt_peephole_ffma(nir_shader *shader);
> > 
> > 
> > 
> > +bool brw_nir_lower_16bit_int_math(nir_shader *shader);
> > 
> > +
> > 
> >  nir_shader *brw_nir_optimize(nir_shader *nir,
> > 
> >                               const struct brw_compiler *compiler,
> > 
> >                               bool is_scalar);
> > 
> > diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c
> > b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
> > 
> > new file mode 100644
> > 
> > index 00000000000..6876309a822
> > 
> > --- /dev/null
> > 
> > +++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c
> > 
> > @@ -0,0 +1,108 @@
> > 
> > +/*
> > 
> > + * Copyright © 2018 Intel Corporation
> > 
> > + *
> > 
> > + * Permission is hereby granted, free of charge, to any person
> > obtaining a
> > 
> > + * copy of this software and associated documentation files (the
> > "Software"),
> > 
> > + * to deal in the Software without restriction, including without
> > limitation
> > 
> > + * the rights to use, copy, modify, merge, publish, distribute,
> > sublicense,
> > 
> > + * and/or sell copies of the Software, and to permit persons to
> > whom the
> > 
> > + * Software is furnished to do so, subject to the following
> > conditions:
> > 
> > + *
> > 
> > + * The above copyright notice and this permission notice
> > (including the next
> > 
> > + * paragraph) shall be included in all copies or substantial
> > portions of the
> > 
> > + * Software.
> > 
> > + *
> > 
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > EXPRESS OR
> > 
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY,
> > 
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> > EVENT SHALL
> > 
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > DAMAGES OR OTHER
> > 
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > ARISING
> > 
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > OTHER DEALINGS
> > 
> > + * IN THE SOFTWARE.
> > 
> > + */
> > 
> > +
> > 
> > +#include "brw_nir.h"
> > 
> > +#include "nir_builder.h"
> > 
> > +
> > 
> > +/**
> > 
> > + * Intel hardware doesn't support 16-bit integer Math instructions
> > so this
> > 
> > + * pass implements them in 32-bit and then converts the result
> > back to 16-bit.
> > 
> > + */
> > 
> > +static void
> > 
> > +lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool
> > is_signed)
> > 
> > +{
> > 
> > +   const nir_op op = alu->op;
> > 
> > +
> > 
> > +   bld->cursor = nir_before_instr(&alu->instr);
> > 
> > +
> > 
> > +   nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL };
> > 
> > +   const uint32_t num_inputs = nir_op_infos[op].num_inputs;
> > 
> > +   for (uint32_t i = 0; i < num_inputs; i++) {
> > 
> > +      nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
> > 
> > +      srcs_32[i] = is_signed ? nir_i2i32(bld, src) :
> > nir_u2u32(bld, src);
> 
> For float16, we'll need f2f32.


Yes, I have that (in a separate pass for float16). I suppose merging
both makes more sense than having them be separate.
>  Also, is_signed can be derived from nir_op_infos[op].input_types so
> it doesn't need to be passed in.  If we want to make it fully
> general, we probably also want to only do the conversion if the
> source type is unsized.

Good point.
> > +   }
> > 
> > +
> > 
> > +   nir_ssa_def *dst_32 =
> > 
> > +      nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2],
> > srcs_32[3]);
> > 
> > +
> > 
> > +   nir_ssa_def *dst_16 =
> > 
> > +      is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32);
> 
> Again, we can pull this from the destination type.
>  
> > +
> > 
> > +   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
> > nir_src_for_ssa(dst_16));
> > 
> > +}
> > 
> > +
> > 
> > +static bool
> > 
> > +lower_instr(nir_builder *bld, nir_alu_instr *alu)
> > 
> > +{
> 
> As mentioned in previous discussions, we may want to have a control
> function such as
> 
> unsigned (*lower_bit_size)(const nir_alu_instr *, void *)
> 
> where the void * is something you pass in when you call the
> optimization pass.  I'm usually not a huge fan of making a super-
> general thing before we need it.  However, we already have two
> different drivers that need it for different things so let's just do
> it, make sure it works for all the use-cases, and get it right the
> first time.

Yes, I agree.
> > +   assert(alu->dest.dest.is_ssa);
> > 
> > +   if (alu->dest.dest.ssa.bit_size != 16)
> > 
> > +      return false;
> > 
> > +
> > 
> > +   bool is_signed = false;
> > 
> > +   switch (alu->op) {
> > 
> > +   case nir_op_idiv:
> > 
> > +   case nir_op_imod:
> > 
> > +      is_signed = true;
> > 
> > +      /* Fallthrough */
> > 
> > +   case nir_op_udiv:
> > 
> > +   case nir_op_umod:
> > 
> > +   case nir_op_irem:
> 
> irem is signed.

Oops, right.
> > +      lower_math_instr(bld, alu, is_signed);
> > 
> > +      return true;
> > 
> > +   default:
> > 
> > +      return false;
> > 
> > +   }
> > 
> > +}
> > 
> > +
> > 
> > +static bool
> > 
> > +lower_impl(nir_function_impl *impl)
> > 
> > +{
> > 
> > +   nir_builder b;
> > 
> > +   nir_builder_init(&b, impl);
> > 
> > +   bool progress = false;
> > 
> > +
> > 
> > +   nir_foreach_block(block, impl) {
> > 
> > +      nir_foreach_instr_safe(instr, block) {
> > 
> > +         if (instr->type == nir_instr_type_alu)
> > 
> > +            progress |= lower_instr(&b, nir_instr_as_alu(instr));
> > 
> > +      }
> > 
> > +   }
> > 
> > +
> > 
> > +   nir_metadata_preserve(impl, nir_metadata_block_index |
> > 
> > +                               nir_metadata_dominance);
> 
> Probably only want to call this if (progress)

Ok.
> > +
> > 
> > +   return progress;
> > 
> > +}
> > 
> > +
> > 
> > +bool
> > 
> > +brw_nir_lower_16bit_int_math(nir_shader *shader)
> 
> If we want this to handle 8-bit things, maybe it needs a different
> name. :-)

Yes, and also if we want this to handle floats.
Iago
> > +{
> > 
> > +   bool progress = false;
> > 
> > +
> > 
> > +   nir_foreach_function(function, shader) {
> > 
> > +      if (function->impl)
> > 
> > +         progress |= lower_impl(function->impl);
> > 
> > +   }
> > 
> > +
> > 
> > +   return progress;
> > 
> > +}
> > 
> > diff --git a/src/intel/compiler/meson.build
> > b/src/intel/compiler/meson.build
> > 
> > index 72b7a6796cb..d80fcd6e31b 100644
> > 
> > --- a/src/intel/compiler/meson.build
> > 
> > +++ b/src/intel/compiler/meson.build
> > 
> > @@ -76,6 +76,7 @@ libintel_compiler_files = files(
> > 
> >    'brw_nir_analyze_boolean_resolves.c',
> > 
> >    'brw_nir_analyze_ubo_ranges.c',
> > 
> >    'brw_nir_attribute_workarounds.c',
> > 
> > +  'brw_nir_lower_16bit_int_math.c',
> > 
> >    'brw_nir_lower_cs_intrinsics.c',
> > 
> >    'brw_nir_opt_peephole_ffma.c',
> > 
> >    'brw_nir_tcs_workarounds.c',
> > 
> > -- 
> > 
> > 2.14.1
> > 
> > 
> > 
> > _______________________________________________
> > 
> > mesa-dev mailing list
> > 
> > mesa-dev@lists.freedesktop.org
> > 
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/11] intel/compiler: lower 16-bit integer extended math instructions

Reply via email to