On Wed, 2018-04-25 at 07:05 -0700, Jason Ekstrand wrote: > Some of these comments may be duplicates of ones I made the first > time through. > > On Wed, Apr 11, 2018 at 12:20 AM, Iago Toral Quiroga <itoral@igalia.c > om> wrote: > > The hardware doesn't support 16-bit integer types, so we need to > > implement > > > > these using 32-bit integer instructions and then convert the result > > back > > > > to 16-bit. > > > > --- > > > > src/intel/Makefile.sources | 1 + > > > > src/intel/compiler/brw_nir.c | 2 + > > > > src/intel/compiler/brw_nir.h | 2 + > > > > src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108 > > ++++++++++++++++++++++ > > > > src/intel/compiler/meson.build | 1 + > > > > 5 files changed, 114 insertions(+) > > > > create mode 100644 > > src/intel/compiler/brw_nir_lower_16bit_int_math.c > > > > > > > > diff --git a/src/intel/Makefile.sources > > b/src/intel/Makefile.sources > > > > index 91c71a8dfaf..2cd76961ea4 100644 > > > > --- a/src/intel/Makefile.sources > > > > +++ b/src/intel/Makefile.sources > > > > @@ -79,6 +79,7 @@ COMPILER_FILES = \ > > > > compiler/brw_nir_analyze_boolean_resolves.c \ > > > > compiler/brw_nir_analyze_ubo_ranges.c \ > > > > compiler/brw_nir_attribute_workarounds.c \ > > > > + compiler/brw_nir_lower_16bit_int_math.c \ > > > > compiler/brw_nir_lower_cs_intrinsics.c \ > > > > compiler/brw_nir_opt_peephole_ffma.c \ > > > > compiler/brw_nir_tcs_workarounds.c \ > > > > diff --git a/src/intel/compiler/brw_nir.c > > b/src/intel/compiler/brw_nir.c > > > > index 69ab162f888..2e5754076ed 100644 > > > > --- a/src/intel/compiler/brw_nir.c > > > > +++ b/src/intel/compiler/brw_nir.c > > > > @@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler > > *compiler, nir_shader *nir) > > > > nir_lower_isign64 | > > > > nir_lower_divmod64); > > > > > > > > + brw_nir_lower_16bit_int_math(nir); > > > > + > > > > nir = brw_nir_optimize(nir, compiler, is_scalar); > > > > > > > > if (is_scalar) { > > > > diff --git 
a/src/intel/compiler/brw_nir.h > > b/src/intel/compiler/brw_nir.h > > > > index 03f52da08e5..6ba1a8bc654 100644 > > > > --- a/src/intel/compiler/brw_nir.h > > > > +++ b/src/intel/compiler/brw_nir.h > > > > @@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct > > brw_compiler *compiler, > > > > > > > > bool brw_nir_opt_peephole_ffma(nir_shader *shader); > > > > > > > > +bool brw_nir_lower_16bit_int_math(nir_shader *shader); > > > > + > > > > nir_shader *brw_nir_optimize(nir_shader *nir, > > > > const struct brw_compiler *compiler, > > > > bool is_scalar); > > > > diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c > > b/src/intel/compiler/brw_nir_lower_16bit_int_math.c > > > > new file mode 100644 > > > > index 00000000000..6876309a822 > > > > --- /dev/null > > > > +++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c > > > > @@ -0,0 +1,108 @@ > > > > +/* > > > > + * Copyright © 2018 Intel Corporation > > > > + * > > > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > > > + * copy of this software and associated documentation files (the > > "Software"), > > > > + * to deal in the Software without restriction, including without > > limitation > > > > + * the rights to use, copy, modify, merge, publish, distribute, > > sublicense, > > > > + * and/or sell copies of the Software, and to permit persons to > > whom the > > > > + * Software is furnished to do so, subject to the following > > conditions: > > > > + * > > > > + * The above copyright notice and this permission notice > > (including the next > > > > + * paragraph) shall be included in all copies or substantial > > portions of the > > > > + * Software. > > > > + * > > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > EXPRESS OR > > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO > > EVENT SHALL > > > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > > OTHER DEALINGS > > > > + * IN THE SOFTWARE. > > > > + */ > > > > + > > > > +#include "brw_nir.h" > > > > +#include "nir_builder.h" > > > > + > > > > +/** > > > > + * Intel hardware doesn't support 16-bit integer Math instructions > > so this > > > > + * pass implements them in 32-bit and then converts the result > > back to 16-bit. > > > > + */ > > > > +static void > > > > +lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool > > is_signed) > > > > +{ > > > > + const nir_op op = alu->op; > > > > + > > > > + bld->cursor = nir_before_instr(&alu->instr); > > > > + > > > > + nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL }; > > > > + const uint32_t num_inputs = nir_op_infos[op].num_inputs; > > > > + for (uint32_t i = 0; i < num_inputs; i++) { > > > > + nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i); > > > > + srcs_32[i] = is_signed ? nir_i2i32(bld, src) : > > nir_u2u32(bld, src); > > For float16, we'll need f2f32.
Yes, I have that (in a separate pass for float16); I suppose merging both makes more sense than having them be separate. > Also, is_signed can be derived from nir_op_infos[op].input_types so > it doesn't need to be passed in. If we want to make it fully > general, we probably also want to only do the conversion if the > source type is unsized. Good point. > > + } > > > > + > > > > + nir_ssa_def *dst_32 = > > > > + nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], > > srcs_32[3]); > > > > + > > > > + nir_ssa_def *dst_16 = > > > > + is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32); > > Again, we can pull this from the destination type. > > > + > > > > + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, > > nir_src_for_ssa(dst_16)); > > > > +} > > > > + > > > > +static bool > > > > +lower_instr(nir_builder *bld, nir_alu_instr *alu) > > > > +{ > > As mentioned in previous discussions, we may want to have a control > function such as > > unsigned (*lower_bit_size)(const nir_alu_instr *, void *) > > where the void * is something you pass in when you call the > optimization pass. I'm usually not a huge fan of making a super- > general thing before we need it. However, we already have two > different drivers that need it for different things so let's just do > it, make sure it works for all the use-cases, and get it right the > first time. Yes, I agree. > > + assert(alu->dest.dest.is_ssa); > > > > + if (alu->dest.dest.ssa.bit_size != 16) > > > > + return false; > > > > + > > > > + bool is_signed = false; > > > > + switch (alu->op) { > > > > + case nir_op_idiv: > > > > + case nir_op_imod: > > > > + is_signed = true; > > > > + /* Fallthrough */ > > > > + case nir_op_udiv: > > > > + case nir_op_umod: > > > > + case nir_op_irem: > > irem is signed. Oops, right. 
> > + lower_math_instr(bld, alu, is_signed); > > > > + return true; > > > > + default: > > > > + return false; > > > > + } > > > > +} > > > > + > > > > +static bool > > > > +lower_impl(nir_function_impl *impl) > > > > +{ > > > > + nir_builder b; > > > > + nir_builder_init(&b, impl); > > > > + bool progress = false; > > > > + > > > > + nir_foreach_block(block, impl) { > > > > + nir_foreach_instr_safe(instr, block) { > > > > + if (instr->type == nir_instr_type_alu) > > > > + progress |= lower_instr(&b, nir_instr_as_alu(instr)); > > > > + } > > > > + } > > > > + > > > > + nir_metadata_preserve(impl, nir_metadata_block_index | > > > > + nir_metadata_dominance); > > Probably only want to call this if (progress) Ok. > > + > > > > + return progress; > > > > +} > > > > + > > > > +bool > > > > +brw_nir_lower_16bit_int_math(nir_shader *shader) > > If we want this to handle 8-bit things, maybe it needs a different > name. :-) Yes, and also if we want this to handle floats. Iago > > +{ > > > > + bool progress = false; > > > > + > > > > + nir_foreach_function(function, shader) { > > > > + if (function->impl) > > > > + progress |= lower_impl(function->impl); > > > > + } > > > > + > > > > + return progress; > > > > +} > > > > diff --git a/src/intel/compiler/meson.build > > b/src/intel/compiler/meson.build > > > > index 72b7a6796cb..d80fcd6e31b 100644 > > > > --- a/src/intel/compiler/meson.build > > > > +++ b/src/intel/compiler/meson.build > > > > @@ -76,6 +76,7 @@ libintel_compiler_files = files( > > > > 'brw_nir_analyze_boolean_resolves.c', > > > > 'brw_nir_analyze_ubo_ranges.c', > > > > 'brw_nir_attribute_workarounds.c', > > > > + 'brw_nir_lower_16bit_int_math.c', > > > > 'brw_nir_lower_cs_intrinsics.c', > > > > 'brw_nir_opt_peephole_ffma.c', > > > > 'brw_nir_tcs_workarounds.c', > > > > -- > > > > 2.14.1 > > > > > > > > _______________________________________________ > > > > mesa-dev mailing list > > > > mesa-dev@lists.freedesktop.org > > > > 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev