The hardware doesn't support 16-bit integer types, so we need to implement these using 32-bit integer instructions and then convert the result back to 16-bit. --- src/intel/Makefile.sources | 1 + src/intel/compiler/brw_nir.c | 2 + src/intel/compiler/brw_nir.h | 2 + src/intel/compiler/brw_nir_lower_16bit_int_math.c | 108 ++++++++++++++++++++++ src/intel/compiler/meson.build | 1 + 5 files changed, 114 insertions(+) create mode 100644 src/intel/compiler/brw_nir_lower_16bit_int_math.c
diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index 91c71a8dfaf..2cd76961ea4 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -79,6 +79,7 @@ COMPILER_FILES = \ compiler/brw_nir_analyze_boolean_resolves.c \ compiler/brw_nir_analyze_ubo_ranges.c \ compiler/brw_nir_attribute_workarounds.c \ + compiler/brw_nir_lower_16bit_int_math.c \ compiler/brw_nir_lower_cs_intrinsics.c \ compiler/brw_nir_opt_peephole_ffma.c \ compiler/brw_nir_tcs_workarounds.c \ diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 69ab162f888..2e5754076ed 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -638,6 +638,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) nir_lower_isign64 | nir_lower_divmod64); + brw_nir_lower_16bit_int_math(nir); + nir = brw_nir_optimize(nir, compiler, is_scalar); if (is_scalar) { diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 03f52da08e5..6ba1a8bc654 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -152,6 +152,8 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler, bool brw_nir_opt_peephole_ffma(nir_shader *shader); +bool brw_nir_lower_16bit_int_math(nir_shader *shader); + nir_shader *brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, bool is_scalar); diff --git a/src/intel/compiler/brw_nir_lower_16bit_int_math.c b/src/intel/compiler/brw_nir_lower_16bit_int_math.c new file mode 100644 index 00000000000..6876309a822 --- /dev/null +++ b/src/intel/compiler/brw_nir_lower_16bit_int_math.c @@ -0,0 +1,108 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_nir.h" +#include "nir_builder.h" + +/** + * Intel hardware doesn't support 16-bit integer Math instructions so this + * pass implements them in 32-bit and then converts the result back to 16-bit. + */ +static void +lower_math_instr(nir_builder *bld, nir_alu_instr *alu, bool is_signed) +{ + const nir_op op = alu->op; + + bld->cursor = nir_before_instr(&alu->instr); + + nir_ssa_def *srcs_32[4] = { NULL, NULL, NULL, NULL }; + const uint32_t num_inputs = nir_op_infos[op].num_inputs; + for (uint32_t i = 0; i < num_inputs; i++) { + nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i); + srcs_32[i] = is_signed ? nir_i2i32(bld, src) : nir_u2u32(bld, src); + } + + nir_ssa_def *dst_32 = + nir_build_alu(bld, op, srcs_32[0], srcs_32[1], srcs_32[2], srcs_32[3]); + + nir_ssa_def *dst_16 = + is_signed ? nir_i2i16(bld, dst_32) : nir_u2u16(bld, dst_32); + + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(dst_16)); +} + +static bool +lower_instr(nir_builder *bld, nir_alu_instr *alu) +{ + assert(alu->dest.dest.is_ssa); + if (alu->dest.dest.ssa.bit_size != 16) + return false; + + bool is_signed = false; + switch (alu->op) { + case nir_op_idiv: + case nir_op_imod: + is_signed = true; + /* Fallthrough */ + case nir_op_udiv: + case nir_op_umod: + case nir_op_irem: + lower_math_instr(bld, alu, is_signed); + return true; + default: + return false; + } +} + +static bool +lower_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + bool progress = false; + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_alu) + progress |= lower_instr(&b, nir_instr_as_alu(instr)); + } + } + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return progress; +} + +bool +brw_nir_lower_16bit_int_math(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= lower_impl(function->impl); + } + + return progress; +} diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 72b7a6796cb..d80fcd6e31b 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -76,6 +76,7 @@ libintel_compiler_files = files( 'brw_nir_analyze_boolean_resolves.c', 'brw_nir_analyze_ubo_ranges.c', 'brw_nir_attribute_workarounds.c', + 'brw_nir_lower_16bit_int_math.c', 'brw_nir_lower_cs_intrinsics.c', 'brw_nir_opt_peephole_ffma.c', 'brw_nir_tcs_workarounds.c', -- 2.14.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev