Hi, this patch addresses a regression caused by my earlier patch, which avoided useless register copies but in a few cases caused us to generate instruction types that did not make the finalizer happy.
Fixed thusly. I am going to commit to the branch now and will queue it for trunk for later. Thanks, Martin 2016-08-12 Martin Jambor <mjam...@suse.cz> * hsa-gen.c (gen_hsa_unary_operation): Make sure the function does not use bittype source type for firstbit and lastbit operations. (gen_hsa_popcount_to_dest): Make sure the function uses a bittype source type. libgomp/ * testsuite/libgomp.hsa.c/bits-insns.c: New test. --- gcc/hsa-gen.c | 13 +++-- libgomp/testsuite/libgomp.hsa.c/bits-insns.c | 73 ++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 libgomp/testsuite/libgomp.hsa.c/bits-insns.c diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index baa20b9..c946b2f 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -2957,8 +2957,12 @@ gen_hsa_unary_operation (BrigOpcode opcode, hsa_op_reg *dest, if (opcode == BRIG_OPCODE_MOV && hsa_needs_cvt (dest->m_type, op1->m_type)) insn = new hsa_insn_cvt (dest, op1); else if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT) - insn = new hsa_insn_srctype (2, opcode, BRIG_TYPE_U32, op1->m_type, NULL, - op1); + { + BrigType16_t srctype = hsa_type_integer_p (op1->m_type) ? 
op1->m_type + : hsa_unsigned_type_for_type (op1->m_type); + insn = new hsa_insn_srctype (2, opcode, BRIG_TYPE_U32, srctype, NULL, + op1); + } else { insn = new hsa_insn_basic (2, opcode, dest->m_type, dest, op1); @@ -4250,12 +4254,13 @@ gen_hsa_popcount_to_dest (hsa_op_reg *dest, hsa_op_with_type *arg, hsa_bb *hbb) if (hsa_type_bit_size (arg->m_type) < 32) arg = arg->get_in_type (BRIG_TYPE_B32, hbb); + BrigType16_t srctype = hsa_bittype_for_type (arg->m_type); if (!hsa_btype_p (arg->m_type)) - arg = arg->get_in_type (hsa_bittype_for_type (arg->m_type), hbb); + arg = arg->get_in_type (srctype, hbb); hsa_insn_srctype *popcount = new hsa_insn_srctype (2, BRIG_OPCODE_POPCOUNT, BRIG_TYPE_U32, - arg->m_type, NULL, arg); + srctype, NULL, arg); hbb->append_insn (popcount); popcount->set_output_in_type (dest, 0, hbb); } diff --git a/libgomp/testsuite/libgomp.hsa.c/bits-insns.c b/libgomp/testsuite/libgomp.hsa.c/bits-insns.c new file mode 100644 index 0000000..21cac72 --- /dev/null +++ b/libgomp/testsuite/libgomp.hsa.c/bits-insns.c @@ -0,0 +1,73 @@ +#include <math.h> + +#define N 12 + +int main() +{ + unsigned int arguments[N] = {0u, 1u, 2u, 3u, 111u, 333u, 444u, 0x80000000u, 0x0000ffffu, 0xf0000000u, 0xff000000u, 0xffffffffu}; + int clrsb[N] = {}; + int clz[N] = {}; + int ctz[N] = {}; + int ffs[N] = {}; + int parity[N] = {}; + int popcount[N] = {}; + + int ref_clrsb[N] = {}; + int ref_clz[N] = {}; + int ref_ctz[N] = {}; + int ref_ffs[N] = {}; + int ref_parity[N] = {}; + int ref_popcount[N] = {}; + + for (unsigned i = 0; i < N; i++) + { + ref_clrsb[i] = __builtin_clrsb (arguments[i]); + ref_clz[i] = __builtin_clz (arguments[i]); + ref_ctz[i] = __builtin_ctz (arguments[i]); + ref_ffs[i] = __builtin_ffs (arguments[i]); + ref_parity[i] = __builtin_parity (arguments[i]); + ref_popcount[i] = __builtin_popcount (arguments[i]); + } + + #pragma omp target map(from:clz, ctz, ffs, parity, popcount) + { + for (unsigned i = 0; i < N; i++) + { + clrsb[i] = __builtin_clrsb (arguments[i]); 
+ clz[i] = __builtin_clz (arguments[i]); + ctz[i] = __builtin_ctz (arguments[i]); + ffs[i] = __builtin_ffs (arguments[i]); + parity[i] = __builtin_parity (arguments[i]); + popcount[i] = __builtin_popcount (arguments[i]); + } + } + + for (unsigned i = 0; i < N; i++) + if (ref_clrsb[i] != clrsb[i]) + __builtin_abort (); + + /* CLZ of zero is undefined for zero. */ + for (unsigned i = 1; i < N; i++) + if (ref_clz[i] != clz[i]) + __builtin_abort (); + + /* Likewise for ctz */ + for (unsigned i = 1; i < N; i++) + if (ref_ctz[i] != ctz[i]) + __builtin_abort (); + + for (unsigned i = 0; i < N; i++) + if (ref_ffs[i] != ffs[i]) + __builtin_abort (); + + for (unsigned i = 0; i < N; i++) + if (ref_parity[i] != parity[i]) + __builtin_abort (); + + for (unsigned i = 0; i < N; i++) + if (ref_popcount[i] != popcount[i]) + __builtin_abort (); + + return 0; +} + -- 2.9.2