aarch64_internal_mov_immediate uses loops that iterate over all legal bitmask immediates in order to find 2-instruction immediate combinations. One loop is quadratic and, despite being extremely expensive, very rarely finds a matching immediate (43 matches in all of SPEC2006, none of which are emitted in the final code), so it can be removed without any effect on code quality. The other loop can be replaced by a constant-time search: rather than iterating over all legal bitmask values, reconstruct a candidate bitmask directly and query the fast aarch64_bitmask_imm check.
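For illustration only, here is a rough standalone sketch of the new search, not code from the patch itself: the 16-bit-at-a-time loop paired with a simplified 64-bit-only reference check that stands in for aarch64_bitmask_imm. The helper names bitmask_imm_p and find_bitmask_plus_movk are invented for this sketch, and the emit/insv bookkeeping of the real function is omitted.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Rotate the low WIDTH bits of X right by ROT bits.  */
static uint64_t
rotr_elem (uint64_t x, unsigned rot, unsigned width)
{
  uint64_t mask = (width == 64) ? ~0ULL : ((1ULL << width) - 1);
  x &= mask;
  rot %= width;
  return rot ? ((x >> rot) | (x << (width - rot))) & mask : x;
}

/* Simplified 64-bit-only reference check for AArch64 bitmask (logical)
   immediates: a replicated element of 2/4/8/16/32/64 bits that is a
   rotation of a contiguous run of ones; all-zeroes and all-ones are
   excluded.  Stand-in for GCC's aarch64_bitmask_imm.  */
static bool
bitmask_imm_p (uint64_t val)
{
  if (val == 0 || val == ~0ULL)
    return false;

  /* Smallest power-of-two element width whose replication gives VAL.  */
  unsigned width = 64;
  while (width > 2)
    {
      unsigned half = width / 2;
      uint64_t mask = (1ULL << half) - 1;
      if ((val & mask) != ((val >> half) & mask))
        break;
      width = half;
    }

  /* The element must be some rotation of 0...01...1.  */
  for (unsigned rot = 0; rot < width; rot++)
    {
      uint64_t r = rotr_elem (val, rot, width);
      if ((r & (r + 1)) == 0)   /* Contiguous ones starting at bit 0.  */
        return true;
    }
  return false;
}

/* Look for a bitmask immediate VAL2 that differs from VAL in exactly one
   16-bit field, so VAL can be built as a logical immediate plus one MOVK.
   Returns the field offset (0/16/32/48) and sets *VAL2_OUT, or -1.  */
static int
find_bitmask_plus_movk (uint64_t val, uint64_t *val2_out)
{
  uint64_t mask = 0xffff;

  for (int i = 0; i < 64; i += 16, mask <<= 16)
    {
      /* Clear the 16-bit field.  */
      uint64_t val2 = val & ~mask;
      if (val2 != val && bitmask_imm_p (val2))
        { *val2_out = val2; return i; }

      /* Set the 16-bit field to all ones.  */
      val2 = val | mask;
      if (val2 != val && bitmask_imm_p (val2))
        { *val2_out = val2; return i; }

      /* Copy the field from the other 32-bit half, which catches
         repeating 32-bit patterns.  */
      val2 = val & ~mask;
      val2 |= ((val2 >> 32) | (val2 << 32)) & mask;
      if (val2 != val && bitmask_imm_p (val2))
        { *val2_out = val2; return i; }
    }
  return -1;
}

int
main (void)
{
  uint64_t val = 0x1234ffffffffffffULL, val2;
  int i = find_bitmask_plus_movk (val, &val2);
  if (i >= 0)
    printf ("mov x0, #0x%llx\nmovk x0, #0x%llx, lsl #%d\n",
            (unsigned long long) val2,
            (unsigned long long) ((val >> i) & 0xffff), i);
  return 0;
}

On 0x1234ffffffffffff, for example, the sketch settles on the 48-ones bitmask 0x0000ffffffffffff with the top 16 bits patched by a movk of 0x1234, i.e. a 2-instruction sequence found after at most four constant-cost iterations.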
No change in generated code, passes GCC regression tests/bootstrap.

ChangeLog:
2015-09-02  Wilco Dijkstra  <wdijk...@arm.com>

	* gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
	Replace slow immediate matching loops with a faster algorithm.

---
 gcc/config/aarch64/aarch64.c | 96 +++++++++++---------------------------------
 1 file changed, 23 insertions(+), 73 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c0280e6..d6f7cb0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1376,7 +1376,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
   unsigned HOST_WIDE_INT mask;
   int i;
   bool first;
-  unsigned HOST_WIDE_INT val;
+  unsigned HOST_WIDE_INT val, val2;
   bool subtargets;
   rtx subtarget;
   int one_match, zero_match, first_not_ffff_match;
@@ -1503,85 +1503,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
         }
     }
 
-  /* See if we can do it by arithmetically combining two
-     immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+  if (zero_match != 2 && one_match != 2)
     {
-      int j;
-      mask = 0xffff;
+      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
+         For a 64-bit bitmask try whether changing 16 bits to all ones or
+         zeroes creates a valid bitmask.  To check any repeated bitmask,
+         try using 16 bits from the other 32-bit half of val.  */
 
-      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
-          || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
+      for (i = 0; i < 64; i += 16, mask <<= 16)
         {
-          if (generate)
-            {
-              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-              emit_insn (gen_rtx_SET (subtarget,
-                                      GEN_INT (aarch64_bitmasks[i])));
-              emit_insn (gen_adddi3 (dest, subtarget,
-                                     GEN_INT (val - aarch64_bitmasks[i])));
-            }
-          num_insns += 2;
-          return num_insns;
+          val2 = val & ~mask;
+          if (val2 != val && aarch64_bitmask_imm (val2, mode))
+            break;
+          val2 = val | mask;
+          if (val2 != val && aarch64_bitmask_imm (val2, mode))
+            break;
+          val2 = val2 & ~mask;
+          val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
+          if (val2 != val && aarch64_bitmask_imm (val2, mode))
+            break;
         }
-
-      for (j = 0; j < 64; j += 16, mask <<= 16)
+      if (i != 64)
         {
-          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
+          if (generate)
             {
-              if (generate)
-                {
-                  emit_insn (gen_rtx_SET (dest,
-                                          GEN_INT (aarch64_bitmasks[i])));
-                  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-                                             GEN_INT ((val >> j) & 0xffff)));
-                }
-              num_insns += 2;
-              return num_insns;
+              emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+              emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                          GEN_INT ((val >> i) & 0xffff)));
             }
-        }
-    }
-
-  /* See if we can do it by logically combining two immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
-    {
-      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
-        {
-          int j;
-
-          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-            if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
-              {
-                if (generate)
-                  {
-                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-                    emit_insn (gen_rtx_SET (subtarget,
-                                            GEN_INT (aarch64_bitmasks[i])));
-                    emit_insn (gen_iordi3 (dest, subtarget,
-                                           GEN_INT (aarch64_bitmasks[j])));
-                  }
-                num_insns += 2;
-                return num_insns;
-              }
-        }
-      else if ((val & aarch64_bitmasks[i]) == val)
-        {
-          int j;
-
-          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-            if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
-              {
-                if (generate)
-                  {
-                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-                    emit_insn (gen_rtx_SET (subtarget,
-                                            GEN_INT (aarch64_bitmasks[j])));
-                    emit_insn (gen_anddi3 (dest, subtarget,
-                                           GEN_INT (aarch64_bitmasks[i])));
-                  }
-                num_insns += 2;
-                return num_insns;
-              }
+          return 2;
         }
     }
--
1.8.3