On Wed, Sep 02, 2015 at 01:35:19PM +0100, Wilco Dijkstra wrote: > aarch64_internal_mov_immediate uses loops iterating over all legal bitmask > immediates to find 2-instruction immediate combinations. One loop is > quadratic and despite being extremely expensive very rarely finds a matching > immediate (43 matches in all of SPEC2006 but none are emitted in final code), > so it can be removed without any effect on code quality. The other loop can > be replaced by a constant-time search: rather than iterating over all legal > bitmask values, reconstruct a potential bitmask and query the fast > aarch64_bitmask_imm. > > No change in generated code, passes GCC regression tests/bootstrap.
Well, presumably those 43 cases in SPEC2006 changed... The code looks OK, and I haven't seen any objections from Marcus or Richard on the overall direction of the patch set. OK for trunk. Thanks, James > > ChangeLog: > 2015-09-02 Wilco Dijkstra <wdijk...@arm.com> > > * gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate): > Replace slow immediate matching loops with a faster algorithm. > > --- > gcc/config/aarch64/aarch64.c | 96 > +++++++++++--------------------------------- > 1 file changed, 23 insertions(+), 73 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index c0280e6..d6f7cb0 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -1376,7 +1376,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool > generate, > unsigned HOST_WIDE_INT mask; > int i; > bool first; > - unsigned HOST_WIDE_INT val; > + unsigned HOST_WIDE_INT val, val2; > bool subtargets; > rtx subtarget; > int one_match, zero_match, first_not_ffff_match; > @@ -1503,85 +1503,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, > bool generate, > } > } > > - /* See if we can do it by arithmetically combining two > - immediates. */ > - for (i = 0; i < AARCH64_NUM_BITMASKS; i++) > + if (zero_match != 2 && one_match != 2) > { > - int j; > - mask = 0xffff; > + /* Try emitting a bitmask immediate with a movk replacing 16 bits. > + For a 64-bit bitmask try whether changing 16 bits to all ones or > + zeroes creates a valid bitmask. To check any repeated bitmask, > + try using 16 bits from the other 32-bit half of val. */ > > - if (aarch64_uimm12_shift (val - aarch64_bitmasks[i]) > - || aarch64_uimm12_shift (-val + aarch64_bitmasks[i])) > + for (i = 0; i < 64; i += 16, mask <<= 16) > { > - if (generate) > - { > - subtarget = subtargets ? gen_reg_rtx (DImode) : dest; > - emit_insn (gen_rtx_SET (subtarget, > - GEN_INT (aarch64_bitmasks[i]))); > - emit_insn (gen_adddi3 (dest, subtarget, > - GEN_INT (val - aarch64_bitmasks[i]))); > - } > - num_insns += 2; > - return num_insns; > + val2 = val & ~mask; > + if (val2 != val && aarch64_bitmask_imm (val2, mode)) > + break; > + val2 = val | mask; > + if (val2 != val && aarch64_bitmask_imm (val2, mode)) > + break; > + val2 = val2 & ~mask; > + val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask); > + if (val2 != val && aarch64_bitmask_imm (val2, mode)) > + break; > } > - > - for (j = 0; j < 64; j += 16, mask <<= 16) > + if (i != 64) > { > - if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask)) > + if (generate) > { > - if (generate) > - { > - emit_insn (gen_rtx_SET (dest, > - GEN_INT (aarch64_bitmasks[i]))); > - emit_insn (gen_insv_immdi (dest, GEN_INT (j), > - GEN_INT ((val >> j) & 0xffff))); > - } > - num_insns += 2; > - return num_insns; > + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); > + emit_insn (gen_insv_immdi (dest, GEN_INT (i), > + GEN_INT ((val >> i) & 0xffff))); > } > - } > - } > - > - /* See if we can do it by logically combining two immediates. */ > - for (i = 0; i < AARCH64_NUM_BITMASKS; i++) > - { > - if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i]) > - { > - int j; > - > - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) > - if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j])) > - { > - if (generate) > - { > - subtarget = subtargets ? gen_reg_rtx (mode) : dest; > - emit_insn (gen_rtx_SET (subtarget, > - GEN_INT (aarch64_bitmasks[i]))); > - emit_insn (gen_iordi3 (dest, subtarget, > - GEN_INT (aarch64_bitmasks[j]))); > - } > - num_insns += 2; > - return num_insns; > - } > - } > - else if ((val & aarch64_bitmasks[i]) == val) > - { > - int j; > - > - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) > - if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i])) > - { > - if (generate) > - { > - subtarget = subtargets ? gen_reg_rtx (mode) : dest; > - emit_insn (gen_rtx_SET (subtarget, > - GEN_INT (aarch64_bitmasks[j]))); > - emit_insn (gen_anddi3 (dest, subtarget, > - GEN_INT (aarch64_bitmasks[i]))); > - } > - num_insns += 2; > - return num_insns; > - } > + return 2; > } > } > > -- > 1.8.3 > >