On Thu, Jan 07, 2010 at 03:53:59AM -0500, Jakub Jelinek wrote: > On Thu, Jan 07, 2010 at 09:48:53AM +0100, Gabriel Paubert wrote: > > > apparently rs6000_emit_set_long_const needs work. > > > lis 3,0x8034 > > > extsw 3,3 > > > or > > > li 3,0x401a > > > sldi 3,3,17 > > > etc. do IMHO the same. > > > > Huh? I don't think so: > > > > - first one loads 0xffff_ffff_8034_0000 in r3, and the extsw looks redundant > > I meant lis 3,0x8034; rldicl 3,3,0,32 for the first case, sorry for mixing > sign extension with zero extension.
Attached is a quick version of an optimization. For the following test functions: long f0 (long x) { return 0x80340001UL; } long f1 (long x) { return 0x80340000UL; } long f2 (long x) { return 0xfffffff000000000UL; } long f3 (long x) { return 0x1ffffffffUL; } long f4 (long x) { return 0xffffff1230000000UL; } long f5 (long x) { return 0x180340000UL; } long f6 (long x) { return 0xfffffff180300000UL; } the change in the generated code is: .f0: - li 3,0 - ori 3,3,32820 - sldi 3,3,16 + lis 3,0x8034 ori 3,3,1 + rldicl 3,3,0,32 ... .f1: - li 3,0 - ori 3,3,32820 - sldi 3,3,16 + lis 3,0x8034 + rldicl 3,3,0,32 Unfortunately the current constraints prohibit the other easy constants (e.g. the 2-insn ones) from being expanded inline, and the cases with just one 0->1 or 1->0 bit transition were already handled before. So, either we'd need to add a new constraint (for which cases, just all 2-insn ones?), or it would be sufficient to drop analyze_64bit_constant and just handle the ud3 == 0 && ud4 == 0 case. Jakub
2010-01-07 Jakub Jelinek <ja...@redhat.com> * config/rs6000/rs6000.c (analyze_64bit_constant): New function. (rs6000_emit_set_long_const): Optimize. --- gcc/config/rs6000/rs6000.c.jj 2009-12-10 19:19:08.000000000 +0100 +++ gcc/config/rs6000/rs6000.c 2010-01-07 09:59:44.000000000 +0100 @@ -6091,6 +6091,45 @@ rs6000_emit_set_const (rtx dest, enum ma return result; } +/* Analyze a 64-bit constant for certain properties. */ +static void analyze_64bit_constant (unsigned HOST_WIDE_INT, + int *, int *, int *); + +static void +analyze_64bit_constant (unsigned HOST_WIDE_INT bits, + int *hbsp, int *lbsp, int *topp) +{ + int lowest_bit_set, highest_bit_set, top_bits_set; + int i; + + lowest_bit_set = highest_bit_set = -1; + i = 0; + do + { + if ((lowest_bit_set == -1) && ((bits >> i) & 1)) + lowest_bit_set = i; + if ((highest_bit_set == -1) && ((bits >> (64 - i - 1)) & 1)) + highest_bit_set = 64 - i - 1; + } + while (++i < 64 + && ((highest_bit_set == -1) + || (lowest_bit_set == -1))); + /* If there are no bits set this should have gone out + as one instruction! */ + gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1); + top_bits_set = highest_bit_set - lowest_bit_set + 1; + for (i = highest_bit_set; i >= lowest_bit_set; i--) + { + if ((bits & ((HOST_WIDE_INT) 1 << i)) != 0) + continue; + top_bits_set = highest_bit_set - i; + break; + } + *hbsp = highest_bit_set; + *lbsp = lowest_bit_set; + *topp = top_bits_set; +} + /* Having failed to find a 3 insn sequence in rs6000_emit_set_const, fall back to a straight forward decomposition. 
We do this to avoid exponential run times encountered when looking for longer sequences @@ -6112,12 +6151,11 @@ rs6000_emit_set_long_const (rtx dest, HO else { HOST_WIDE_INT ud1, ud2, ud3, ud4; + int lowest_bit_set, highest_bit_set, top_bits_set; ud1 = c1 & 0xffff; ud2 = (c1 & 0xffff0000) >> 16; -#if HOST_BITS_PER_WIDE_INT >= 64 c2 = c1 >> 32; -#endif ud3 = c2 & 0xffff; ud4 = (c2 & 0xffff0000) >> 16; @@ -6128,6 +6166,7 @@ rs6000_emit_set_long_const (rtx dest, HO emit_move_insn (dest, GEN_INT (((ud1 ^ 0x8000) - 0x8000))); else emit_move_insn (dest, GEN_INT (ud1)); + return dest; } else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) @@ -6142,6 +6181,52 @@ rs6000_emit_set_long_const (rtx dest, HO emit_move_insn (copy_rtx (dest), gen_rtx_IOR (DImode, copy_rtx (dest), GEN_INT (ud1))); + return dest; + } + analyze_64bit_constant (c1, &highest_bit_set, &lowest_bit_set, + &top_bits_set); + /* See if a 2 insn sequence isn't possible. + li reg,cst; sldi reg,reg,shift. */ + if (((highest_bit_set - lowest_bit_set) < 15 + || (highest_bit_set == 63 + && (highest_bit_set - lowest_bit_set) < 15 + top_bits_set)) + /* For 0x00000000XXXX0000 prefer the next 2 insn sequence. */ + && (ud1 | ud3 | ud4) != 0) + { + HOST_WIDE_INT the_const = c1 >> lowest_bit_set; + emit_move_insn (dest, GEN_INT (the_const)); + emit_move_insn (copy_rtx (dest), + gen_rtx_ASHIFT (DImode, copy_rtx (dest), + GEN_INT (lowest_bit_set))); + } + else if (ud3 == 0 && ud4 == 0) + { + gcc_assert (ud2 & 0x8000); + emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000) + - 0x80000000)); + if (ud1 != 0) + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), + GEN_INT (ud1))); + emit_move_insn (copy_rtx (dest), + gen_rtx_ZERO_EXTEND (DImode, + gen_lowpart (SImode, + copy_rtx (dest)))); + } + /* 3 insn sequence. + lis reg,csth; ori reg,reg,cstl; sldi reg,reg,shift. 
*/ + else if ((highest_bit_set - lowest_bit_set) < 31 + || (highest_bit_set == 63 + && (highest_bit_set - lowest_bit_set) < 31 + top_bits_set)) + { + HOST_WIDE_INT the_const = c1 >> lowest_bit_set; + emit_move_insn (dest, GEN_INT (the_const >> 16)); + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), + GEN_INT (the_const & 0xffff))); + emit_move_insn (copy_rtx (dest), + gen_rtx_ASHIFT (DImode, copy_rtx (dest), + GEN_INT (lowest_bit_set))); } else if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && ! (ud3 & 0x8000)))