On 16/05/14 13:35, Richard Earnshaw wrote:
On 08/05/14 18:36, Ian Bolton wrote:
Hi,

It currently takes 4 instructions to generate certain immediates on
AArch64 (unless we put them in the constant pool).

For example ...

   long long
   ffffbeefcafebabe ()
   {
     return 0xFFFFBEEFCAFEBABEll;
   }

leads to ...

   mov  x0, 47806
   movk x0, 0xcafe, lsl 16
   movk x0, 0xbeef, lsl 32
   orr  x0, x0, -281474976710656

The above case is tackled in this patch by employing MOVN
to generate the top 32 bits in a single instruction ...

   mov x0, -71536975282177
   movk x0, 0xcafe, lsl 16
   movk x0, 0xbabe, lsl 0

(Note that where at least two half-words are 0xffff, the existing
code that builds the immediate in two instructions is still used.)
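
To see the MOVN step concretely: MOVN writes the bitwise NOT of a shifted
16-bit immediate, so a single instruction can produce a value whose other
half-words are all 0xffff, and MOVK then patches the remaining half-words.
A minimal host-side check of the sequence above (plain C, purely
illustrative, not the compiler code):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* MOVN x0, #0x4110, lsl #32 writes ~(0x4110 << 32),
     i.e. 0xffffbeefffffffff (-71536975282177 in the listing above).  */
  uint64_t x = ~(0x4110ULL << 32);

  /* MOVK x0, #0xcafe, lsl #16 replaces bits 16-31 only.  */
  x = (x & ~(0xffffULL << 16)) | (0xcafeULL << 16);

  /* MOVK x0, #0xbabe, lsl #0 replaces bits 0-15 only.  */
  x = (x & ~0xffffULL) | 0xbabeULL;

  printf ("%#llx\n", (unsigned long long) x);   /* 0xffffbeefcafebabe */
  return 0;
}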

Tested with the standard gcc regression suite and the attached test case.

OK for commit?

What about:

long long a()
{
   return 0x1234ffff56789abcll;
}

long long b()
{
   return 0x12345678ffff9abcll;
}

long long c()
{
   return 0x123456789abcffffll;
}

?

Surely these can also benefit from this sort of optimization, but it
looks as though you only handle the top 16 bits being set.

Hi Richard,

How about this rework of the patch?

For code:

long long foo ()
{
  return 0xFFFFBEEFCAFEBABEll;
}

long long a()
{
  return 0x1234ffff56789abcll;
}

long long b()
{
  return 0x12345678ffff9abcll;
}

long long c()
{
  return 0x123456789abcffffll;
}

we now generate:
foo:
        mov     x0, -17730
        movk    x0, 0xcafe, lsl 16
        movk    x0, 0xbeef, lsl 32
        ret
        .size   foo, .-foo
        .align  2
        .global a
        .type   a, %function
a:
        mov     x0, -25924
        movk    x0, 0x5678, lsl 16
        movk    x0, 0x1234, lsl 48
        ret
        .size   a, .-a
        .align  2
        .global b
        .type   b, %function
b:
        mov     x0, -25924
        movk    x0, 0x5678, lsl 32
        movk    x0, 0x1234, lsl 48
        ret
        .size   b, .-b
        .align  2
        .global c
        .type   c, %function
c:
        mov     x0, -1698889729
        movk    x0, 0x5678, lsl 32
        movk    x0, 0x1234, lsl 48
        ret


3 instructions are used in each case.
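
As a quick sanity check on the listings above, the following stand-alone
program (a sketch for illustration only; the helper names are made up and
this is not part of the patch) emulates the MOV/MOVK semantics and confirms
that each three-instruction sequence reproduces its constant:

#include <assert.h>
#include <stdint.h>

/* "mov xN, <imm>" with these wide immediates is assembled as MOVN, but
   architecturally it just writes the (sign-extended) value.  */
static uint64_t
mov64 (int64_t imm)
{
  return (uint64_t) imm;
}

/* MOVK replaces one 16-bit field and leaves the rest of the register alone.  */
static uint64_t
movk64 (uint64_t x, uint64_t imm16, unsigned lsl)
{
  return (x & ~(0xffffULL << lsl)) | (imm16 << lsl);
}

int
main (void)
{
  uint64_t x;

  x = movk64 (movk64 (mov64 (-17730), 0xcafe, 16), 0xbeef, 32);        /* foo */
  assert (x == 0xFFFFBEEFCAFEBABEULL);

  x = movk64 (movk64 (mov64 (-25924), 0x5678, 16), 0x1234, 48);        /* a */
  assert (x == 0x1234ffff56789abcULL);

  x = movk64 (movk64 (mov64 (-25924), 0x5678, 32), 0x1234, 48);        /* b */
  assert (x == 0x12345678ffff9abcULL);

  x = movk64 (movk64 (mov64 (-1698889729), 0x5678, 32), 0x1234, 48);   /* c */
  assert (x == 0x123456789abcffffULL);

  return 0;
}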

Thanks,
Kyrill

2014-08-07  Ian Bolton  <ian.bol...@arm.com>
            Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

        * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
        Use MOVN when one of the half-words is 0xffff.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0a7f441..2db91c7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   unsigned HOST_WIDE_INT val;
   bool subtargets;
   rtx subtarget;
-  int one_match, zero_match;
+  int one_match, zero_match, first_not_ffff_match;
 
   gcc_assert (mode == SImode || mode == DImode);
 
@@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
   one_match = 0;
   zero_match = 0;
   mask = 0xffff;
+  first_not_ffff_match = -1;
 
   for (i = 0; i < 64; i += 16, mask <<= 16)
     {
-      if ((val & mask) == 0)
-	zero_match++;
-      else if ((val & mask) == mask)
+      if ((val & mask) == mask)
 	one_match++;
+      else
+	{
+	  if (first_not_ffff_match < 0)
+	    first_not_ffff_match = i;
+	  if ((val & mask) == 0)
+	    zero_match++;
+	}
     }
 
   if (one_match == 2)
     {
-      mask = 0xffff;
-      for (i = 0; i < 64; i += 16, mask <<= 16)
+      /* Set one of the quarters and then insert back into result.  */
+      mask = 0xffffll << first_not_ffff_match;
+      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+				 GEN_INT ((val >> first_not_ffff_match)
+					  & 0xffff)));
+      return;
+    }
+
+  if (one_match == 1)
+    {
+      /* Set either first three quarters or all but the third.	 */
+      mask = 0xffffll << (16 - first_not_ffff_match);
+      emit_insn (gen_rtx_SET (VOIDmode, dest,
+			      GEN_INT (val | mask | 0xffffffff00000000ull)));
+
+      /* Now insert other two quarters.	 */
+      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
+	   i < 64; i += 16, mask <<= 16)
 	{
 	  if ((val & mask) != mask)
-	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-					 GEN_INT ((val >> i) & 0xffff)));
-	      return;
-	    }
+	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+				       GEN_INT ((val >> i) & 0xffff)));
 	}
-      gcc_unreachable ();
+      return;
     }
 
   if (zero_match == 2)

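For readers following along outside the compiler sources, the new logic can
be restated in plain C roughly as follows (a sketch only: the real code emits
RTL through gen_rtx_SET and gen_insv_immdi rather than printing text, and the
function name here is invented). The first instruction sets the first
non-0xffff quarter while forcing every other quarter to 0xffff (which the
assembler turns into MOVN), and MOVK then fills in whatever quarters still
differ:

#include <stdint.h>
#include <stdio.h>

static void
print_movn_sequence (uint64_t val)
{
  int one_match = 0, first_not_ffff_match = -1;
  uint64_t mask = 0xffff;
  uint64_t first;
  int i;

  /* Classify the four 16-bit quarters, as in the patch.  */
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
        one_match++;
      else if (first_not_ffff_match < 0)
        first_not_ffff_match = i;
    }

  if (one_match == 0)
    return;                     /* Some other strategy applies.  */

  /* First instruction: correct quarter at first_not_ffff_match, every other
     quarter forced to 0xffff; assembled as MOVN.  */
  first = (((val >> first_not_ffff_match) & 0xffff) << first_not_ffff_match)
          | ~(0xffffULL << first_not_ffff_match);
  printf ("mov\tx0, %lld\n", (long long) first);

  /* MOVK into every remaining quarter that is not already 0xffff.  */
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    if (i != first_not_ffff_match && (val & mask) != mask)
      printf ("movk\tx0, 0x%llx, lsl %d\n",
              (unsigned long long) ((val >> i) & 0xffff), i);
}

int
main (void)
{
  print_movn_sequence (0x12345678ffff9abcULL);  /* prints the "b" sequence */
  return 0;
}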