Hi All,
This patch optimizes integer moves for cases where where the move could be done
move efficiently using a smaller mode.
For example:
long long f1(void)
{
return 0xffff6666;
}
long f2(void)
{
return 0x11110000ffff6666;
}
generates:
f1:
mov w0, 4294927974
ret
f2:
mov w0, 4294927974
movk x0, 0x1111, lsl 48
ret
instead of:
f1:
mov x0, 26214
movk x0, 0xffff, lsl 16
ret
f2:
mov x0, 26214
movk x0, 0xffff, lsl 16
movk x0, 0x1111, lsl 48
This works when the low 32 bits are either 0xffffXXXX or 0xXXXXffff (with XXXX
non-zero),
a 32-bit MOVN instruction can be used as if the type was int rather than long
(due to zero-extend to 64 bits).
Regression tested on aarch64-none-linux-gnu and no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-06-07 Tamar Christina <tamar.christ...@arm.com>
* config/aarch64/aarch64.c
(aarch64_internal_mov_immediate): Add new special pattern.
* config/aarch64/aarch64.md (*movdi_aarch64):
Add reg/32bit const mov case.
gcc/testsuite/
2017-06-07 Tamar Christina <tamar.christ...@arm.com>
* gcc.target/aarch64/int_mov_immediate_1.c: New.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a99a13460c2314ca9b40f82a466b6d492c49db97..e91586fa03c64b22c4c8efdf7b98d48c0086126d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1794,6 +1794,27 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
+ val2 = val & 0xffffffff;
+ if (mode == DImode
+ && aarch64_move_imm (val2, SImode)
+ && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
+ {
+ if (generate)
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+
+ /* Check if we have to emit a second instruction. */
+ if (val == val2)
+ return 1;
+
+ i = (val >> 48) ? 48 : 32;
+
+ if (generate)
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+
+ return 2;
+ }
+
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
@@ -1810,7 +1831,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
}
/* Remaining cases are all for DImode. */
-
mask = 0xffff;
zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index fdba2d0adde2ef9e8519f6321f7456517c5e916a..5fcf809ae47552395667647e7299dcfe4ebdf7dd 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1060,8 +1060,8 @@
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w, m, m,r,r ,*w, r,*w,w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w, m, m,r,r ,*w, r,*w,w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
@@ -1069,6 +1069,7 @@
mov\\t%0, %x1
mov\\t%x0, %1
mov\\t%x0, %1
+ mov\\t%w0, %1
#
ldr\\t%x0, %1
ldr\\t%d0, %1
@@ -1087,10 +1088,10 @@
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
- adr,adr,f_mcr,f_mrc,fmov,neon_move")
- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm, mov_imm,mov_imm,load1,\
+ load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+ (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn "insv_imm<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c b/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..6ac9065037f881c96ca81661a7d717133c6cc83d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3" } */
+
+long long f1(void)
+{
+ return 0xffff6666;
+}
+
+int f3(void)
+{
+ return 0xffff6666;
+}
+
+
+long f2(void)
+{
+ return 0x11110000ffff6666;
+}
+
+long f4(void)
+{
+ return 0x11110001ffff6666;
+}
+
+long f5(void)
+{
+ return 0x111100001ff6666;
+}
+
+long f6(void)
+{
+ return 0x00001111ffff6666;
+}
+
+long f7(void)
+{
+ return 0x000011116666ffff;
+}
+
+long f8(void)
+{
+ return 0x0f0011116666ffff;
+}
+
+/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, -39322" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 4294927974" 3 } } */
+/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 1718026239" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -2576941057" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -39322" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 26214" 1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0xf00, lsl 48" 1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 48" 2 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1000, lsl 32" 1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 32" 3 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x111, lsl 48" 1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1ff, lsl 16" 1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1, lsl 32" 1 } } */
+