Hi All,
This patch converts SImode to DImode extends that also move from core
registers to VFP/NEON registers.
Currently, the compiler does extends in core registers first, and then
does the move. This adds to register pressure, which I would imagine to
be a bad thing. If the value is not in a properly aligned register (the
first parameter to a register never is) then it also has to move that
around also.
With my patch, it first moves the SImode value into the NEON register,
and then extends it, which uses no extra registers.
Zero extend, before and after (assuming the value is passed in r0):
mov r2, r0 | vdup.32 d16, r0
movs r3, #0 | vshr.u64 d16, d16, #32
fmdrr d16, r2, r3 |
Sign extend:
mov r2, r0 | vdup.32 d16, r0
asrs r3, r0, #31 | vshr.s64 d16, d16, #32
fmdrr d16, r2, r3 |
OK for 4.8?
Andrew
P.S.
I have experimented with doing zero-extends something like
vmov.i64 d7, #0
fmsr s14, r0
But, somehow the immediate load doesn't seem to work, and it limits the
target register to VFP_LO_REGS. It's also not possible to load into only
s15, so I'm not sure there's any advantage.
2012-02-23 Andrew Stubbs <a...@codesourcery.com>
gcc/
* config/arm/arm.md (zero_extend<mode>di2): Add extra alternatives
for NEON registers.
(extend<mode>di2): Likewise.
Prevent extend splitters doing NEON alternatives.
* config/arm/iterators.md (qhs_extenddi_cstr, qhs_zextenddi_cstr):
Adjust constraints to add new alternatives.
* config/arm/neon.md: Add splitters for zero- and sign-extend.
gcc/testsuite/
* gcc.target/arm/neon-extend-1.c: New file.
* gcc.target/arm/neon-extend-2.c: New file.
---
gcc/config/arm/arm.md | 26 +++++++++++++++-----------
gcc/config/arm/iterators.md | 4 ++--
gcc/config/arm/neon.md | 22 ++++++++++++++++++++++
gcc/testsuite/gcc.target/arm/neon-extend-1.c | 13 +++++++++++++
gcc/testsuite/gcc.target/arm/neon-extend-2.c | 13 +++++++++++++
5 files changed, 65 insertions(+), 13 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-1.c
create mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-2.c
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 182c52a..35bf688 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4479,33 +4479,35 @@
;; Zero and sign extension instructions.
(define_insn "zero_extend<mode>di2"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
+ [(set (match_operand:DI 0 "s_register_operand" "=w, r")
(zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
"<qhs_zextenddi_cstr>")))]
"TARGET_32BIT <qhs_zextenddi_cond>"
"#"
- [(set_attr "length" "8")
- (set_attr "ce_count" "2")
- (set_attr "predicable" "yes")]
+ [(set_attr "length" "8,8")
+ (set_attr "ce_count" "2,2")
+ (set_attr "predicable" "yes,yes")]
)
(define_insn "extend<mode>di2"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
+ [(set (match_operand:DI 0 "s_register_operand" "=w,r")
(sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
"<qhs_extenddi_cstr>")))]
"TARGET_32BIT <qhs_sextenddi_cond>"
"#"
- [(set_attr "length" "8")
- (set_attr "ce_count" "2")
- (set_attr "shift" "1")
- (set_attr "predicable" "yes")]
+ [(set_attr "length" "8,8")
+ (set_attr "ce_count" "2,2")
+ (set_attr "shift" "1,1")
+ (set_attr "predicable" "yes,yes")]
)
;; Splits for all extensions to DImode
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && (!TARGET_NEON
+ || (reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))))"
[(set (match_dup 0) (match_dup 1))]
{
rtx lo_part = gen_lowpart (SImode, operands[0]);
@@ -4531,7 +4533,9 @@
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && (!TARGET_NEON
+ || (reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))))"
[(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
{
rtx lo_part = gen_lowpart (SImode, operands[0]);
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 1567264..07ac5da 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -409,8 +409,8 @@
(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
(HI "nonimmediate_operand")
(QI "arm_reg_or_extendqisi_mem_op")])
-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
-(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r,r") (HI "r,rm") (QI "r,rUq")])
+(define_mode_attr qhs_zextenddi_cstr [(SI "r,r") (HI "r,rm") (QI "r,rm")])
;; Mode attributes used for fixed-point support.
(define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 6492721..618d59d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -5879,3 +5879,25 @@
(const_string "neon_fp_vadd_qqq_vabs_qq"))
(const_string "neon_int_5")))]
)
+
+;; Copy from core-to-neon regs, then extend, not vice-versa
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
+ {
+ operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
+ {
+ operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+ })
diff --git a/gcc/testsuite/gcc.target/arm/neon-extend-1.c b/gcc/testsuite/gcc.target/arm/neon-extend-1.c
new file mode 100644
index 0000000..cfe83ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-extend-1.c
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+void
+f (unsigned int a)
+{
+ unsigned long long b = a;
+ asm volatile ("@ extended to %0" : : "w" (b));
+}
+
+/* { dg-final { scan-assembler "vdup.32" } } */
+/* { dg-final { scan-assembler "vshr.u64" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-extend-2.c b/gcc/testsuite/gcc.target/arm/neon-extend-2.c
new file mode 100644
index 0000000..1c5a17e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-extend-2.c
@@ -0,0 +1,13 @@
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+void
+f (int a)
+{
+ long long b = a;
+ asm volatile ("@ extended to %0" : : "w" (b));
+}
+
+/* { dg-final { scan-assembler "vdup.32" } } */
+/* { dg-final { scan-assembler "vshr.s64" } } */