Hi,
This patch fixes the mve_mov<mode> pattern for the case where both MVE vector
operands are in core (R) registers and the move does not get optimized away.
I use the same approach as we do for NEON, where we use four register moves.
Bootstrapped on arm-linux-gnueabihf and ran the MVE testsuite on arm-none-eabi.
Is this OK for trunk?
gcc/ChangeLog:
2020-03-20 Andre Vieira <andre.simoesdiasvie...@arm.com>
* config/arm/mve.md (mve_mov<mode>): Fix R->R case.
gcc/testsuite/ChangeLog:
2020-03-20 Andre Vieira <andre.simoesdiasvie...@arm.com>
* gcc.target/arm/mve/intrinsics/mve_move_gpr_to_gpr.c: New test.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 3cdb2e71cf04d45d220f6667646d226c8015659a..3015df7a6af0ab50e0ae47894f63597ada8566c5 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -295,7 +295,7 @@ (define_insn "*mve_mov<mode>"
else
return "vldrb.8 %q0, %E1";
case 5:
- return output_move_neon (operands);
+ return output_move_quad (operands);
case 7:
return "vstrb.8 %q1, %E0";
default:
@@ -303,7 +303,7 @@ (define_insn "*mve_mov<mode>"
return "";
}
}
- [(set_attr "type" "mve_move,mve_move,mve_move,mve_move,mve_load,mve_move,mve_move,mve_store")
+ [(set_attr "type" "mve_move,mve_move,mve_move,mve_move,mve_load,multiple,mve_move,mve_store")
(set_attr "length" "4,8,8,4,8,8,4,4")
(set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*")
(set_attr "neg_pool_range" "*,*,*,*,996,*,*,*")])
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_move_gpr_to_gpr.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_move_gpr_to_gpr.c
new file mode 100644
index 0000000000000000000000000000000000000000..791b8529a052dfca42e12648b6967eca3a2a985e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_move_gpr_to_gpr.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O2 -mfloat-abi=softfp" } */
+
+#include "arm_mve.h"
+
+extern int bar (float16x8_t, float16_t);
+
+extern void foobar (float16_t);
+
+int
+foo (float16x8_t a, float16_t b)
+{
+ foobar (b);
+ return bar (a, b);
+}
+