On 11.05.2012 16:48, Ramana Radhakrishnan wrote:
I would change the iterator from VQX to VQ in the pattern above (in that
case you can also simplify the setting of neon_type, change the pattern
to be a vec_duplicate as below, and get rid of any lingering
definitions of UNSPEC_VLD1_DUP if they exist), and define a separate
pattern that expresses the V2DI case as a define_insn_and_split, as
below.
(define_insn_and_split "neon_vld1_dupv2di"
[(set (match_operand:V2DI 0 "s_register_operand" "=w")
(vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
"TARGET_NEON"
"#"
"&& reload_completed"
[(const_int 0)]
{
rtx tmprtx = gen_lowpart (DImode, operands[0]);
emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
DONE;
}
[(set_attr "length" "8")
(set_attr "neon_type" "<fromearlierpattern>")]
)
Do you want to try this and see what you get?
Thanks for this example and suggestion, it does work.
I'd rather have an extra regression test in gcc.target/arm that is a run-time test; for example, take a look at gcc.target/arm/neon-vadds64.c.
Here is an updated patch:
2012-05-16 Christophe Lyon <christophe.l...@st.com>
* gcc/config/arm/neon.md (neon_vld1_dup): Restrict to VQ
operands.
(neon_vld1_dupv2di): New, fixes vld1q_dup_s64.
* gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c: New test.
Index: gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c
===================================================================
--- gcc.orig/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c (revision 0)
+++ gcc.new/gcc/testsuite/gcc.target/arm/neon-vld1_dupQ.c (revision 0)
@@ -0,0 +1,24 @@
+/* Test the `vld1q_dup_s64' ARM Neon intrinsic. */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O0" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main (void)
+{
+ int64x1_t input[2] = {(int64x1_t)0x0123456776543210LL,
+ (int64x1_t)0x89abcdeffedcba90LL};
+ int64x1_t output[2] = {0, 0};
+ int64x2_t var = vld1q_dup_s64(input);
+
+ vst1q_s64(output, var);
+ if (output[0] != (int64x1_t)0x0123456776543210LL)
+ abort();
+ if (output[1] != (int64x1_t)0x0123456776543210LL)
+ abort();
+ return 0;
+}
Index: gcc/config/arm/neon.md
===================================================================
--- gcc.orig/gcc/config/arm/neon.md (revision 2659)
+++ gcc.new/gcc/config/arm/neon.md (working copy)
@@ -4195,20 +4195,32 @@
)
(define_insn "neon_vld1_dup<mode>"
- [(set (match_operand:VQX 0 "s_register_operand" "=w")
- (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
+ [(set (match_operand:VQ 0 "s_register_operand" "=w")
+ (unspec:VQ [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
UNSPEC_VLD1_DUP))]
"TARGET_NEON"
{
- if (GET_MODE_NUNITS (<MODE>mode) > 2)
return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
- else
- return "vld1.<V_sz_elem>\t%h0, %A1";
}
[(set (attr "neon_type")
- (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
- (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
- (const_string "neon_vld1_1_2_regs")))]
+ (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes"))]
+)
+
+(define_insn_and_split "neon_vld1_dupv2di"
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+ (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
+ "TARGET_NEON"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ rtx tmprtx = gen_lowpart (DImode, operands[0]);
+ emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
+ emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
+ DONE;
+ }
+ [(set_attr "length" "8")
+ (set (attr "neon_type") (const_string
"neon_vld2_2_regs_vld1_vld2_all_lanes"))]
)
(define_expand "vec_store_lanes<mode><mode>"