move_lo_quad_internal_<mode> and move_lo_quad_internal_be_<mode> partially duplicate the later aarch64_combinez{,_be}<mode> patterns. The duplication itself is a regression.
The only substantive differences between the two are:

* combinez uses vector MOV (ORR) instead of element MOV (DUP).
  The former seems more likely to be handled via renaming.

* combinez disparages the GPR->FPR alternative whereas move_lo_quad
  gave it equal cost.  The new test gives a token example of when
  the combinez behaviour helps.

gcc/
	* config/aarch64/aarch64-simd.md (move_lo_quad_internal_<mode>)
	(move_lo_quad_internal_be_<mode>): Delete.
	(move_lo_quad_<mode>): Use aarch64_combine<Vhalf> instead of the above.

gcc/testsuite/
	* gcc.target/aarch64/vec-init-8.c: New test.
---
 gcc/config/aarch64/aarch64-simd.md            | 37 +------------------
 gcc/testsuite/gcc.target/aarch64/vec-init-8.c | 15 ++++++++
 2 files changed, 17 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vec-init-8.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c5bc2ea658b..d6cd4c70fe7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1584,46 +1584,13 @@ (define_insn "aarch64_<optab>p<mode>"
 ;; On little-endian this is { operand, zeroes }
 ;; On big-endian this is { zeroes, operand }
 
-(define_insn "move_lo_quad_internal_<mode>"
-  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
-	(vec_concat:VQMOV
-	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
-	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "@
-   dup\\t%d0, %1.d[0]
-   fmov\\t%d0, %1
-   dup\\t%d0, %1"
-  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
-   (set_attr "length" "4")
-   (set_attr "arch" "simd,fp,simd")]
-)
-
-(define_insn "move_lo_quad_internal_be_<mode>"
-  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
-	(vec_concat:VQMOV
-	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
-	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "@
-   dup\\t%d0, %1.d[0]
-   fmov\\t%d0, %1
-   dup\\t%d0, %1"
-  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
-   (set_attr "length" "4")
-   (set_attr "arch" "simd,fp,simd")]
-)
-
 (define_expand "move_lo_quad_<mode>"
   [(match_operand:VQMOV 0 "register_operand")
    (match_operand:<VHALF> 1 "register_operand")]
   "TARGET_SIMD"
 {
-  rtx zs = CONST0_RTX (<VHALF>mode);
-  if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
-  else
-    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
+  emit_insn (gen_aarch64_combine<Vhalf> (operands[0], operands[1],
+					 CONST0_RTX (<VHALF>mode)));
   DONE;
 }
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-8.c b/gcc/testsuite/gcc.target/aarch64/vec-init-8.c
new file mode 100644
index 00000000000..18f8afe10f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-8.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+int64x2_t f1(int64_t *ptr) {
+  int64_t x = *ptr;
+  asm volatile ("" ::: "memory");
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int64x2_t) { 0, x };
+  else
+    return (int64x2_t) { x, 0 };
+}
+
+/* { dg-final { scan-assembler {\tldr\td0, \[x0\]\n} } } */
-- 
2.25.1