GIMPLE code which performs a narrowing truncation on the result of a vector concatenation currently results in an unnecessary XTN being emitted following a UZP1 to concatenate the operands. In cases such as this, UZP1 should instead use a smaller arrangement specifier to replace the XTN instruction. This can be seen in the following GIMPLE example:
int32x2_t foo (svint64_t a, svint64_t b) { vector(2) int vect__2.8; long int _1; long int _3; vector(2) long int _12; <bb 2> [local count: 1073741824]: _1 = svaddv_s64 ({ -1, 0, 0, 0, 0, 0, 0, 0, ... }, a_6(D)); _3 = svaddv_s64 ({ -1, 0, 0, 0, 0, 0, 0, 0, ... }, b_7(D)); _12 = {_1, _3}; vect__2.8_13 = (vector(2) int) _12; return vect__2.8_13; } Original assembly generated: bar: ptrue p3.b, all uaddv d0, p3, z0.d uaddv d1, p3, z1.d uzp1 v0.2d, v0.2d, v1.2d xtn v0.2s, v0.2d ret This patch therefore defines the *aarch64_trunc_concat<mode> insn which truncates the concatenation result, rather than concatenating the truncated operands (such as in *aarch64_narrow_trunc<mode>), resulting in the following optimised assembly being emitted: bar: ptrue p3.b, all uaddv d0, p3, z0.d uaddv d1, p3, z1.d uzp1 v0.2s, v0.2s, v1.2s ret This patch passes all regression tests on aarch64 with no new failures. A supporting test for this optimisation has also been written and passes. OK for master? I do not have commit rights so I cannot push the patch myself. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (*aarch64_trunc_concat<mode>): New insn definition. * config/aarch64/iterators.md (VDQHSD_F): New mode iterator. (VTRUNCD): New mode attribute for truncated modes. (Vtruncd): New mode attribute for arrangement specifier. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/truncated_concatenation_1.c: New test for the above example and the int64x2 version of the above. 
--- gcc/config/aarch64/aarch64-simd.md | 16 ++++++++++++++ gcc/config/aarch64/iterators.md | 12 ++++++++++ .../aarch64/sve/truncated_concatenation_1.c | 22 +++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/truncated_concatenation_1.c diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index cfe95bd4c31..de3dd444ecd 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1872,6 +1872,22 @@ [(set_attr "type" "neon_permute<q>")] ) +(define_insn "*aarch64_trunc_concat<mode>" + [(set (match_operand:<VTRUNCD> 0 "register_operand" "=w") + (truncate:<VTRUNCD> + (vec_concat:VDQHSD_F + (match_operand:<VHALF> 1 "register_operand" "w") + (match_operand:<VHALF> 2 "register_operand" "w"))))] + "TARGET_SIMD" +{ + if (!BYTES_BIG_ENDIAN) + return "uzp1\\t%0.<Vtruncd>, %1.<Vtruncd>, %2.<Vtruncd>"; + else + return "uzp1\\t%0.<Vtruncd>, %2.<Vtruncd>, %1.<Vtruncd>"; +} + [(set_attr "type" "neon_permute<q>")] +) + ;; Packing doubles. (define_expand "vec_pack_trunc_<mode>" diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d7cb27e1885..3b28b2fae0c 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -290,6 +290,10 @@ ;; Advanced SIMD modes for H, S and D types. (define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) +;; Advanced SIMD modes that can be truncated whilst preserving +;; the number of vector elements. +(define_mode_iterator VDQHSD_F [V8HI V4SI V2DI V2SF V4SF V2DF]) + (define_mode_iterator VDQHSD_V1DI [VDQHSD V1DI]) ;; Advanced SIMD and scalar integer modes for H and S. @@ -1722,6 +1726,14 @@ (define_mode_attr Vnarrowq2 [(V8HI "v16qi") (V4SI "v8hi") (V2DI "v4si")]) +;; Truncated Advanced SIMD modes which preserve the number of lanes. 
+(define_mode_attr VTRUNCD [(V8HI "V8QI") (V4SI "V4HI") + (V2SF "V2HF") (V4SF "V4HF") + (V2DI "V2SI") (V2DF "V2SF")]) +(define_mode_attr Vtruncd [(V8HI "8b") (V4SI "4h") + (V2SF "2h") (V4SF "4h") + (V2DI "2s") (V2DF "2s")]) + ;; Narrowed modes of vector modes. (define_mode_attr VNARROW [(VNx8HI "VNx16QI") (VNx4SI "VNx8HI") (VNx4SF "VNx8HF") diff --git a/gcc/testsuite/gcc.target/aarch64/sve/truncated_concatenation_1.c b/gcc/testsuite/gcc.target/aarch64/sve/truncated_concatenation_1.c new file mode 100644 index 00000000000..e0ad4209206 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/truncated_concatenation_1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -Wall -march=armv8.2-a+sve" } */ + +#include <arm_neon.h> +#include <arm_sve.h> + +int32x2_t foo (svint64_t a, svint64_t b) { + int32x2_t ab = vdup_n_s32 (0); + ab = vset_lane_s32 ((int)svaddv_s64 (svptrue_b64 (), a), ab, 0); + ab = vset_lane_s32 ((int)svaddv_s64 (svptrue_b64 (), b), ab, 1); + return ab; +} + +int64x2_t bar (svint64_t a, svint64_t b) { + int64x2_t ab = vdupq_n_s64(0); + ab = vsetq_lane_s64 ((int)svaddv_s64 (svptrue_b64 (), a), ab, 0); + ab = vsetq_lane_s64 ((int)svaddv_s64 (svptrue_b64 (), b), ab, 1); + return ab; +} + +/* { dg-final { scan-assembler-not {\txtn\t} } }*/ +/* { dg-final { scan-assembler-times {\tuzp1\tv[0-9]+\.2s, v[0-9]+\.2s, v[0-9]+\.2s} 2 } }*/ \ No newline at end of file -- 2.34.1