https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63173
--- Comment #2 from Venkataramanan <venkataramanan.kumar at amd dot com> ---
I changed the test case so that it compiles with the latest GCC trunk:
#include <arm_neon.h>
/*
 * Reproducer for GCC PR 63173 (AArch64).
 *
 * Loads one int16x4x2_t from each input pointer with vld2_dup_s16, adds the
 * two pairs of vectors element-wise with vqadd_s16 (the SQADD instructions in
 * the compiler output), and returns the resulting pair by value.
 *
 * pDataA, pDataB: source pointers; declared __restrict, so the caller
 *                 promises they do not alias.
 * Returns:        DataC, with val[0] and val[1] each holding one sum.
 */
int16x4x2_t foo(int16_t * __restrict pDataA,
int16_t * __restrict pDataB)
{
int16x4x2_t DataA, DataB, DataC;
/* Each vld2_dup_s16 call shows up in the compiler output as an ld2r
   "user assembly" block originating from arm_neon.h. */
DataA = vld2_dup_s16(pDataA);
DataB = vld2_dup_s16(pDataB);
/* The two halves are added independently; neither depends on the other. */
DataC.val[0] = vqadd_s16( DataA.val[0], DataB.val[0] );
DataC.val[1] = vqadd_s16( DataA.val[1], DataB.val[1] );
return DataC;
}
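The generated code still passes the loaded vectors through memory: after each ld2r, both registers are stored to the stack with st1 and immediately reloaded with ldr.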
// Compiler output for foo() as quoted in the bug report (AArch64, GNU as
// syntax). Instructions are reproduced verbatim; only comments were added.
// x0 and x1 are the two addresses the ld2r instructions load from.
// Point of interest: every ld2r result is stored to the stack and reloaded
// right away instead of being used directly from v16/v17.
foo:
// Reserve 16 bytes of stack, used only as scratch for the st1/ldr round trips.
sub sp, sp, #16
// Start of user assembly
// NOTE(review): the next three lines look like a single GCC line marker
// (// 11788 "<path to arm_neon.h>" 1) that was wrapped when the comment was
// pasted; they are kept exactly as found.
// 11788
"/home/venkataramanan-kumar/work/pr62308/builds/destdir/x86_64-unknown-linux-gnu/lib/gcc/aarch64-none-elf/5.0.0/include/arm_neon.h"
1
// First load (from [x0]) into v16/v17, then stored straight to the stack.
ld2r {v16.4h, v17.4h}, [x0]
st1 {v16.4h, v17.4h}, [sp]
// 0 "" 2
// End of user assembly
// Reload the two 8-byte halves just stored: d0 from [sp], d1 from [sp, 8].
ldr d0, [sp]
ldr d1, [sp, 8]
// Start of user assembly
// NOTE(review): same wrapped line marker as above, kept exactly as found.
// 11788
"/home/venkataramanan-kumar/work/pr62308/builds/destdir/x86_64-unknown-linux-gnu/lib/gcc/aarch64-none-elf/5.0.0/include/arm_neon.h"
1
// Second load (from [x1]); reuses v16/v17 and the same stack slot.
ld2r {v16.4h, v17.4h}, [x1]
st1 {v16.4h, v17.4h}, [sp]
// 0 "" 2
// End of user assembly
// Reload again: d3 from [sp], d2 from [sp, 8].
ldr d3, [sp]
ldr d2, [sp, 8]
// Release the scratch area; no stack access follows.
add sp, sp, 16
// Saturating adds pair first-with-first (v0, v3) and second-with-second
// (v1, v2); the results are left in v0 and v1 at the ret.
sqadd v0.4h, v0.4h, v3.4h
sqadd v1.4h, v1.4h, v2.4h
ret
.size foo, .-foo
.ident "GCC: (Linaro GCC 2014.10) 5.0.0 20140930 (experimental)"