Hi All, After my patch for cprop was committed (https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=224048), neon-reload-class.c started failing in ARM mode. This is more or less expected behaviour, so I looked at the original patch related to neon-reload-class.c, which is: https://gcc.gnu.org/ml/gcc-patches/2011-04/msg01968.html
As I understand it, the test was added to make sure that these constants are not placed in the constant pool. From that point of view, it looks like we should remove the check for movw/movt. I also checked the asm generated before the constant split for arm (https://gcc.gnu.org/viewcvs?rev=223235&root=gcc&view=rev) just to be sure that this is not due to the earlier patch. The reason for this is that the constant generation has changed compared to what was shown in https://gcc.gnu.org/ml/gcc-patches/2011-04/msg01968.html. I am also attaching the three versions of neon-reload-class.s:
1. neon-reload-class_before_arm_split.s - before r223235
2. neon-reload-class_before_cprop.s - before r224048
3. neon-reload-class_new.s - after r224048
Is this OK for trunk? Thanks, Kugan
gcc/testsuite/ChangeLog:
2015-06-05 Kugan Vivekanandarajah <kug...@linaro.org>
	* gcc.target/arm/neon-reload-class.c: Remove movw and movt.
diff --git a/gcc/testsuite/gcc.target/arm/neon-reload-class.c b/gcc/testsuite/gcc.target/arm/neon-reload-class.c index c63aa04..48950f7 100644 --- a/gcc/testsuite/gcc.target/arm/neon-reload-class.c +++ b/gcc/testsuite/gcc.target/arm/neon-reload-class.c @@ -15,4 +15,4 @@ _op_blend_p_caa_dp(unsigned *s, unsigned* e, unsigned *d, unsigned c) { /* These constants should be emitted as immediates rather than loaded from memory. */ -/* { dg-final { scan-assembler-not "(\\.d?word|mov(w|t))" } } */ +/* { dg-final { scan-assembler-not "(\\.d?word)" } } */
@ Compiler output for _op_blend_p_caa_dp from neon-reload-class.c, ARM
@ state, produced by "GCC: (GNU) 6.0.0 20150602 (experimental)" (see the
@ .ident at the end).  In this version the scalar loops keep their two
@ masks in registers, each built with a mov + movt pair.
@
@ Register roles on entry, assuming the standard AAPCS argument order for
@ (unsigned *s, unsigned *e, unsigned *d, unsigned c) -- TODO confirm:
@   r0 = s (source cursor)       r1 = e (end of destination)
@   r2 = d (destination cursor)  r3 = c (multiplier)
@
@ Per 32-bit element x, both the vector and the scalar paths store
@   ((((x >> 8) & 0x00ff00ff) * c) & 0xff00ff00)
@     + ((((x & 0x00ff00ff) * c) >> 8) & 0x00ff00ff)
	.arch armv7-a
	.fpu neon
	@ EABI build attributes as emitted by the compiler.
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.arm
	.syntax divided
	.file "neon-reload-class.c"
	.text
	.align 2
	.global _op_blend_p_caa_dp
	.type _op_blend_p_caa_dp, %function
_op_blend_p_caa_dp:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ Nothing to do when r2 >= r1 (unsigned compare): return at once.
	cmp r2, r1
	bxcs lr
	stmfd sp!, {r4, r5, r6, r7, lr}
	@ r6 = (r1 + 3) - (r2 + 4) = r1 - r2 - 1; lr = r2 + 4 is the address
	@ of the destination element after the current one.
	add r6, r1, #3
	add lr, r2, #4
	add r4, r2, #16
	sub r6, r6, lr
	add r7, r0, #16
	mov r5, r4
	@ Overlap test on the first 16 bytes: carry ends up set when
	@ r2 >= r0 + 16, or otherwise when r0 >= r2 + 16.
	cmp r2, r7
	cmpcc r0, r5
	@ ip = (r1 - r2 - 1) >> 2 and r5 = ip + 1, the element count.
	mov ip, r6, lsr #2
	@ This copy into r4 is overwritten by the movcs/movcc pair below.
	mov r4, r7
	add r5, ip, #1
	@ r4 = 1 when the overlap test passed, else 0.
	movcs r4, #1
	movcc r4, #0
	@ Force r4 to 0 when there are 4 or fewer elements.
	cmp r5, #4
	movls r4, #0
	andhi r4, r4, #1
	@ r4 == 0: take the purely scalar loop at .L3.
	cmp r4, #0
	beq .L3
	@ ip = ((count - 4) >> 2) + 1 vector iterations; r7 = 4 * ip elements
	@ covered by the vector loop.  The bls below still tests the flags of
	@ "cmp r6, #11", since the mov/add in between do not set flags.
	sub ip, ip, #3
	cmp r6, #11
	mov ip, ip, lsr #2
	add ip, ip, #1
	mov r7, ip, asl #2
	bls .L4
	@ Vector path: q10 = r3 replicated into every 32-bit lane; r6/r4 are
	@ the source/destination cursors and lr counts iterations.
	vdup.32 q10, r3
	mov r6, r0
	mov r4, r2
	mov lr, #0
.L5:
	@ Process four 32-bit elements per iteration.
	vld1.32 {q8}, [r6]
	add lr, lr, #1
	@ These flags are consumed by the bhi at the bottom of the loop.
	cmp ip, lr
	add r6, r6, #16
	vmov q9, q8 @ v4si
	@ q8 = x >> 8, q9 = x; the masks are immediates applied per 16-bit
	@ lane (#255 on .i16 lanes is 0x00ff00ff per 32-bit lane).
	vshr.u32 q8, q8, #8
	vand.i16 q9, #255
	vand.i16 q8, #255
	vmul.i32 q9, q9, q10
	vmul.i32 q8, q8, q10
	vshr.u32 q9, q9, #8
	vand.i16 q9, #255
	@ #65280 on .i16 lanes is 0xff00ff00 per 32-bit lane.
	vand.i16 q8, #65280
	vadd.i32 q8, q8, q9
	vst1.32 {q8}, [r4]
	add r4, r4, #16
	@ Loop while ip > lr (unsigned).
	bhi .L5
	@ Advance r0 and r2 past the vectorised bytes (r7 elements * 4) and
	@ return if the vector loop covered all r5 elements.
	cmp r5, r7
	mov r7, r7, asl #2
	add r0, r0, r7
	add r2, r2, r7
	ldmeqfd sp!, {r4, r5, r6, r7, pc}
	add lr, r2, #4
.L4:
	@ Scalar loop for the remaining elements.
	@ r5 = 0x00ff00ff and r6 = 0xff00ff00: mov sets the low half, movt
	@ the high half.  r0 is pre-decremented for the pre-indexed load.
	mov r5, #255
	mov r6, #65280
	sub r0, r0, #4
	movt r5, 255
	movt r6, 65280
.L7:
	@ ip = *(r0 += 4)
	ldr ip, [r0, #4]!
	@ Compare the end pointer with the next destination address; the
	@ flags are used by ldmlsfd below (and/mul/str/mov leave them alone).
	cmp r1, lr
	@ r4 = x & 0x00ff00ff; ip = (x >> 8) & 0x00ff00ff
	and r4, ip, r5
	and ip, r5, ip, lsr #8
	mul r4, r3, r4
	mul ip, r3, ip
	@ r4 = (r4 >> 8) & 0x00ff00ff; ip &= 0xff00ff00
	and r4, r5, r4, lsr #8
	and ip, ip, r6
	add ip, ip, r4
	str ip, [r2]
	mov r2, lr
	@ Return when r1 <= lr (unsigned), otherwise step lr and repeat.
	ldmlsfd sp!, {r4, r5, r6, r7, pc}
	add lr, lr, #4
	b .L7
.L3:
	@ Scalar loop used when the vector path was rejected; same body as
	@ .L4/.L7.  lr still holds r2 + 4 from the prologue.
	mov r5, #255
	mov r6, #65280
	sub r0, r0, #4
	movt r5, 255
	movt r6, 65280
.L10:
	@ ip = *(r0 += 4)
	ldr ip, [r0, #4]!
	cmp r1, lr
	@ r4 = x & 0x00ff00ff; ip = (x >> 8) & 0x00ff00ff
	and r4, ip, r5
	and ip, r5, ip, lsr #8
	mul r4, r3, r4
	mul ip, r3, ip
	@ r4 = (r4 >> 8) & 0x00ff00ff; ip &= 0xff00ff00
	and r4, r5, r4, lsr #8
	and ip, ip, r6
	add ip, ip, r4
	str ip, [r2]
	mov r2, lr
	@ Return when r1 <= lr (unsigned), otherwise step lr and repeat.
	ldmlsfd sp!, {r4, r5, r6, r7, pc}
	add lr, lr, #4
	b .L10
	.size _op_blend_p_caa_dp, .-_op_blend_p_caa_dp
	.ident "GCC: (GNU) 6.0.0 20150602 (experimental)"
	.section .note.GNU-stack,"",%progbits
@ Compiler output for _op_blend_p_caa_dp from neon-reload-class.c, ARM
@ state, produced by "GCC: (GNU) 6.0.0 20150528 (experimental)" (see the
@ .ident at the end).  In this version the scalar loops apply their masks
@ with pairs of bic immediates; no mask constant is held in a register.
@
@ Register roles on entry, assuming the standard AAPCS argument order for
@ (unsigned *s, unsigned *e, unsigned *d, unsigned c) -- TODO confirm:
@   r0 = s (source cursor)       r1 = e (end of destination)
@   r2 = d (destination cursor)  r3 = c (multiplier)
@
@ Per 32-bit element x, both the vector and the scalar paths store
@   ((((x >> 8) & 0x00ff00ff) * c) & 0xff00ff00)
@     + ((((x & 0x00ff00ff) * c) >> 8) & 0x00ff00ff)
	.arch armv7-a
	.fpu neon
	@ EABI build attributes as emitted by the compiler.
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.arm
	.syntax divided
	.file "neon-reload-class.c"
	.text
	.align 2
	.global _op_blend_p_caa_dp
	.type _op_blend_p_caa_dp, %function
_op_blend_p_caa_dp:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ Nothing to do when r2 >= r1 (unsigned compare): return at once.
	cmp r2, r1
	bxcs lr
	stmfd sp!, {r4, r5, r6, r7, lr}
	@ r6 = (r1 + 3) - (r2 + 4) = r1 - r2 - 1; r4 = r2 + 4 is the address
	@ of the destination element after the current one.
	add r6, r1, #3
	add r4, r2, #4
	add lr, r2, #16
	sub r6, r6, r4
	add r7, r0, #16
	mov r5, lr
	@ Overlap test on the first 16 bytes: carry ends up set when
	@ r2 >= r0 + 16, or otherwise when r0 >= r2 + 16.
	cmp r2, r7
	cmpcc r0, r5
	@ ip = (r1 - r2 - 1) >> 2 and r5 = ip + 1, the element count.
	mov ip, r6, lsr #2
	@ This copy into lr is overwritten by the movcs/movcc pair below.
	mov lr, r7
	add r5, ip, #1
	@ lr = 1 when the overlap test passed, else 0.
	movcs lr, #1
	movcc lr, #0
	@ Force lr to 0 when there are 4 or fewer elements.
	cmp r5, #4
	movls lr, #0
	andhi lr, lr, #1
	@ lr == 0: take the purely scalar loop at .L3.
	cmp lr, #0
	beq .L3
	@ ip = ((count - 4) >> 2) + 1 vector iterations; r7 = 4 * ip elements
	@ covered by the vector loop.  The bls below still tests the flags of
	@ "cmp r6, #11", since the mov/add in between do not set flags.
	sub ip, ip, #3
	cmp r6, #11
	mov ip, ip, lsr #2
	add ip, ip, #1
	mov r7, ip, asl #2
	bls .L4
	@ Vector path: q10 = r3 replicated into every 32-bit lane; r6/r4 are
	@ the source/destination cursors and lr counts iterations.
	vdup.32 q10, r3
	mov r6, r0
	mov r4, r2
	mov lr, #0
.L5:
	@ Process four 32-bit elements per iteration.
	vld1.32 {q8}, [r6]
	add lr, lr, #1
	@ These flags are consumed by the bhi at the bottom of the loop.
	cmp ip, lr
	add r6, r6, #16
	vmov q9, q8 @ v4si
	@ q8 = x >> 8, q9 = x; the masks are immediates applied per 16-bit
	@ lane (#255 on .i16 lanes is 0x00ff00ff per 32-bit lane).
	vshr.u32 q8, q8, #8
	vand.i16 q9, #255
	vand.i16 q8, #255
	vmul.i32 q9, q9, q10
	vmul.i32 q8, q8, q10
	vshr.u32 q9, q9, #8
	vand.i16 q9, #255
	@ #65280 on .i16 lanes is 0xff00ff00 per 32-bit lane.
	vand.i16 q8, #65280
	vadd.i32 q8, q8, q9
	vst1.32 {q8}, [r4]
	add r4, r4, #16
	@ Loop while ip > lr (unsigned).
	bhi .L5
	@ Advance r0 and r2 past the vectorised bytes (r7 elements * 4) and
	@ return if the vector loop covered all r5 elements.
	cmp r5, r7
	mov r7, r7, asl #2
	add r0, r0, r7
	add r2, r2, r7
	ldmeqfd sp!, {r4, r5, r6, r7, pc}
	@ r4 was used as the vector store cursor; reset it to r2 + 4.
	add r4, r2, #4
.L4:
	@ Scalar loop for the remaining elements; r0 is pre-decremented for
	@ the pre-indexed load.
	sub r0, r0, #4
.L7:
	@ ip = *(r0 += 4)
	ldr ip, [r0, #4]!
	@ Compare the end pointer with the next destination address; the
	@ flags are used by ldmlsfd below (bic/mov/mul/str leave them alone).
	cmp r1, r4
	@ r5 = x & 0x00ff00ff (clear 0xff000000, then 0x0000ff00)
	bic r5, ip, #-16777216
	bic r5, r5, #65280
	@ ip = (x >> 8) & 0x00ff00ff, interleaved with the first multiply
	mov ip, ip, lsr #8
	bic ip, ip, #-16777216
	mul r5, r3, r5
	bic ip, ip, #65280
	mul lr, r3, ip
	@ r5 = (r5 >> 8) & 0x00ff00ff; ip = lr & 0xff00ff00
	@ (clear 0x00ff0000, then 0x000000ff)
	mov ip, r5, lsr #8
	bic r5, ip, #-16777216
	bic ip, lr, #16711680
	bic r5, r5, #65280
	bic ip, ip, #255
	add ip, ip, r5
	str ip, [r2]
	mov r2, r4
	@ Return when r1 <= r4 (unsigned), otherwise step r4 and repeat.
	ldmlsfd sp!, {r4, r5, r6, r7, pc}
	add r4, r4, #4
	b .L7
.L3:
	@ Scalar loop used when the vector path was rejected; same body as
	@ .L7.  r4 still holds r2 + 4 from the prologue.
	sub r0, r0, #4
.L10:
	@ ip = *(r0 += 4)
	ldr ip, [r0, #4]!
	cmp r1, r4
	@ r5 = x & 0x00ff00ff
	bic r5, ip, #-16777216
	bic r5, r5, #65280
	@ ip = (x >> 8) & 0x00ff00ff, interleaved with the first multiply
	mov ip, ip, lsr #8
	bic ip, ip, #-16777216
	mul r5, r3, r5
	bic ip, ip, #65280
	mul lr, r3, ip
	@ r5 = (r5 >> 8) & 0x00ff00ff; ip = lr & 0xff00ff00
	mov ip, r5, lsr #8
	bic r5, ip, #-16777216
	bic ip, lr, #16711680
	bic r5, r5, #65280
	bic ip, ip, #255
	add ip, ip, r5
	str ip, [r2]
	mov r2, r4
	@ Return when r1 <= r4 (unsigned), otherwise step r4 and repeat.
	ldmlsfd sp!, {r4, r5, r6, r7, pc}
	add r4, r4, #4
	b .L10
	.size _op_blend_p_caa_dp, .-_op_blend_p_caa_dp
	.ident "GCC: (GNU) 6.0.0 20150528 (experimental)"
	.section .note.GNU-stack,"",%progbits
@ Compiler output for _op_blend_p_caa_dp from neon-reload-class.c, ARM
@ state, produced by "GCC: (GNU) 6.0.0 20150520 (experimental)" (see the
@ .ident at the end).  In this version the scalar loops apply their masks
@ with pairs of bic immediates; no mask constant is held in a register.
@
@ Register roles on entry, assuming the standard AAPCS argument order for
@ (unsigned *s, unsigned *e, unsigned *d, unsigned c) -- TODO confirm:
@   r0 = s (source cursor)       r1 = e (end of destination)
@   r2 = d (destination cursor)  r3 = c (multiplier)
@
@ Per 32-bit element x, both the vector and the scalar paths store
@   ((((x >> 8) & 0x00ff00ff) * c) & 0xff00ff00)
@     + ((((x & 0x00ff00ff) * c) >> 8) & 0x00ff00ff)
	.arch armv7-a
	.fpu neon
	@ EABI build attributes as emitted by the compiler.
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.arm
	.syntax divided
	.file "neon-reload-class.c"
	.text
	.align 2
	.global _op_blend_p_caa_dp
	.type _op_blend_p_caa_dp, %function
_op_blend_p_caa_dp:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ Early exit: if r2 >= r1 (unsigned) there is no element to process.
	cmp r2, r1
	bxcs lr
	stmfd sp!, {r4, r5, r6, r7, lr}
	@ r4 = r2 + 4 (next destination address); r6 = r1 - r2 - 1.
	add r6, r1, #3
	add r4, r2, #4
	add lr, r2, #16
	sub r6, r6, r4
	add r7, r0, #16
	mov r5, lr
	@ Carry is left set when the 16-byte windows at r0 and r2 are
	@ disjoint: r2 >= r0 + 16, or failing that r0 >= r2 + 16.
	cmp r2, r7
	cmpcc r0, r5
	@ ip = r6 >> 2; r5 = ip + 1 is the number of elements.
	mov ip, r6, lsr #2
	@ lr is rewritten by movcs/movcc below, so this value is not used.
	mov lr, r7
	add r5, ip, #1
	@ lr becomes the "vector path allowed" flag: disjoint windows ...
	movcs lr, #1
	movcc lr, #0
	@ ... and more than 4 elements.
	cmp r5, #4
	movls lr, #0
	andhi lr, lr, #1
	@ Flag clear: use the scalar-only loop at .L3.
	cmp lr, #0
	beq .L3
	@ ip = number of 4-element vector iterations, r7 = elements they
	@ cover.  bls tests "cmp r6, #11"; the mov/add between them do not
	@ update the flags.
	sub ip, ip, #3
	cmp r6, #11
	mov ip, ip, lsr #2
	add ip, ip, #1
	mov r7, ip, asl #2
	bls .L4
	@ Vector setup: broadcast r3 into q10; r6 = load cursor, r4 = store
	@ cursor, lr = iteration counter.
	vdup.32 q10, r3
	mov r6, r0
	mov r4, r2
	mov lr, #0
.L5:
	@ One iteration handles four 32-bit elements.
	vld1.32 {q8}, [r6]
	add lr, lr, #1
	@ Flags for the closing bhi.
	cmp ip, lr
	add r6, r6, #16
	vmov q9, q8 @ v4si
	@ q9 keeps x, q8 becomes x >> 8; each is masked to the low byte of
	@ every 16-bit lane before being multiplied by q10.
	vshr.u32 q8, q8, #8
	vand.i16 q9, #255
	vand.i16 q8, #255
	vmul.i32 q9, q9, q10
	vmul.i32 q8, q8, q10
	vshr.u32 q9, q9, #8
	vand.i16 q9, #255
	@ Keep the high byte of every 16-bit lane of the q8 product.
	vand.i16 q8, #65280
	vadd.i32 q8, q8, q9
	vst1.32 {q8}, [r4]
	add r4, r4, #16
	@ Continue while ip > lr (unsigned).
	bhi .L5
	@ Skip r0/r2 over the bytes already processed; return when the
	@ vector loop handled every element (r5 == r7).
	cmp r5, r7
	mov r7, r7, asl #2
	add r0, r0, r7
	add r2, r2, r7
	ldmeqfd sp!, {r4, r5, r6, r7, pc}
	@ Restore r4 = r2 + 4 after its use as the store cursor.
	add r4, r2, #4
.L4:
	@ Scalar tail loop; bias r0 by -4 because the load pre-increments.
	sub r0, r0, #4
.L7:
	@ ip = *(r0 += 4)
	ldr ip, [r0, #4]!
	@ End pointer vs next destination address, consumed by ldmlsfd.
	cmp r1, r4
	@ r5 = x & 0x00ff00ff, done as two bit-clears
	bic r5, ip, #-16777216
	bic r5, r5, #65280
	@ ip = (x >> 8) & 0x00ff00ff, with the first multiply scheduled in
	@ between
	mov ip, ip, lsr #8
	bic ip, ip, #-16777216
	mul r5, r3, r5
	bic ip, ip, #65280
	mul lr, r3, ip
	@ r5 = (r5 >> 8) & 0x00ff00ff; ip = lr & 0xff00ff00
	mov ip, r5, lsr #8
	bic r5, ip, #-16777216
	bic ip, lr, #16711680
	bic r5, r5, #65280
	bic ip, ip, #255
	add ip, ip, r5
	str ip, [r2]
	mov r2, r4
	@ Done when r1 <= r4 (unsigned); otherwise advance r4 and loop.
	ldmlsfd sp!, {r4, r5, r6, r7, pc}
	add r4, r4, #4
	b .L7
.L3:
	@ Scalar-only loop, same body as .L7; r4 is still r2 + 4 here.
	sub r0, r0, #4
.L10:
	@ ip = *(r0 += 4)
	ldr ip, [r0, #4]!
	cmp r1, r4
	@ r5 = x & 0x00ff00ff
	bic r5, ip, #-16777216
	bic r5, r5, #65280
	@ ip = (x >> 8) & 0x00ff00ff, with the first multiply scheduled in
	@ between
	mov ip, ip, lsr #8
	bic ip, ip, #-16777216
	mul r5, r3, r5
	bic ip, ip, #65280
	mul lr, r3, ip
	@ r5 = (r5 >> 8) & 0x00ff00ff; ip = lr & 0xff00ff00
	mov ip, r5, lsr #8
	bic r5, ip, #-16777216
	bic ip, lr, #16711680
	bic r5, r5, #65280
	bic ip, ip, #255
	add ip, ip, r5
	str ip, [r2]
	mov r2, r4
	@ Done when r1 <= r4 (unsigned); otherwise advance r4 and loop.
	ldmlsfd sp!, {r4, r5, r6, r7, pc}
	add r4, r4, #4
	b .L10
	.size _op_blend_p_caa_dp, .-_op_blend_p_caa_dp
	.ident "GCC: (GNU) 6.0.0 20150520 (experimental)"
	.section .note.GNU-stack,"",%progbits