Hi!

The r10-2806 change regressed the following testcases: instead of doing the int -> unsigned long sign-extension once and then adding 8, 16, ... 56 to it for each of the memory accesses, it adds 8, 16, ... 56 in int mode and then sign-extends each result. So that means:

+ movq $0, (%rsp,%rax,8)
+ leal 1(%rdx), %eax
+ cltq
+ movq $0, (%rsp,%rax,8)
+ leal 2(%rdx), %eax
+ cltq
+ movq $0, (%rsp,%rax,8)
+ leal 3(%rdx), %eax
+ cltq
+ movq $0, (%rsp,%rax,8)
+ leal 4(%rdx), %eax
+ cltq
+ movq $0, (%rsp,%rax,8)
+ leal 5(%rdx), %eax
+ cltq
+ movq $0, (%rsp,%rax,8)
+ leal 6(%rdx), %eax
+ addl $7, %edx
+ cltq
+ movslq %edx, %rdx
+ movq $0, (%rsp,%rax,8)
  movq $0, (%rsp,%rdx,8)
- movq $0, 8(%rsp,%rdx,8)
- movq $0, 16(%rsp,%rdx,8)
- movq $0, 24(%rsp,%rdx,8)
- movq $0, 32(%rsp,%rdx,8)
- movq $0, 40(%rsp,%rdx,8)
- movq $0, 48(%rsp,%rdx,8)
- movq $0, 56(%rsp,%rdx,8)

GCC 9 -> 10 change, or:

- movq $0, (%rsp,%rdx,8)
- movq $0, 8(%rsp,%rdx,8)
- movq $0, 16(%rsp,%rdx,8)
- movq $0, 24(%rsp,%rdx,8)
- movq $0, 32(%rsp,%rdx,8)
- movq $0, 40(%rsp,%rdx,8)
- movq $0, 48(%rsp,%rdx,8)
- movq $0, 56(%rsp,%rdx,8)
+ movq $0, (%rsp,%rax,8)
+ leal 1(%rdx), %eax
+ movq $0, (%rsp,%rax,8)
+ leal 2(%rdx), %eax
+ movq $0, (%rsp,%rax,8)
+ leal 3(%rdx), %eax
+ movq $0, (%rsp,%rax,8)
+ leal 4(%rdx), %eax
+ movq $0, (%rsp,%rax,8)
+ leal 5(%rdx), %eax
+ movq $0, (%rsp,%rax,8)
+ leal 6(%rdx), %eax
+ movq $0, (%rsp,%rax,8)
+ leal 7(%rdx), %eax
+ movq $0, (%rsp,%rax,8)

change on the other test.

While for the former (int) case there is, because signed integer overflow is undefined (unless -fwrapv), the possibility to undo it, e.g. during expansion, for the unsigned case the information is unfortunately lost.
The following patch adds a single_use check, which restores the code generated for these testcases while leaving the testcases that the r10-2806 change was meant to improve as they are.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-02-24  Jakub Jelinek  <ja...@redhat.com>

	PR target/95798
	* match.pd ((T)(A) + CST -> (T)(A + CST)): Add single_use check.

	* gcc.target/i386/pr95798-1.c: New test.
	* gcc.target/i386/pr95798-2.c: New test.

--- gcc/match.pd.jj 2021-02-24 12:58:22.233006845 +0100
+++ gcc/match.pd 2021-02-24 15:41:15.647777030 +0100
@@ -2492,11 +2492,12 @@ (define_operator_list COND_TERNARY
 /* ((T)(A)) + CST -> (T)(A + CST) */
 #if GIMPLE
   (simplify
-   (plus (convert SSA_NAME@0) INTEGER_CST@1)
+   (plus (convert@2 SSA_NAME@0) INTEGER_CST@1)
     (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
          && TREE_CODE (type) == INTEGER_TYPE
          && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
-         && int_fits_type_p (@1, TREE_TYPE (@0)))
+         && int_fits_type_p (@1, TREE_TYPE (@0))
+         && single_use (@2))
      /* Perform binary operation inside the cast if the constant fits
         and (A + CST)'s range does not overflow.
*/ (with --- gcc/testsuite/gcc.target/i386/pr95798-1.c.jj 2021-02-24 15:58:06.935598077 +0100 +++ gcc/testsuite/gcc.target/i386/pr95798-1.c 2021-02-24 16:02:47.298504500 +0100 @@ -0,0 +1,29 @@ +/* PR target/95798 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "6, 48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ + +void bar (unsigned long long *, int); + +void +foo (int y, unsigned long long z) +{ + unsigned long long x[1024]; + unsigned long long i = y % 127; + __builtin_memset (x, -1, sizeof (x)); + x[i] = 0; + x[i + 1] = 1; + x[i + 2] = 2; + x[i + 3] = 3; + x[i + 4] = 4; + x[i + 5] = 5; + x[i + 6] = 6; + x[i + 7] = 7; + bar (x, y); +} --- gcc/testsuite/gcc.target/i386/pr95798-2.c.jj 2021-02-24 16:01:39.708250302 +0100 +++ gcc/testsuite/gcc.target/i386/pr95798-2.c 2021-02-24 16:03:57.497729907 +0100 @@ -0,0 +1,29 @@ +/* PR target/95798 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler "1, 8\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "2, 16\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "3, 24\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "4, 32\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "5, 40\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "6, 
48\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ +/* { dg-final { scan-assembler "7, 56\\\(%rsp,%r\[a-z0-9]*,8\\\)" { target lp64 } } } */ + +void bar (unsigned long long *, int); + +void +foo (unsigned int y, unsigned long long z) +{ + unsigned long long x[1024]; + unsigned long long i = y % 127; + __builtin_memset (x, -1, sizeof (x)); + x[i] = 0; + x[i + 1] = 1; + x[i + 2] = 2; + x[i + 3] = 3; + x[i + 4] = 4; + x[i + 5] = 5; + x[i + 6] = 6; + x[i + 7] = 7; + bar (x, y); +} Jakub