On Wed, May 05, 2021 at 07:44:46PM +0200, Jakub Jelinek via Gcc-patches wrote: > So perhaps just the vd->e[dr].mode in there could change to > GET_MODE (src) and drop the previous PR98694 change?
I've bootstrapped/regtested that successfully on the trunk (on {x86_64,i686}-linux), though haven't attempted to merge the two comments: 2021-05-06 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/100342 * regcprop.c (copy_value): When copying a source reg in a wider mode than it has recorded for the value, adjust recorded destination mode too. * gcc.target/i386/pr100342.c: New test. --- gcc/regcprop.c.jj 2020-04-30 17:41:37.624675304 +0200 +++ gcc/regcprop.c 2021-05-05 16:24:01.667308941 +0200 @@ -382,10 +382,22 @@ copy_value (rtx dest, rtx src, struct va (set (reg:HI R5) (reg:HI R[0-4])) in which all registers have only 16 defined bits. */ - else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src)) - && partial_subreg_p (vd->e[sr].mode, - vd->e[vd->e[sr].oldest_regno].mode)) - return; + + /* If a narrower value is copied using wider mode, the upper bits + are undefined (could be e.g. a former paradoxical subreg). Signal + in that case we've only copied value using the narrower mode. + Consider: + (set (reg:DI r14) (mem:DI ...)) + (set (reg:QI si) (reg:QI r14)) + (set (reg:DI bp) (reg:DI r14)) + (set (reg:DI r14) (const_int ...)) + (set (reg:DI dx) (reg:DI si)) + (set (reg:DI si) (const_int ...)) + (set (reg:DI dx) (reg:DI bp)) + The last set is not redundant, while the low 8 bits of dx are already + equal to low 8 bits of bp, the other bits are undefined. */ + else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src))) + set_value_regno (dr, vd->e[sr].mode, vd); /* Link DR at the end of the value chain used by SR. */ --- gcc/testsuite/gcc.target/i386/pr100342.c.jj 2021-05-05 17:01:29.139356719 +0200 +++ gcc/testsuite/gcc.target/i386/pr100342.c 2021-05-05 17:01:14.287521150 +0200 @@ -0,0 +1,70 @@ +/* PR rtl-optimization/100342 */ +/* { dg-do run { target int128 } } */ +/* { dg-options "-O2 -fno-dse -fno-forward-propagate -Wno-psabi -mno-sse2" } */ + +#define SHL(x, y) ((x) << ((y) & (sizeof(x) * 8 - 1))) +#define SHR(x, y) ((x) >> ((y) & (sizeof(x) * 8 - 1))) +#define ROR(x, y) (SHR(x, y)) | (SHL(x, (sizeof(x) * 8 - (y)))) +#define SHLV(x, y) ((x) << ((y) & (sizeof((x)[0]) * 8 - 1))) +#define SHLSV(x, y) ((x) << ((y) & (sizeof((y)[0]) * 8 - 1))) +typedef unsigned char A; +typedef unsigned char __attribute__((__vector_size__ (8))) B; +typedef unsigned char __attribute__((__vector_size__ (16))) C; +typedef unsigned char __attribute__((__vector_size__ (32))) D; +typedef unsigned char __attribute__((__vector_size__ (64))) E; +typedef unsigned short F; +typedef unsigned short __attribute__((__vector_size__ (16))) G; +typedef unsigned int H; +typedef unsigned int __attribute__((__vector_size__ (32))) I; +typedef unsigned long long J; +typedef unsigned long long __attribute__((__vector_size__ (8))) K; +typedef unsigned long long __attribute__((__vector_size__ (32))) L; +typedef unsigned long long __attribute__((__vector_size__ (64))) M; +typedef unsigned __int128 N; +typedef unsigned __int128 __attribute__((__vector_size__ (16))) O; +typedef unsigned __int128 __attribute__((__vector_size__ (32))) P; +typedef unsigned __int128 __attribute__((__vector_size__ (64))) Q; +B v1; +D v2; +L v3; +K v4; +I v5; +O v6; + +B +foo (A a, C b, E c, F d, G e, H f, J g, M h, N i, P j, Q k) +{ + b &= (A) f; + k += a; + G l = e; + D m = v2 >= (A) (J) v1; + J r = a + g; + L n = v3 <= f; + k -= i / f; + l -= (A) g; + c |= (A) d; + b -= (A) i; + J o = ROR (__builtin_clz (r), a); + K p = v4 | f, q = v4 <= f; + P s = SHLV (SHLSV (__builtin_bswap64 (i), (P) (0 < j)) <= 0, j); + n += a <= r; + M t = (M) (a / SHLV (c, 0)) != __builtin_bswap64 (i); + I u = f - v5; + E v = (E) h + (E) t + (E) k; + D w = (union { D b[2]; }) { }.b[0] + ((union { E b; }) v).b[1] + m + (D) u + (D) n + (D) s; + C x = ((union { D b; }) w).b[1] + b + (C) l + (C) v6; + B y = ((union { C a; B b; }) x).b + ((union { C a; B b[2]; }) x).b[1] + (B) p + (B) q; + J z = i + o; + F z2 = z; + A z3 = z2; + return y + z3; +} + +int +main () +{ + B x = foo (0, (C) { }, (E) { }, 10, (G) { }, 4, 2, (M) { }, 123842323652213865LL, (P) { 1 }, (Q) { }); + if ((J) x != 0x2e2c2e2c2e2c2e30ULL) + __builtin_abort(); + return 0; +} Jakub