Hi,
When merging classes, cse computes new equivalences for constants.
In the PR we have
(insn 1173 1172 1174 2 (set (reg:V8QI 33 v1)
(const_vector:V8QI [
(const_int 3 [0x3])
(const_int -4 [0xfffffffffffffffc])
(const_int 0 [0]) repeated x6
])) "pr121649.c":63:3 1325 {*aarch64_simd_movv8qi}
(nil))
of which the second element is selected:
(insn 1178 1177 1179 2 (set (reg:QI 4 x4)
(vec_select:QI (reg:V8QI 33 v1)
(parallel [
(const_int 1 [0x1])
]))) "pr121649.c":63:3 2968 {aarch64_get_lanev8qi}
(expr_list:REG_EQUAL (const_int -4 [0xfffffffffffffffc])
(nil)))
We find (const_int 3 [0x3]) and a few others to be equivalent.
When computing a new const equivalence
x = gen_lowpart (GET_MODE (x), x_ent->const_rtx);
= gen_lowpart (V8QI, (const_int 3));
gen_lowpart_common uses DImode for the constant, so the V8QI lowpart spans
the whole 8-byte scalar and we get {3, 0, 0, ... }. Subsequently, we use
(const_vector:V8QI [
(const_int 3 [0x3])
(const_int 0 [0]) repeated x7])
instead of the original const_vector and selecting the second element in
insn 1178 wrongly returns 0 instead of -4.
This patch therefore only takes the lowpart if X has a single unit or if
the outer mode is not larger than the original mode of the constant
(here QImode).
Bootstrapped and regtested on x86, power10, and aarch64.
Regtested on riscv64.
Regards
Robin
PR rtl-optimization/121649
gcc/ChangeLog:
* cse.cc (equiv_constant): Only create lowpart subreg if the
outer mode has a single unit or is not larger than the inner.
gcc/testsuite/ChangeLog:
* gcc.dg/torture/pr121649.c: New test.
---
gcc/cse.cc | 13 ++-
gcc/testsuite/gcc.dg/torture/pr121649.c | 130 ++++++++++++++++++++++++
2 files changed, 142 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/torture/pr121649.c
diff --git a/gcc/cse.cc b/gcc/cse.cc
index 4eaef602366..4f31c2b9dfa 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -3766,7 +3766,18 @@ equiv_constant (rtx x)
int x_q = REG_QTY (REGNO (x));
struct qty_table_elem *x_ent = &qty_table[x_q];
- if (x_ent->const_rtx)
+ /* If X is a small vector, we might end up clobbering some
+ elements like for:
+ gen_lowpart (V8QI, (const_int 3))
+ gen_lowpart_common might use DImode for the constant and
+ a DImode 3 overlaps all 8 elements of the original X,
+ setting them to {3, 0, 0, ... }.
+ Therefore, only take the low-part if the outer mode is
+ smaller. */
+ if (x_ent->const_rtx
+ && (known_eq (GET_MODE_NUNITS (GET_MODE (x)), 1)
+ || known_le (GET_MODE_SIZE (GET_MODE (x)),
+ GET_MODE_SIZE (x_ent->mode))))
x = gen_lowpart (GET_MODE (x), x_ent->const_rtx);
}
diff --git a/gcc/testsuite/gcc.dg/torture/pr121649.c b/gcc/testsuite/gcc.dg/torture/pr121649.c
new file mode 100644
index 00000000000..8af7e9b60fe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr121649.c
@@ -0,0 +1,130 @@
+/* { dg-do run { target bitint } } */
+/* { dg-additional-options "-Wno-psabi" } */
+
+typedef __attribute__((__vector_size__ (8))) char v64s8;
+typedef __attribute__((__vector_size__ (8))) short v64s16;
+typedef __attribute__((__vector_size__ (8))) int v64s32;
+typedef __attribute__((__vector_size__ (16))) char v128s8;
+typedef __attribute__((__vector_size__ (16))) short v128s16;
+typedef __attribute__((__vector_size__ (16))) int v128s32;
+typedef __attribute__((__vector_size__ (16))) long v128s64;
+typedef __attribute__((__vector_size__ (32))) char v256s8;
+typedef __attribute__((__vector_size__ (32))) short v256s16;
+typedef __attribute__((__vector_size__ (32))) int v256s32;
+typedef __attribute__((__vector_size__ (32))) long v256s64;
+typedef __attribute__((__vector_size__ (32))) __int128 v256s128;
+typedef __attribute__((__vector_size__ (64))) char v512s8;
+typedef __attribute__((__vector_size__ (64))) short v512s16;
+typedef __attribute__((__vector_size__ (64))) int v512s32;
+typedef __attribute__((__vector_size__ (64))) long v512s64;
+typedef __attribute__((__vector_size__ (64))) __int128 v512s128;
+
+__attribute__((__noipa__)) __attribute__((__cold__)) void
+foo0 (int, int, int, int,
+ _BitInt (3) sb3_0, char, char, char,
+ char, char, char, char, char,
+ char,char,char,char,
+ _BitInt (5), _BitInt (5), short, short, char,
+ short, short, char, int,
+ int, int, int, int, char,
+ long, long, long, long, long,
+ long, char, char, char, __int128,
+ __int128, __int128, __int128, __int128,
+ __int128, char, _BitInt (129), _BitInt (129),
+ _BitInt (255), _BitInt (255), _BitInt (256), _BitInt (256),
+ _BitInt (257), _BitInt (257), _BitInt (511), _BitInt (511),
+ _BitInt (512), _BitInt (512), _BitInt (513), _BitInt (513),
+ _BitInt (1023), _BitInt (1023), _BitInt (1024),
+ _BitInt (1024), _BitInt (1025), _BitInt (1025), _BitInt (331),
+ _BitInt (331), _BitInt (412), _BitInt (412), _BitInt (985),
+ _BitInt (985), _BitInt (60692), _BitInt (60692), char, char,
+ char, char, short, short, char, char, short, short, unsigned,
+ int, v64s8, v64s8, v64s16, v64s16, v64s32, v64s32, long, long,
+ v128s8, v128s8, short, v128s16, int, v128s32, long, v128s64,
+ __int128, __int128, v256s8, v256s8, v256s16, v256s16, v256s32,
+ v256s32, v256s64, v256s64, v256s128, v256s128, v512s8, v512s8,
+ v512s16, v512s16, v512s32, v512s32, v512s64, v512s64,
+ v512s128, v512s128, char *ret)
+{
+ *ret = sb3_0;
+}
+
+void xprintf(const char *s, ...)
+{
+ __builtin_va_list arg;
+ __builtin_va_start(arg, s);
+ int x = __builtin_va_arg(arg, int);
+ if (x != 0xfc)
+ __builtin_abort();
+ __builtin_va_end(arg);
+}
+
+int
+main ()
+{
+ char x;
+ foo0 (0, 0, 0, 0, -4, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -11, 9, 2, 95,
+ 67, -24064, 0, 5, 6, 7, 0, 8, 9, 20, 4, 3, 6018427387903, 53,
+ 505857273496, 255, 6, 0, 18446744073709551612uwb,
+ 70141183460469231731687303715884105726uwb,
+ 0x1ab7415df78c5d6355b5bf722d2c32wb, 18266202578652419339uwb,
+ 9594516560314871492uwb, 18446744073709551615uwb, 0,
+ 404678514476015038251338572701189uwb, 0,
+
6044618658097711785492504343953926634992332820282019728792003956564819964uwb,
+ 0, 0,
+
6044618658097711785492504343953926634992332820282019728792003956564819967uwb,
+ 0,
+ 0x10000000000000000000000000000000000000000000000000000000000000000wb,
0, 1, 0, 606316039853739674053402528525595147334676uwb, 0,
+
407807929942597099574024998205846127479363981533648751059337645126787177315524892796678197877342278766808798337256961671307362116160553321514205699439425uwb,
+ 0,
+
40449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155532867238014401323013014363877264556147687333148794044698496604162935270824544179699376777403364901791967663653826658703420189949487003960239619175525152082242122474284732uwb,
+
90772930519078902473361797697894230657273430081157732675805500963132708477322407536021120113879871393357658789768814416622492847430639474124377767893424865485276302219601246094119453082952085005768838150682342462881473913110540827237163350510684586298239947245938479716304835356329623282435531uwb,
+ 0, 0,
+
30203714894603546035770925859109268843954143792619895153655326951406405759993601526034894524347802740350892957243539456uwb,
+ 0,
+
0x24c07bd236a355dac41c3e00fed83a2c01d47d7e7c155128f8474ab8acd075fe3be2661e9ba3b0498c2wb,
+ 0,
+
-0x273d7a44735564a706be6f876e8c09a64c065028fc8719feace6b4ad22b722fc6628809c1bcfda902d563717b88876aa068496bwb,
+ 0, 5, 0, 4, 0, 5, 0, 0115, 3, 92, 0, 1, 840, 839, 35, 4095, (v64s8)
+ { }, (v64s8)
+ { 3, -0x4 }, (v64s16)
+ { }, (v64s16)
+ { }, (v64s32)
+ { }, (v64s32)
+ { }, 7, 40, (v128s8)
+ { }, (v128s8)
+ { }, 126, (v128s16)
+ { }, 95, (v128s32)
+ { }, 6, (v128s64)
+ { }, 18446462603027742720uwb, 11842991817553718011uwb, (v256s8)
+ { 227, 84, 73, 5, 2, 45, 107, 55, 16, 128, 23, 17, 1, 226, 15 },
+ (v256s8)
+ { 9, 111, 108, 19, 98, 27, 97, -0xe }, (v256s16)
+ { 1, 167, 65280, 35, 15286, 9, 182, 13126, 15 }, (v256s16)
+ { 72, 381, 185, 619, 978, 926, 42 }, (v256s32)
+ { 20, 72, 905, 207, 63, 40 }, (v256s32)
+ { 5, 503, 1, 152, 5519391 }, (v256s64)
+ { 1280 }, (v256s64)
+ { 94, 8851183291103289765uwb, 526 }, (v256s128)
+ { 15013856701581711399uwb }, (v256s128)
+ { 10389555769421265252uwb, 15766594437788011035uwb }, (v512s8)
+ { 104, 131, 5, 21, 32, 127, 75, 54, 232, 1, 82, 255, 28, 178, 1,
+ 121, 1, 14, 210, 240, 140, 53, 4, 91, 204, 78, 130, 76, 212, 52, 1,
+ 200, 11, 25, 251, 41, 88, 12, 3, 59 }, (v512s8)
+ { 13, 10, 48, -0x2f, 67, 85, 87, 123, 8, 26, 83, 114, 51, 6, 96,
+ -0x1c, 115, 49, 86, 103, 93, 31, -0x6, 112, 1, 7, -0xc, 18, 47, 5,
+ -0xf, 3 }, (v512s16)
+ { 4094, 64492, 855, 61680, 27, 47569, 55, 65534, 76, -1024, 30,
+ 254, 280, 11, 10, 14, 2, 4095, 3, 111, 65535, 67, 24289, 32, 25604,
+ 26, 65535, 98, 65533, 5 }, (v512s16)
+ { 615, 2032, 54, 52, 015, 45, 24, 6, 7, 68, 24015, 380, 3315, 5, 8,
+ 23, -0xf, 62, 82, 4 }, (v512s32)
+ { 54, 4294967294, 6, 16, 9, 13, 300, 80000000, 700, 600, 93, 040,
+ 15 }, (v512s32)
+ { 515, 3, 14, 10, 2, 80, 39, 215, 7, 3, 4 }, (v512s64)
+ { 4, 9, 5, 3, 505 }, (v512s64)
+ { 904293909041052234uwb, 4 }, (v512s128)
+ { 9223372036854775808uwb, 1, 4073709551612 }, (v512s128)
+ { 7, 30391054, 18446742974197923807uwb }, &x);
+ for (unsigned i = 0; i < sizeof(x); i++) xprintf("%02x", (int)(unsigned
char)x);
+}
--
2.53.0