As shown in the PR, the simplify_gen_subreg call in simplify_replace_fn_rtx:

(gdb) list
469       if (code == SUBREG)
470         {
471           op0 = simplify_replace_fn_rtx (SUBREG_REG (x), old_rtx, fn, data);
472           if (op0 == SUBREG_REG (x))
473             return x;
474           op0 = simplify_gen_subreg (GET_MODE (x), op0,
475                                      GET_MODE (SUBREG_REG (x)),
476                                      SUBREG_BYTE (x));
477           return op0 ? op0 : x;
478         }
simplifies with the following arguments:

(gdb) p debug_rtx (op0)
(const_vector:V4QI [
        (const_int -52 [0xffffffffffffffcc]) repeated x4
    ])
(gdb) p debug_rtx (x)
(subreg:V16QI (reg:V4QI 98) 0)

to:

(gdb) p debug_rtx (op0)
(const_vector:V16QI [
        (const_int -52 [0xffffffffffffffcc]) repeated x16
    ])

This simplification is invalid: it is not possible to get a V16QImode vector from a V4QImode vector, even when all elements are duplicates.

The simplification happens in simplify_context::simplify_subreg:

(gdb) list
7558      if (VECTOR_MODE_P (outermode)
7559          && GET_MODE_INNER (outermode) == GET_MODE_INNER (innermode)
7560          && vec_duplicate_p (op, &elt))
7561        return gen_vec_duplicate (outermode, elt);

but the above simplification is valid only for non-paradoxical subregs, where outermode <= innermode (i.e. the outer mode is no wider than the inner mode). We should not assume that elements outside the original register are valid, let alone all duplicates.

PR target/110206

gcc/ChangeLog:

* simplify-rtx.cc (simplify_context::simplify_subreg): Avoid returning a vector with duplicated value outside the original register.

gcc/testsuite/ChangeLog:

* gcc.dg/torture/pr110206.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK for master and release branches?

Uros.
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index d7315d82aa3..87ca25086dc 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -7557,6 +7557,7 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op, if (VECTOR_MODE_P (outermode) && GET_MODE_INNER (outermode) == GET_MODE_INNER (innermode) + && !paradoxical_subreg_p (outermode, innermode) && vec_duplicate_p (op, &elt)) return gen_vec_duplicate (outermode, elt); diff --git a/gcc/testsuite/gcc.dg/torture/pr110206.c b/gcc/testsuite/gcc.dg/torture/pr110206.c new file mode 100644 index 00000000000..3a4f221ef47 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr110206.c @@ -0,0 +1,30 @@ +/* PR target/110206 */ +/* { dg-do run { target x86_64-*-* i?86-*-* } } */ + +typedef unsigned char __attribute__((__vector_size__ (4))) U; +typedef unsigned char __attribute__((__vector_size__ (8))) V; +typedef unsigned short u16; + +V g; + +void +__attribute__((noinline)) +foo (U u, u16 c, V *r) +{ + if (!c) + __builtin_abort (); + V x = __builtin_shufflevector (u, (204 >> u), 7, 0, 5, 1, 3, 5, 0, 2); + V y = __builtin_shufflevector (g, (V) { }, 7, 6, 6, 7, 2, 6, 3, 5); + V z = __builtin_shufflevector (y, 204 * x, 3, 9, 8, 1, 4, 6, 14, 5); + *r = z; +} + +int +main (void) +{ + V r; + foo ((U){4}, 5, &r); + if (r[6] != 0x30) + __builtin_abort(); + return 0; +}