As shown in the PR, the simplify_gen_subreg call in simplify_replace_fn_rtx:

(gdb) list
469           if (code == SUBREG)
470             {
471               op0 = simplify_replace_fn_rtx (SUBREG_REG (x), old_rtx, fn, data);
472               if (op0 == SUBREG_REG (x))
473                 return x;
474               op0 = simplify_gen_subreg (GET_MODE (x), op0,
475                                          GET_MODE (SUBREG_REG (x)),
476                                          SUBREG_BYTE (x));
477               return op0 ? op0 : x;
478             }

simplifies with the following arguments:

(gdb) p debug_rtx (op0)
(const_vector:V4QI [
        (const_int -52 [0xffffffffffffffcc]) repeated x4
    ])
(gdb) p debug_rtx (x)
(subreg:V16QI (reg:V4QI 98) 0)

to:

(gdb) p debug_rtx (op0)
(const_vector:V16QI [
        (const_int -52 [0xffffffffffffffcc]) repeated x16
    ])

This simplification is invalid: it is not possible to get a V16QImode vector
from a V4QImode vector, even when all elements are duplicates.

The simplification happens in simplify_context::simplify_subreg:

(gdb) list
7558          if (VECTOR_MODE_P (outermode)
7559              && GET_MODE_INNER (outermode) == GET_MODE_INNER (innermode)
7560              && vec_duplicate_p (op, &elt))
7561            return gen_vec_duplicate (outermode, elt);

but the above simplification is valid only for non-paradoxical subregs,
where outermode <= innermode.  We should not assume that elements outside
the original register are valid, let alone that they are all duplicates.

    PR target/110206

gcc/ChangeLog:

    * simplify-rtx.cc (simplify_context::simplify_subreg):
    Avoid returning a vector with duplicated value
    outside the original register.

gcc/testsuite/ChangeLog:

    * gcc.dg/torture/pr110206.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK for master and release branches?

Uros.
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index d7315d82aa3..87ca25086dc 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7557,6 +7557,7 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op,
 
       if (VECTOR_MODE_P (outermode)
          && GET_MODE_INNER (outermode) == GET_MODE_INNER (innermode)
+         && !paradoxical_subreg_p (outermode, innermode)
          && vec_duplicate_p (op, &elt))
        return gen_vec_duplicate (outermode, elt);
 
diff --git a/gcc/testsuite/gcc.dg/torture/pr110206.c b/gcc/testsuite/gcc.dg/torture/pr110206.c
new file mode 100644
index 00000000000..3a4f221ef47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr110206.c
@@ -0,0 +1,30 @@
+/* PR target/110206 */
+/* { dg-do run { target x86_64-*-* i?86-*-* } } */
+
+typedef unsigned char __attribute__((__vector_size__ (4))) U;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned short u16;
+
+V g;
+
+void
+__attribute__((noinline))
+foo (U u, u16 c, V *r)
+{
+  if (!c)
+    __builtin_abort ();
+  V x = __builtin_shufflevector (u, (204 >> u), 7, 0, 5, 1, 3, 5, 0, 2);
+  V y = __builtin_shufflevector (g, (V) { }, 7, 6, 6, 7, 2, 6, 3, 5);
+  V z = __builtin_shufflevector (y, 204 * x, 3, 9, 8, 1, 4, 6, 14, 5);
+  *r = z;
+}
+
+int
+main (void)
+{
+  V r;
+  foo ((U){4}, 5, &r);
+  if (r[6] != 0x30)
+    __builtin_abort();
+  return 0;
+}

Reply via email to