Hi!

Since PR37922 fix RTL DSE has hard register conflict checking
in replace_read, so that if the replacement sequence sets (or typically just
clobbers) some hard register (usually condition codes) we verify that
hard register is not live.
Unfortunately, it compares the hard reg set clobbered/set by the sequence
(regs_set) against the currently live hard register set, but it then
emits the insn sequence not at the current insn position, but before
store_insn->insn.
So, we should not compare against the current live hard register set,
but against the hard register live set at the point of the store insn.
Fortunately, we already have that remembered in store_insn->fixed_regs_live.

In addition to bootstrapping/regtesting this patch on x86_64-linux and
i686-linux, I've also added statistics gathering and it seems the only
place where we end up rejecting the replace_read is the newly added
testcase (the PR37922 is no longer effective at that) and fixed_regs_live
has been always non-NULL at the if (store_insn->fixed_regs_live) spot.
Rather than having there an assert, I chose to just keep regs_set
as is, which means in that hypothetical case where fixed_regs_live wouldn't
be computed for some store we'd still accept sequences that don't
clobber/set any hard registers and just punt on those that clobber/set
those.

Ok for trunk?

2021-04-03  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/99863
        * dse.c (replace_read): Drop regs_live argument.  Instead of
        regs_live, use store_insn->fixed_regs_live if non-NULL,
        otherwise punt if insns sequence clobbers or sets any hard
        registers.

        * gcc.target/i386/pr99863.c: New test.

--- gcc/dse.c.jj        2021-01-28 09:59:11.973750676 +0100
+++ gcc/dse.c   2021-04-01 15:16:22.003913061 +0200
@@ -1970,8 +1970,7 @@ get_stored_val (store_info *store_info,
 
 static bool
 replace_read (store_info *store_info, insn_info_t store_insn,
-             read_info_t read_info, insn_info_t read_insn, rtx *loc,
-             bitmap regs_live)
+             read_info_t read_info, insn_info_t read_insn, rtx *loc)
 {
   machine_mode store_mode = GET_MODE (store_info->mem);
   machine_mode read_mode = GET_MODE (read_info->mem);
@@ -2040,7 +2039,8 @@ replace_read (store_info *store_info, in
          note_stores (this_insn, look_for_hardregs, regs_set);
        }
 
-      bitmap_and_into (regs_set, regs_live);
+      if (store_insn->fixed_regs_live)
+       bitmap_and_into (regs_set, store_insn->fixed_regs_live);
       if (!bitmap_empty_p (regs_set))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2286,7 +2286,7 @@ check_mem_read_rtx (rtx *loc, bb_info_t
                                                 offset - store_info->offset,
                                                 width)
                      && replace_read (store_info, i_ptr, read_info,
-                                      insn_info, loc, bb_info->regs_live))
+                                      insn_info, loc))
                    return;
 
                  /* The bases are the same, just see if the offsets
@@ -2352,8 +2352,7 @@ check_mem_read_rtx (rtx *loc, bb_info_t
                                   store_info->width)
              && all_positions_needed_p (store_info,
                                         offset - store_info->offset, width)
-             && replace_read (store_info, i_ptr,  read_info, insn_info, loc,
-                              bb_info->regs_live))
+             && replace_read (store_info, i_ptr,  read_info, insn_info, loc))
            return;
 
          remove = canon_true_dependence (store_info->mem,
--- gcc/testsuite/gcc.target/i386/pr99863.c.jj  2021-04-01 15:43:05.284267327 
+0200
+++ gcc/testsuite/gcc.target/i386/pr99863.c     2021-04-01 15:42:52.882403726 
+0200
@@ -0,0 +1,33 @@
+/* PR rtl-optimization/99863 */
+/* { dg-do run } */
+/* { dg-options "-O -fno-tree-forwprop -mno-sse2 -Wno-psabi" } */
+
+typedef unsigned char __attribute__((__vector_size__ (8))) A;
+typedef unsigned char __attribute__((__vector_size__ (32))) B;
+typedef unsigned char __attribute__((__vector_size__ (64))) C;
+typedef unsigned int __attribute__((__vector_size__ (32))) D;
+typedef unsigned int __attribute__((__vector_size__ (64))) E;
+typedef unsigned long long F;
+
+D a;
+A b;
+
+A
+foo (E x, F y)
+{
+  D c = (y <= 0) * a;
+  x *= (0 < y);
+  C d = (C) x;
+  B e = ((union { C a; B b[2];}) d).b[0] + (B) c;
+  A f = ((union { B a; A b[4];}) e).b[0] + (A) b;
+  return f;
+}
+
+int
+main ()
+{
+  F x = (F) foo ((E) { 3 }, 5);
+  if (x != 3)
+    __builtin_abort ();
+  return 0;
+}

        Jakub

Reply via email to