Hi,

(no subject last time...)

Since r16-4391-g85ab3a22ed11c9 we can use a punned type/mode for grouped
loads and stores.  Vineet reported an x264 wrong-code bug since that
commit.  The crux of the issue is that in avlprop we back-propagate
the AVL from consumers (like stores) to producers.
When e.g. a V4QI vector is type-punned by a V1SI vector
  (subreg:V1SI (reg:V4QI ...)
the AVL of that instruction refers to the outer subreg mode, i.e. for an
AVL of 1 in a store we store one SImode element.  The producer of the
store data is not type punned and still uses V4QI and we produce 4
QImode elements.  Due to this mismatch we back-propagate the consumer
AVL of 1 to the producers, causing wrong code.

This patch looks if the use is inside a subreg and scales the immediate
AVL by the ratio of inner and outer mode.

Regtested on rv64gcv_zvl512b.

Regards
 Robin

        PR target/122445

gcc/ChangeLog:

        * config/riscv/riscv-avlprop.cc 
(pass_avlprop::get_vlmax_ta_preferred_avl):
        Scale AVL of subreg uses.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr122445.c: New test.
---
 gcc/config/riscv/riscv-avlprop.cc             | 41 +++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr122445.c   | 26 ++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c

diff --git a/gcc/config/riscv/riscv-avlprop.cc 
b/gcc/config/riscv/riscv-avlprop.cc
index b8547a722c5..ee92f55ea81 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "df.h"
 #include "rtl-ssa.h"
+#include "rtl-iter.h"
 #include "cfgcleanup.h"
 #include "insn-attr.h"
 #include "tm-constrs.h"
@@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) 
const
                  && def1->insn ()->compare_with (insn) >= 0)
                return NULL_RTX;
            }
+         else
+           {
+             /* If the use is in a subreg e.g. in a store it is possible that
+                we punned the vector mode with a larger mode like
+                  (subreg:V1SI (reg:V4QI 123)).
+                For an AVL of 1 that means we actually store one SImode
+                element and not 1 QImode elements.  But the latter is what we
+                would propagate if we took the AVL operand literally.
+                Instead we scale it by the ratio of inner and outer mode
+                (4 in the example above).  */
+             int factor = 1;
+             if (use->includes_subregs ())
+               {
+                 subrtx_iterator::array_type array;
+                 FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST)
+                   {
+                     const_rtx x = *iter;
+                     if (!x)
+                       break;
+                     if (SUBREG_P (x)
+                         && REGNO (SUBREG_REG (x)) == use->regno ()
+                         && known_eq (GET_MODE_SIZE (use->mode ()),
+                                      GET_MODE_SIZE (GET_MODE (x))))
+                       {
+                         if (can_div_trunc_p (GET_MODE_NUNITS (use->mode ()),
+                                              GET_MODE_NUNITS (GET_MODE (x)),
+                                              &factor))
+                           {
+                             gcc_assert (factor > 0);
+                             break;
+                           }
+                         else
+                           return NULL_RTX;
+                       }
+                   }
+               }
+
+             if (factor > 1)
+               new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor);
+           }
 
          if (!use_avl)
            use_avl = new_use_avl;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
new file mode 100644
index 00000000000..47368684faa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122445.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcbv_zvl256b -mabi=lp64d -O3 -mrvv-vector-bits=zvl 
--param=riscv-autovec-mode=V4QI -mtune=generic-ooo -fdump-rtl-avlprop-all" } */
+
+typedef unsigned char uint8_t;
+typedef short int16_t;
+
+#define FDEC_STRIDE 32
+
+static inline uint8_t x264_clip_uint8( int x )
+{
+  return x;
+}
+
+void
+x264_add4x4_idct (uint8_t *p_dst, int16_t d[16])
+{
+  for( int y = 0; y < 4; y++ )
+    {
+      for( int x = 0; x < 4; x++ )
+        p_dst[x] = x264_clip_uint8( p_dst[x] + d[y*4+x] );
+      p_dst += FDEC_STRIDE;
+    }
+}
+
+/* { dg-final { scan-rtl-dump "Propagating AVL: \\(const_int 4" "avlprop" } } 
*/
+/* { dg-final { scan-rtl-dump-not "Propagating AVL: \\(const_int 1" "avlprop" 
} } */
-- 
2.51.0



-- 
Regards
 Robin

Reply via email to