Hi,

This looks like an acceptable work around. We special case behavior that I'm not sure we can express in ways GCC can understand or will make use of, whilst at the same time we keep expressing behavior it does understand and can optimize.

Nice idea!

LGTM, needs maintainer approval though.

Kind regards,
Andre

On 07/05/2024 17:19, Christophe Lyon wrote:
In this PR, we have to handle a case where MVE predicates are supplied
as a const_int, where individual predicates have illegal boolean
values (such as 0xc for a 4-bit boolean predicate).  To avoid the ICE,
we hide the constant behind an unspec.

On MVE V8BI and V4BI multi-bit masks are interpreted byte-by-byte at
instruction level, see
https://developer.arm.com/documentation/101028/0012/14--M-profile-Vector-Extension--MVE--intrinsics.

This is a workaround until we change such predicates representation to
V16BImode.

2024-05-06  Christophe Lyon  <christophe.l...@linaro.org>
            Jakub Jelinek  <ja...@redhat.com>

        PR target/114801
        gcc/
        * config/arm/arm-mve-builtins.cc
        (function_expander::add_input_operand): Handle CONST_INT
        predicates.
        * mve.md (set_mve_const_pred): New pattern.
        * unspec.md (MVE_PRED): New unspec.

        gcc/testsuite/
        * gcc.target/arm/mve/pr114801.c: New test.
---
  gcc/config/arm/arm-mve-builtins.cc          | 27 ++++++++++++++-
  gcc/config/arm/mve.md                       | 12 +++++++
  gcc/config/arm/unspecs.md                   |  1 +
  gcc/testsuite/gcc.target/arm/mve/pr114801.c | 37 +++++++++++++++++++++
  4 files changed, 76 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/gcc.target/arm/mve/pr114801.c

diff --git a/gcc/config/arm/arm-mve-builtins.cc 
b/gcc/config/arm/arm-mve-builtins.cc
index 6a5775c67e5..7d5af649857 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -2205,7 +2205,32 @@ function_expander::add_input_operand (insn_code icode, 
rtx x)
        mode = GET_MODE (x);
      }
    else if (VALID_MVE_PRED_MODE (mode))
-    x = gen_lowpart (mode, x);
+    {
+      if (CONST_INT_P (x) && (mode == V8BImode || mode == V4BImode))
+       {
+         /* In V8BI or V4BI each element has 2 or 4 bits, if those
+            bits aren't all the same, gen_lowpart might ICE.  Hide
+            the move behind an unspec to avoid this.
+            V8BI and V4BI multi-bit masks are interpreted
+            byte-by-byte at instruction level, see
+            
https://developer.arm.com/documentation/101028/0012/14--M-profile-Vector-Extension--MVE--intrinsics.
  */
+         unsigned HOST_WIDE_INT xi = UINTVAL (x);
+         if ((xi & 0x5555) != ((xi >> 1) & 0x5555)
+             || (mode == V4BImode
+                 && (xi & 0x3333) != ((xi >> 2) & 0x3333)))
+           {
+             rtx unspec_x;
+             unspec_x = gen_rtx_UNSPEC (HImode, gen_rtvec (1, x), MVE_PRED);
+             x = force_reg (HImode, unspec_x);
+           }
+
+       }
+      else if (SUBREG_P (x))
+       /* gen_lowpart on a SUBREG can ICE.  */
+       x = force_reg (GET_MODE (x), x);
+
+      x = gen_lowpart (mode, x);
+    }
m_ops.safe_grow (m_ops.length () + 1, true);
    create_input_operand (&m_ops.last (), x, mode);
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 35916f62604..d337422d695 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -6621,3 +6621,15 @@ (define_expand "@arm_mve_reinterpret<mode>"
        }
    }
  )
+
+;; Hide predicate constants from optimizers
+(define_insn "set_mve_const_pred"
+ [(set
+   (match_operand:HI 0 "s_register_operand" "=r")
+   (unspec:HI [(match_operand:HI 1 "general_operand" "n")] MVE_PRED))]
+  "TARGET_HAVE_MVE"
+{
+    return "movw%?\t%0, %L1\t%@ set_mve_const_pred";
+}
+  [(set_attr "type" "mov_imm")]
+)
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 4713ec840ab..336f2fe08e6 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -1256,4 +1256,5 @@ (define_c_enum "unspec" [
    SQRSHRL_48
    VSHLCQ_M_
    REINTERPRET
+  MVE_PRED
  ])
diff --git a/gcc/testsuite/gcc.target/arm/mve/pr114801.c 
b/gcc/testsuite/gcc.target/arm/mve/pr114801.c
new file mode 100644
index 00000000000..fb3e4d855f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/pr114801.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_mve.h>
+
+/*
+** test_32:
+**...
+**     movw    r[0-9]+, 52428  @ set_mve_const_pred
+**...
+*/
+uint32x4_t test_32() {
+  return vdupq_m_n_u32(vdupq_n_u32(0xffffffff), 0, 0xcccc);
+}
+
+/*
+** test_16:
+**...
+**     movw    r[0-9]+, 6927   @ set_mve_const_pred
+**...
+*/
+uint16x8_t test_16() {
+  return vdupq_m_n_u16(vdupq_n_u16(0xffff), 0, 0x1b0f);
+}
+
+/*
+** test_8:
+**...
+**     mov     r[0-9]+, #23055 @ movhi
+**...
+*/
+uint8x16_t test_8() {
+  return vdupq_m_n_u8(vdupq_n_u8(0xff), 0, 0x5a0f);
+}

Reply via email to