Re: [pushed][PATCH v2] LoongArch: Support immediate_operand for vec_cmp

Lulu Cheng Thu, 26 Dec 2024 19:44:38 -0800

Pushed to r15-6445.

在 2024/12/18 下午3:45, Jiahao Xu 写道:

We can't vectorize the code into instructions like vslti.w that compare
with immediate_operand, because we miss immediate_operand support for
integer comparisons.


gcc/ChangeLog:

        * config/loongarch/lasx.md (vec_cmp<mode><mode256_i>): Remove.
        (vec_cmpu<ILASX:mode><mode256_i>): Remove.
        * config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp):
        Ensure vector comparison instructions support CMP_OP1.
        * config/loongarch/lsx.md (vec_cmp<mode><mode_i>): Remove.
        (vec_cmpu<ILSX:mode><mode_i>): Remove.
        * config/loongarch/simd.md (ALLVEC, allmode_i): New mode iterators.
        (vec_cmp<mode><allmode_i>): New define_expand.
        (vec_cmpu<mode><allmode_i>): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/vector/lasx/lasx-vcond-3.c: New test.
---
  gcc/config/loongarch/lasx.md                  | 25 ------
  gcc/config/loongarch/loongarch.cc             | 12 +++
  gcc/config/loongarch/lsx.md                   | 25 ------
  gcc/config/loongarch/simd.md                  | 40 +++++++++
  .../loongarch/vector/lasx/lasx-vcond-3.c      | 81 +++++++++++++++++++
  5 files changed, 133 insertions(+), 50 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 90778dd8ff9..071a5cb1733 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -162,9 +162,6 @@ (define_c_enum "unspec" [
    UNSPEC_LASX_XVILVL_INTERNAL
  ])

-;; All vector modes with 256 bits.
-(define_mode_iterator LASX [V4DF V8SF V4DI V8SI V16HI V32QI])
-
  ;; Only used for splitting insert_d and copy_{u,s}.d.
  (define_mode_iterator LASX_D [V4DI V4DF])

@@ -1365,28 +1362,6 @@ (define_insn "lasx_xvs<ICC:icc>_<ILASX:lasxfmt><cmpi_1>"
    [(set_attr "type" "simd_int_arith")
     (set_attr "mode" "<MODE>")])

-(define_expand "vec_cmp<mode><mode256_i>"
-  [(set (match_operand:<VIMODE256> 0 "register_operand")
-       (match_operator 1 ""
-         [(match_operand:LASX 2 "register_operand")
-          (match_operand:LASX 3 "register_operand")]))]
-  "ISA_HAS_LASX"
-{
-  loongarch_expand_vec_cmp (operands);
-  DONE;
-})
-
-(define_expand "vec_cmpu<ILASX:mode><mode256_i>"
-  [(set (match_operand:<VIMODE256> 0 "register_operand")
-       (match_operator 1 ""
-         [(match_operand:ILASX 2 "register_operand")
-          (match_operand:ILASX 3 "register_operand")]))]
-  "ISA_HAS_LASX"
-{
-  loongarch_expand_vec_cmp (operands);
-  DONE;
-})
-
  (define_insn "lasx_xvfclass_<flasxfmt>"
    [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
        (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 125ecc26c9c..2d4290bc2d1 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -10399,19 +10399,29 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
        switch (cond)
        {
        case NE:
+         if (!loongarch_const_vector_same_int_p (op1, cmp_mode, -16, 15))
+           op1 = force_reg (cmp_mode, op1);
          cond = reverse_condition (cond);
          negate = true;
          break;
        case EQ:
        case LT:
        case LE:
+         if (!loongarch_const_vector_same_int_p (op1, cmp_mode, -16, 15))
+           op1 = force_reg (cmp_mode, op1);
+         break;
        case LTU:
        case LEU:
+         if (!loongarch_const_vector_same_int_p (op1, cmp_mode, 0, 31))
+           op1 = force_reg (cmp_mode, op1);
          break;
        case GE:
        case GT:
        case GEU:
        case GTU:
+         /* Only supports reg-reg comparison.  */
+         if (!register_operand (op1, cmp_mode))
+           op1 = force_reg (cmp_mode, op1);
          std::swap (op0, op1);
          cond = swap_condition (cond);
          break;
@@ -10427,6 +10437,8 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
      case E_V2DFmode:
      case E_V8SFmode:
      case E_V4DFmode:
+      if (!register_operand (op1, cmp_mode))
+       op1 = force_reg (cmp_mode, op1);
        loongarch_emit_binary (cond, dest, op0, op1);
        break;

diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 2466d8c87be..878ff11e1ac 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -183,9 +183,6 @@ (define_mode_attr VD2MODE
     (V8HI "V2DI")
     (V16QI "V4SI")])

-;; All vector modes with 128 bits.
-(define_mode_iterator LSX      [V2DF V4SF V2DI V4SI V8HI V16QI])
-
  ;; Only used for vilvh and splitting insert_d and copy_{u,s}.d.
  (define_mode_iterator LSX_D    [V2DI V2DF])

@@ -508,28 +505,6 @@ (define_expand "vec_set<mode>"
    DONE;
  })

-(define_expand "vec_cmp<mode><mode_i>"
-  [(set (match_operand:<VIMODE> 0 "register_operand")
-       (match_operator 1 ""
-         [(match_operand:LSX 2 "register_operand")
-          (match_operand:LSX 3 "register_operand")]))]
-  "ISA_HAS_LSX"
-{
-  loongarch_expand_vec_cmp (operands);
-  DONE;
-})
-
-(define_expand "vec_cmpu<ILSX:mode><mode_i>"
-  [(set (match_operand:<VIMODE> 0 "register_operand")
-       (match_operator 1 ""
-         [(match_operand:ILSX 2 "register_operand")
-          (match_operand:ILSX 3 "register_operand")]))]
-  "ISA_HAS_LSX"
-{
-  loongarch_expand_vec_cmp (operands);
-  DONE;
-})
-
  (define_expand "vcond_mask_<mode><mode_i>"
    [(match_operand:LSX 0 "register_operand")
     (match_operand:LSX 1 "reg_or_m1_operand")
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 45ea114220e..fc3d98a4340 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -29,12 +29,21 @@ (define_mode_iterator FLSX    [V2DF V4SF])
  ;; FP modes supported by LASX
  (define_mode_iterator FLASX   [V4DF V8SF])

+;; All modes supported by LSX
+(define_mode_iterator LSX    [ILSX FLSX])
+
+;; ALL modes supported by LASX
+(define_mode_iterator LASX   [ILASX FLASX])
+
  ;; All integer modes available
  (define_mode_iterator IVEC    [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")])

  ;; All FP modes available
  (define_mode_iterator FVEC    [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")])

+;; All vector modes available
+(define_mode_iterator ALLVEC  [(LSX "ISA_HAS_LSX") (LASX "ISA_HAS_LASX")])
+
  ;; Mnemonic prefix, "x" for LASX modes.
  (define_mode_attr x [(V2DI "") (V4SI "") (V8HI "") (V16QI "")
                     (V2DF "") (V4SF "")
@@ -72,6 +81,14 @@ (define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI")
  (define_mode_attr vimode [(V2DF "v2di") (V4SF "v4si")
                          (V4DF "v4di") (V8SF "v8si")])

+;; Integer vector modes with the same size, in lower-case.
+(define_mode_attr allmode_i [(V2DI "v2di") (V4SI "v4si")
+              (V8HI "v8hi") (V16QI "v16qi")
+              (V2DF "v2di") (V4SF "v4si")
+              (V4DI "v4di") (V8SI "v8si")
+              (V16HI "v16hi") (V32QI "v32qi")
+              (V4DF "v4di") (V8SF "v8si")])
+
  ;; Suffix for LSX or LASX instructions.
  (define_mode_attr simdfmt [(V2DF "d") (V4DF "d")
                           (V4SF "s") (V8SF "s")
@@ -476,6 +493,29 @@ (define_insn "neg<mode>2"
    [(set_attr "type" "simd_logic")
     (set_attr "mode" "<MODE>")])

+;; vector compare
+(define_expand "vec_cmp<mode><allmode_i>"
+  [(set (match_operand:<VIMODE> 0 "register_operand")
+    (match_operator 1 ""
+      [(match_operand:ALLVEC 2 "register_operand")
+       (match_operand:ALLVEC 3 "nonmemory_operand")]))]
+  ""
+{
+  loongarch_expand_vec_cmp (operands);
+  DONE;
+})
+
+(define_expand "vec_cmpu<mode><allmode_i>"
+  [(set (match_operand:<VIMODE> 0 "register_operand")
+    (match_operator 1 ""
+      [(match_operand:IVEC 2 "register_operand")
+       (match_operand:IVEC 3 "nonmemory_operand")]))]
+  ""
+{
+  loongarch_expand_vec_cmp (operands);
+  DONE;
+})
+
  ; The LoongArch SX Instructions.
  (include "lsx.md")

diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c
new file mode 100644
index 00000000000..17545f44521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-3.c
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */
+
+#include <stdint-gcc.h>
+
+#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX, IMM)  \
+  void __attribute__ ((noinline, noclone))                     \
+  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
+                                  DATA_TYPE *__restrict__ x,   \
+                                  DATA_TYPE *__restrict__ y,   \
+                                  CMP_TYPE *__restrict__ a,    \
+                                  int n)                       \
+  {                                                            \
+    for (int i = 0; i < n; i++)                                     \
+      {                                                                \
+       DATA_TYPE xval = x[i], yval = y[i];                     \
+       CMP_TYPE aval = a[i], bval = IMM;                       \
+       r[i] = aval COND bval ? xval : yval;                    \
+      }                                                                \
+  }
+
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)      \
+  T (int8_t, int8_t, COND, SUFFIX, 0)                  \
+  T (int16_t, int16_t, COND, SUFFIX, 0)                        \
+  T (int32_t, int32_t, COND, SUFFIX, 0)                        \
+  T (int64_t, int64_t, COND, SUFFIX, 0)                        \
+  T (float, int32_t, COND, SUFFIX##_float, 0)          \
+  T (double, int64_t, COND, SUFFIX##_double, 0)
+
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)    \
+  T (uint8_t, uint8_t, COND, SUFFIX, 2)                        \
+  T (uint16_t, uint16_t, COND, SUFFIX, 2)              \
+  T (uint32_t, uint32_t, COND, SUFFIX, 2)              \
+  T (uint64_t, uint64_t, COND, SUFFIX, 2)              \
+  T (float, uint32_t, COND, SUFFIX##_float, 2)         \
+  T (double, uint64_t, COND, SUFFIX##_double, 2)
+
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX)      \
+  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)    \
+  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
+
+#define TEST_VAR_ALL(T)                                \
+  TEST_COND_VAR_ALL (T, <, _lt)                     \
+  TEST_COND_VAR_ALL (T, <=, _le)            \
+  TEST_COND_VAR_ALL (T, ==, _eq)               \
+  TEST_COND_VAR_ALL (T, !=, _ne)
+
+TEST_VAR_ALL (DEF_VCOND_VAR)
+
+/* { dg-final { scan-assembler-times {\txvslti\.b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\txvslti\.h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\txvslti\.w\t} 2 } } */
+/* { dg-final { scan-assembler-times {\txvslti\.d\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvslti\.b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvslti\.h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvslti\.w\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvslti\.d\t} 2 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.w\t} 2 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.d\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.b\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.h\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.w\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.d\t} 2 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.bu\t} 2 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.hu\t} 2 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.wu\t} 4 } } */
+/* { dg-final { scan-assembler-times {\txvslei\.du\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.bu\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.hu\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.wu\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tvslei\.du\t} 4 } } */
+/* { dg-final { scan-assembler-times {\txvseqi\.b\t} 4 } } */
+/* { dg-final { scan-assembler-times {\txvseqi\.h\t} 4 } } */
+/* { dg-final { scan-assembler-times {\txvseqi\.w\t} 8 } } */
+/* { dg-final { scan-assembler-times {\txvseqi\.d\t} 8 } } */
+/* { dg-final { scan-assembler-times {\tvseqi\.b\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tvseqi\.h\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tvseqi\.w\t} 8 } } */
+/* { dg-final { scan-assembler-times {\tvseqi\.d\t} 8 } } */

Re: [pushed][PATCH v2] LoongArch: Support immediate_operand for vec_cmp

Reply via email to