From: Pan Li <pan2...@intel.com>

This patch adds support for form 1 of the vector signed integer
SAT_SUB, as shown in the example below:

Form 1:
  #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)                     \
  void __attribute__((noinline))                                       \
  vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
  {                                                                    \
    unsigned i;                                                        \
    for (i = 0; i < limit; i++)                                        \
      {                                                                \
        T x = op_1[i];                                                 \
        T y = op_2[i];                                                 \
        T minus = (UT)x - (UT)y;                                       \
        out[i] = (x ^ y) >= 0                                          \
          ? minus                                                      \
          : (minus ^ x) >= 0                                           \
            ? minus                                                    \
            : x < 0 ? MIN : MAX;                                       \
      }                                                                \
  }

DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  91   │   _108 = .SELECT_VL (ivtmp_106, POLY_INT_CST [16, 16]);
  92   │   vect_x_16.11_80 = .MASK_LEN_LOAD (vectp_op_1.9_78, 8B, { -1, ... }, _108, 0);
  93   │   _69 = vect_x_16.11_80 >> 7;
  94   │   vect_x.12_81 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_x_16.11_80);
  95   │   vect_y_18.15_85 = .MASK_LEN_LOAD (vectp_op_2.13_83, 8B, { -1, ... }, _108, 0);
  96   │   vect__7.21_91 = vect_x_16.11_80 ^ vect_y_18.15_85;
  97   │   mask__44.22_92 = vect__7.21_91 < { 0, ... };
  98   │   vect_y.16_86 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned char>(vect_y_18.15_85);
  99   │   vect__6.17_87 = vect_x.12_81 - vect_y.16_86;
 100   │   vect_minus_19.18_88 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(vect__6.17_87);
 101   │   vect__8.19_89 = vect_x_16.11_80 ^ vect_minus_19.18_88;
 102   │   mask__42.20_90 = vect__8.19_89 < { 0, ... };
 103   │   mask__41.23_93 = mask__42.20_90 & mask__44.22_92;
 104   │   _4 = .COND_XOR (mask__41.23_93, _69, { 127, ... }, vect_minus_19.18_88);
 105   │   .MASK_LEN_STORE (vectp_out.31_102, 8B, { -1, ... }, _108, 0, _4);
 106   │   vectp_op_1.9_79 = vectp_op_1.9_78 + _108;
 107   │   vectp_op_2.13_84 = vectp_op_2.13_83 + _108;
 108   │   vectp_out.31_103 = vectp_out.31_102 + _108;
 109   │   ivtmp_107 = ivtmp_106 - _108;

After this patch:
  81   │   _102 = .SELECT_VL (ivtmp_100, POLY_INT_CST [16, 16]);
  82   │   vect_x_16.11_89 = .MASK_LEN_LOAD (vectp_op_1.9_87, 8B, { -1, ... }, _102, 0);
  83   │   vect_y_18.14_93 = .MASK_LEN_LOAD (vectp_op_2.12_91, 8B, { -1, ... }, _102, 0);
  84   │   vect_patt_38.15_94 = .SAT_SUB (vect_x_16.11_89, vect_y_18.14_93);
  85   │   .MASK_LEN_STORE (vectp_out.16_96, 8B, { -1, ... }, _102, 0, vect_patt_38.15_94);
  86   │   vectp_op_1.9_88 = vectp_op_1.9_87 + _102;
  87   │   vectp_op_2.12_92 = vectp_op_2.12_91 + _102;
  88   │   vectp_out.16_97 = vectp_out.16_96 + _102;
  89   │   ivtmp_101 = ivtmp_100 - _102;

The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

        * match.pd: Add case 1 matching pattern for vector signed SAT_SUB.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/match.pd | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 8a7569ce387..a3c298d3a22 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3401,6 +3401,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Signed saturation sub, case 4:
+   T minus = (T)((UT)X - (UT)Y);
+   SAT_S_SUB = (X ^ Y) < 0 & (X ^ minus) < 0 ? (-(T)(X < 0) ^ MAX) : minus;
+
+   The T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_sub @0 @1)
+ (cond^ (bit_and:c (lt (bit_xor @0 (nop_convert@2 (minus (nop_convert @0)
+                                                        (nop_convert @1))))
+                      integer_zerop)
+                  (lt (bit_xor:c @0 @1) integer_zerop))
+       (bit_xor:c (nop_convert (negate (nop_convert (convert
+                                                     (lt @0 integer_zerop)))))
+                  max_value)
+       @2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type))))
+
 /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT).
    SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))).  */
 (match (unsigned_integer_sat_trunc @0)
-- 
2.43.0

Reply via email to