From: xuli <xu...@eswincomputing.com>
This patch adds support for vector SAT_ADD when one of the operands is a
signed immediate (IMM).
void __attribute__((noinline)) \
vec_sat_s_add_imm_##T##_fmt_1##_##INDEX (T *out, T *op_1, unsigned limit) \
{ \
unsigned i; \
for (i = 0; i < limit; i++) \
{ \
T x = op_1[i]; \
T sum = (UT)x + (UT)IMM; \
out[i] = (x ^ IMM) < 0 \
? sum \
: (sum ^ x) >= 0 \
? sum \
: x < 0 ? MIN : MAX; \
} \
}
Take form 1 below as an example:
DEF_VEC_SAT_S_ADD_IMM_FMT_1(0, int8_t, uint8_t, 9, INT8_MIN, INT8_MAX)
Before this patch:
__attribute__((noinline))
void vec_sat_s_add_imm_int8_t_fmt_1_0 (int8_t * restrict out, int8_t * restrict
op_1, unsigned int limit)
{
vector([16,16]) signed char * vectp_out.28;
vector([16,16]) signed char vect_iftmp.27;
vector([16,16]) <signed-boolean:1> mask__28.26;
vector([16,16]) <signed-boolean:1> mask__29.25;
vector([16,16]) <signed-boolean:1> mask__19.19;
vector([16,16]) <signed-boolean:1> mask__31.18;
vector([16,16]) signed char vect__6.17;
vector([16,16]) signed char vect__5.16;
vector([16,16]) signed char vect_sum_15.15;
vector([16,16]) unsigned char vect__4.14;
vector([16,16]) unsigned char vect_x.13;
vector([16,16]) signed char vect_x_14.12;
vector([16,16]) signed char * vectp_op_1.10;
vector([16,16]) <signed-boolean:1> _78;
vector([16,16]) unsigned char _79;
vector([16,16]) unsigned char _80;
unsigned long _92;
unsigned long ivtmp_93;
unsigned long ivtmp_94;
unsigned long _95;
<bb 2> [local count: 118111598]:
if (limit_12(D) != 0)
goto <bb 3>; [89.00%]
else
goto <bb 5>; [11.00%]
<bb 3> [local count: 105119322]:
_92 = (unsigned long) limit_12(D);
<bb 4> [local count: 955630226]:
# vectp_op_1.10_62 = PHI <vectp_op_1.10_63(4), op_1_13(D)(3)>
# vectp_out.28_89 = PHI <vectp_out.28_90(4), out_16(D)(3)>
# ivtmp_93 = PHI <ivtmp_94(4), _92(3)>
_95 = .SELECT_VL (ivtmp_93, POLY_INT_CST [16, 16]);
vect_x_14.12_64 = .MASK_LEN_LOAD (vectp_op_1.10_62, 8B, { -1, ... }, _95, 0);
vect_x.13_65 = VIEW_CONVERT_EXPR<vector([16,16]) unsigned
char>(vect_x_14.12_64);
vect__4.14_67 = vect_x.13_65 + { 9, ... };
vect_sum_15.15_68 = VIEW_CONVERT_EXPR<vector([16,16]) signed
char>(vect__4.14_67);
vect__5.16_70 = vect_x_14.12_64 ^ { 9, ... };
vect__6.17_71 = vect_x_14.12_64 ^ vect_sum_15.15_68;
mask__31.18_73 = vect__5.16_70 >= { 0, ... };
mask__19.19_75 = vect_x_14.12_64 < { 0, ... };
mask__29.25_85 = vect__6.17_71 < { 0, ... };
mask__28.26_86 = mask__31.18_73 & mask__29.25_85;
_78 = ~mask__28.26_86;
_79 = .VCOND_MASK (mask__19.19_75, { 128, ... }, { 127, ... });
_80 = .COND_ADD (_78, vect_x.13_65, { 9, ... }, _79);
vect_iftmp.27_87 = VIEW_CONVERT_EXPR<vector([16,16]) signed char>(_80);
.MASK_LEN_STORE (vectp_out.28_89, 8B, { -1, ... }, _95, 0, vect_iftmp.27_87);
vectp_op_1.10_63 = vectp_op_1.10_62 + _95;
vectp_out.28_90 = vectp_out.28_89 + _95;
ivtmp_94 = ivtmp_93 - _95;
if (ivtmp_94 != 0)
goto <bb 4>; [89.00%]
else
goto <bb 5>; [11.00%]
<bb 5> [local count: 118111600]:
return;
}
After this patch:
__attribute__((noinline))
void vec_sat_s_add_imm_int8_t_fmt_1_0 (int8_t * restrict out, int8_t * restrict
op_1, unsigned int limit)
{
vector([16,16]) signed char * vectp_out.12;
vector([16,16]) signed char vect_patt_10.11;
vector([16,16]) signed char vect_x_14.10;
vector([16,16]) signed char D.2852;
vector([16,16]) signed char * vectp_op_1.8;
vector([16,16]) signed char _73(D);
unsigned long _80;
unsigned long ivtmp_81;
unsigned long ivtmp_82;
unsigned long _83;
<bb 2> [local count: 118111598]:
if (limit_12(D) != 0)
goto <bb 3>; [89.00%]
else
goto <bb 5>; [11.00%]
<bb 3> [local count: 105119322]:
_80 = (unsigned long) limit_12(D);
<bb 4> [local count: 955630226]:
# vectp_op_1.8_71 = PHI <vectp_op_1.8_72(4), op_1_13(D)(3)>
# vectp_out.12_77 = PHI <vectp_out.12_78(4), out_16(D)(3)>
# ivtmp_81 = PHI <ivtmp_82(4), _80(3)>
_83 = .SELECT_VL (ivtmp_81, POLY_INT_CST [16, 16]);
vect_x_14.10_74 = .MASK_LEN_LOAD (vectp_op_1.8_71, 8B, { -1, ... }, _73(D),
_83, 0);
vect_patt_10.11_75 = .SAT_ADD (vect_x_14.10_74, { 9, ... });
.MASK_LEN_STORE (vectp_out.12_77, 8B, { -1, ... }, _83, 0,
vect_patt_10.11_75);
vectp_op_1.8_72 = vectp_op_1.8_71 + _83;
vectp_out.12_78 = vectp_out.12_77 + _83;
ivtmp_82 = ivtmp_81 - _83;
if (ivtmp_82 != 0)
goto <bb 4>; [89.00%]
else
goto <bb 5>; [11.00%]
<bb 5> [local count: 118111600]:
return;
}
The following test suites pass with this patch:
1. The rv64gcv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.
Signed-off-by: Li Xu <xu...@eswincomputing.com>
gcc/ChangeLog:
* match.pd: Add signed vector SAT_ADD IMM form1 matching.