Replace and remove do_sat_bhs. This avoids multiple repetitions of INT*_MIN/MAX.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/tcg/sve_helper.c | 116 +++++++++++++++--------------------- 1 file changed, 48 insertions(+), 68 deletions(-) diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index c206ca65ce..e84cffd0b0 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -523,14 +523,9 @@ DO_ZPZZ(sve2_uhsub_zpzz_h, uint16_t, H1_2, DO_HSUB_BHS) DO_ZPZZ(sve2_uhsub_zpzz_s, uint32_t, H1_4, DO_HSUB_BHS) DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D) -static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max) -{ - return val >= max ? max : val <= min ? min : val; -} - -#define DO_SQADD_B(n, m) do_sat_bhs((int64_t)n + m, INT8_MIN, INT8_MAX) -#define DO_SQADD_H(n, m) do_sat_bhs((int64_t)n + m, INT16_MIN, INT16_MAX) -#define DO_SQADD_S(n, m) do_sat_bhs((int64_t)n + m, INT32_MIN, INT32_MAX) +#define DO_SQADD_B(n, m) do_ssat_b((int64_t)n + m) +#define DO_SQADD_H(n, m) do_ssat_h((int64_t)n + m) +#define DO_SQADD_S(n, m) do_ssat_s((int64_t)n + m) static inline int64_t do_sqadd_d(int64_t n, int64_t m) { @@ -547,9 +542,9 @@ DO_ZPZZ(sve2_sqadd_zpzz_h, int16_t, H1_2, DO_SQADD_H) DO_ZPZZ(sve2_sqadd_zpzz_s, int32_t, H1_4, DO_SQADD_S) DO_ZPZZ_D(sve2_sqadd_zpzz_d, int64_t, do_sqadd_d) -#define DO_UQADD_B(n, m) do_sat_bhs((int64_t)n + m, 0, UINT8_MAX) -#define DO_UQADD_H(n, m) do_sat_bhs((int64_t)n + m, 0, UINT16_MAX) -#define DO_UQADD_S(n, m) do_sat_bhs((int64_t)n + m, 0, UINT32_MAX) +#define DO_UQADD_B(n, m) do_usat_b((int64_t)n + m) +#define DO_UQADD_H(n, m) do_usat_h((int64_t)n + m) +#define DO_UQADD_S(n, m) do_usat_s((int64_t)n + m) static inline uint64_t do_uqadd_d(uint64_t n, uint64_t m) { @@ -562,9 +557,9 @@ DO_ZPZZ(sve2_uqadd_zpzz_h, uint16_t, H1_2, DO_UQADD_H) DO_ZPZZ(sve2_uqadd_zpzz_s, uint32_t, H1_4, DO_UQADD_S) DO_ZPZZ_D(sve2_uqadd_zpzz_d, uint64_t, do_uqadd_d) -#define DO_SQSUB_B(n, m) do_sat_bhs((int64_t)n - m, INT8_MIN, INT8_MAX) -#define DO_SQSUB_H(n, m) do_sat_bhs((int64_t)n - m, INT16_MIN, INT16_MAX) -#define DO_SQSUB_S(n, m) do_sat_bhs((int64_t)n - m, INT32_MIN, INT32_MAX) +#define DO_SQSUB_B(n, m) do_ssat_b((int64_t)n - m) +#define DO_SQSUB_H(n, m) do_ssat_h((int64_t)n - m) +#define DO_SQSUB_S(n, m) do_ssat_s((int64_t)n - m) static inline int64_t do_sqsub_d(int64_t n, int64_t m) { @@ -581,9 +576,9 @@ DO_ZPZZ(sve2_sqsub_zpzz_h, int16_t, H1_2, DO_SQSUB_H) DO_ZPZZ(sve2_sqsub_zpzz_s, int32_t, H1_4, DO_SQSUB_S) DO_ZPZZ_D(sve2_sqsub_zpzz_d, int64_t, do_sqsub_d) -#define DO_UQSUB_B(n, m) do_sat_bhs((int64_t)n - m, 0, UINT8_MAX) -#define DO_UQSUB_H(n, m) do_sat_bhs((int64_t)n - m, 0, UINT16_MAX) -#define DO_UQSUB_S(n, m) do_sat_bhs((int64_t)n - m, 0, UINT32_MAX) +#define DO_UQSUB_B(n, m) do_usat_b((int64_t)n - m) +#define DO_UQSUB_H(n, m) do_usat_h((int64_t)n - m) +#define DO_UQSUB_S(n, m) do_usat_s((int64_t)n - m) static inline uint64_t do_uqsub_d(uint64_t n, uint64_t m) { @@ -595,12 +590,9 @@ DO_ZPZZ(sve2_uqsub_zpzz_h, uint16_t, H1_2, DO_UQSUB_H) DO_ZPZZ(sve2_uqsub_zpzz_s, uint32_t, H1_4, DO_UQSUB_S) DO_ZPZZ_D(sve2_uqsub_zpzz_d, uint64_t, do_uqsub_d) -#define DO_SUQADD_B(n, m) \ - do_sat_bhs((int64_t)(int8_t)n + m, INT8_MIN, INT8_MAX) -#define DO_SUQADD_H(n, m) \ - do_sat_bhs((int64_t)(int16_t)n + m, INT16_MIN, INT16_MAX) -#define DO_SUQADD_S(n, m) \ - do_sat_bhs((int64_t)(int32_t)n + m, INT32_MIN, INT32_MAX) +#define DO_SUQADD_B(n, m) do_ssat_b((int64_t)(int8_t)n + m) +#define DO_SUQADD_H(n, m) do_ssat_h((int64_t)(int16_t)n + m) +#define DO_SUQADD_S(n, m) do_ssat_s((int64_t)(int32_t)n + m) static inline int64_t do_suqadd_d(int64_t n, uint64_t m) { @@ -630,12 +622,9 @@ DO_ZPZZ(sve2_suqadd_zpzz_h, uint16_t, H1_2, DO_SUQADD_H) DO_ZPZZ(sve2_suqadd_zpzz_s, uint32_t, H1_4, DO_SUQADD_S) DO_ZPZZ_D(sve2_suqadd_zpzz_d, uint64_t, do_suqadd_d) -#define DO_USQADD_B(n, m) \ - do_sat_bhs((int64_t)n + (int8_t)m, 0, UINT8_MAX) -#define DO_USQADD_H(n, m) \ - do_sat_bhs((int64_t)n + (int16_t)m, 0, UINT16_MAX) -#define DO_USQADD_S(n, m) \ - do_sat_bhs((int64_t)n + (int32_t)m, 0, UINT32_MAX) +#define DO_USQADD_B(n, m) do_usat_b((int64_t)n + (int8_t)m) +#define DO_USQADD_H(n, m) do_usat_h((int64_t)n + (int16_t)m) +#define DO_USQADD_S(n, m) do_usat_s((int64_t)n + (int32_t)m) static inline uint64_t do_usqadd_d(uint64_t n, int64_t m) { @@ -1222,37 +1211,29 @@ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ } \ } -#define DO_SQXTN_H(n) do_sat_bhs(n, INT8_MIN, INT8_MAX) -#define DO_SQXTN_S(n) do_sat_bhs(n, INT16_MIN, INT16_MAX) -#define DO_SQXTN_D(n) do_sat_bhs(n, INT32_MIN, INT32_MAX) +DO_XTNB(sve2_sqxtnb_h, int16_t, do_ssat_b) +DO_XTNB(sve2_sqxtnb_s, int32_t, do_ssat_h) +DO_XTNB(sve2_sqxtnb_d, int64_t, do_ssat_s) -DO_XTNB(sve2_sqxtnb_h, int16_t, DO_SQXTN_H) -DO_XTNB(sve2_sqxtnb_s, int32_t, DO_SQXTN_S) -DO_XTNB(sve2_sqxtnb_d, int64_t, DO_SQXTN_D) +DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, do_ssat_b) +DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, do_ssat_h) +DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, do_ssat_s) -DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, DO_SQXTN_H) -DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, DO_SQXTN_S) -DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, DO_SQXTN_D) +DO_XTNB(sve2_uqxtnb_h, uint16_t, do_usat_b) +DO_XTNB(sve2_uqxtnb_s, uint32_t, do_usat_h) +DO_XTNB(sve2_uqxtnb_d, uint64_t, do_usat_s) -#define DO_UQXTN_H(n) do_sat_bhs(n, 0, UINT8_MAX) -#define DO_UQXTN_S(n) do_sat_bhs(n, 0, UINT16_MAX) -#define DO_UQXTN_D(n) do_sat_bhs(n, 0, UINT32_MAX) +DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, do_usat_b) +DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, do_usat_h) +DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, do_usat_s) -DO_XTNB(sve2_uqxtnb_h, uint16_t, DO_UQXTN_H) -DO_XTNB(sve2_uqxtnb_s, uint32_t, DO_UQXTN_S) -DO_XTNB(sve2_uqxtnb_d, uint64_t, DO_UQXTN_D) +DO_XTNB(sve2_sqxtunb_h, int16_t, do_usat_b) +DO_XTNB(sve2_sqxtunb_s, int32_t, do_usat_h) +DO_XTNB(sve2_sqxtunb_d, int64_t, do_usat_s) -DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, DO_UQXTN_H) -DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, DO_UQXTN_S) -DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, DO_UQXTN_D) - -DO_XTNB(sve2_sqxtunb_h, int16_t, DO_UQXTN_H) -DO_XTNB(sve2_sqxtunb_s, int32_t, DO_UQXTN_S) -DO_XTNB(sve2_sqxtunb_d, int64_t, DO_UQXTN_D) - -DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, DO_UQXTN_H) -DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, DO_UQXTN_S) -DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, DO_UQXTN_D) +DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, do_usat_b) +DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, do_usat_h) +DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, do_usat_s) #undef DO_XTNB #undef DO_XTNT @@ -2183,10 +2164,9 @@ DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1, do_urshr) DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, do_urshr) DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, H1_8, H1_4, do_urshr) -#define DO_SQSHRUN_H(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT8_MAX) -#define DO_SQSHRUN_S(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT16_MAX) -#define DO_SQSHRUN_D(x, sh) \ - do_sat_bhs((int64_t)(x) >> (sh < 64 ? sh : 63), 0, UINT32_MAX) +#define DO_SQSHRUN_H(x, sh) do_usat_b((int64_t)(x) >> sh) +#define DO_SQSHRUN_S(x, sh) do_usat_h((int64_t)(x) >> sh) +#define DO_SQSHRUN_D(x, sh) do_usat_s((int64_t)(x) >> (sh < 64 ? sh : 63)) DO_SHRNB(sve2_sqshrunb_h, int16_t, uint8_t, DO_SQSHRUN_H) DO_SHRNB(sve2_sqshrunb_s, int32_t, uint16_t, DO_SQSHRUN_S) @@ -2196,9 +2176,9 @@ DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H) DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S) DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRUN_D) -#define DO_SQRSHRUN_H(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT8_MAX) -#define DO_SQRSHRUN_S(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT16_MAX) -#define DO_SQRSHRUN_D(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT32_MAX) +#define DO_SQRSHRUN_H(x, sh) do_usat_b(do_srshr(x, sh)) +#define DO_SQRSHRUN_S(x, sh) do_usat_h(do_srshr(x, sh)) +#define DO_SQRSHRUN_D(x, sh) do_usat_s(do_srshr(x, sh)) DO_SHRNB(sve2_sqrshrunb_h, int16_t, uint8_t, DO_SQRSHRUN_H) DO_SHRNB(sve2_sqrshrunb_s, int32_t, uint16_t, DO_SQRSHRUN_S) @@ -2208,9 +2188,9 @@ DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H) DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S) DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRUN_D) -#define DO_SQSHRN_H(x, sh) do_sat_bhs(x >> sh, INT8_MIN, INT8_MAX) -#define DO_SQSHRN_S(x, sh) do_sat_bhs(x >> sh, INT16_MIN, INT16_MAX) -#define DO_SQSHRN_D(x, sh) do_sat_bhs(x >> sh, INT32_MIN, INT32_MAX) +#define DO_SQSHRN_H(x, sh) do_ssat_b(x >> sh) +#define DO_SQSHRN_S(x, sh) do_ssat_h(x >> sh) +#define DO_SQSHRN_D(x, sh) do_ssat_s(x >> sh) DO_SHRNB(sve2_sqshrnb_h, int16_t, uint8_t, DO_SQSHRN_H) DO_SHRNB(sve2_sqshrnb_s, int32_t, uint16_t, DO_SQSHRN_S) @@ -2220,9 +2200,9 @@ DO_SHRNT(sve2_sqshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRN_H) DO_SHRNT(sve2_sqshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRN_S) DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRN_D) -#define DO_SQRSHRN_H(x, sh) do_sat_bhs(do_srshr(x, sh), INT8_MIN, INT8_MAX) -#define DO_SQRSHRN_S(x, sh) do_sat_bhs(do_srshr(x, sh), INT16_MIN, INT16_MAX) -#define DO_SQRSHRN_D(x, sh) do_sat_bhs(do_srshr(x, sh), INT32_MIN, INT32_MAX) +#define DO_SQRSHRN_H(x, sh) do_ssat_b(do_srshr(x, sh)) +#define DO_SQRSHRN_S(x, sh) do_ssat_h(do_srshr(x, sh)) +#define DO_SQRSHRN_D(x, sh) do_ssat_s(do_srshr(x, sh)) DO_SHRNB(sve2_sqrshrnb_h, int16_t, uint8_t, DO_SQRSHRN_H) DO_SHRNB(sve2_sqrshrnb_s, int32_t, uint16_t, DO_SQRSHRN_S) -- 2.43.0