On Thu, Sep 25, 2025 at 02:10:19PM -0500, Nathan Bossart wrote:
> Here's a new version of 0002 with a modified SSE2 implementation, as
> discussed elsewhere [0]. This allows us to remove vector8_ssub().
Sorry for the noise. v3 fixes the mixed-declarations-and-code problems.

-- 
nathan
>From 2d355136c9b6aede2c1cb6f8f08fa7eacd032b61 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Mon, 22 Sep 2025 16:17:09 -0500 Subject: [PATCH v3 1/1] Optimize vector8_has_le() on AArch64. --- src/include/port/simd.h | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/src/include/port/simd.h b/src/include/port/simd.h index 97c5f353022..79dd1f76962 100644 --- a/src/include/port/simd.h +++ b/src/include/port/simd.h @@ -86,7 +86,6 @@ static inline uint32 vector8_highbit_mask(const Vector8 v); static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2); #ifndef USE_NO_SIMD static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2); -static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2); #endif /* @@ -213,6 +212,10 @@ static inline bool vector8_has_le(const Vector8 v, const uint8 c) { bool result = false; +#ifdef USE_SSE2 + Vector8 umin; + Vector8 cmpe; +#endif /* pre-compute the result for assert checking */ #ifdef USE_ASSERT_CHECKING @@ -250,14 +253,12 @@ vector8_has_le(const Vector8 v, const uint8 c) } } } -#else - - /* - * Use saturating subtraction to find bytes <= c, which will present as - * NUL bytes. This approach is a workaround for the lack of unsigned - * comparison instructions on some architectures. - */ - result = vector8_has_zero(vector8_ssub(v, vector8_broadcast(c))); +#elif defined(USE_SSE2) + umin = _mm_min_epu8(v, vector8_broadcast(c)); + cmpe = _mm_cmpeq_epi8(umin, v); + result = vector8_is_highbit_set(cmpe); +#elif defined(USE_NEON) + result = vminvq_u8(v) <= c; #endif Assert(assert_result == result); @@ -358,24 +359,6 @@ vector32_or(const Vector32 v1, const Vector32 v2) } #endif /* ! USE_NO_SIMD */ -/* - * Return the result of subtracting the respective elements of the input - * vectors using saturation (i.e., if the operation would yield a value less - * than zero, zero is returned instead). 
For more information on saturation - * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic - */ -#ifndef USE_NO_SIMD -static inline Vector8 -vector8_ssub(const Vector8 v1, const Vector8 v2) -{ -#ifdef USE_SSE2 - return _mm_subs_epu8(v1, v2); -#elif defined(USE_NEON) - return vqsubq_u8(v1, v2); -#endif -} -#endif /* ! USE_NO_SIMD */ - /* * Return a vector with all bits set in each lane where the corresponding * lanes in the inputs are equal. -- 2.39.5 (Apple Git-154)