I updated this patch with the following change (force-pushed to the forge) to extend the test coverage and fix the resulting fallout.
diff --git c/libstdc++-v3/include/bits/simd_loadstore.h w/libstdc++-v3/include/bits/simd_loadstore.h
index 7ea9de0f6f98..753097b8052a 100644
--- c/libstdc++-v3/include/bits/simd_loadstore.h
+++ w/libstdc++-v3/include/bits/simd_loadstore.h
@@ -112,11 +112,12 @@ __glibcxx_simd_precondition(
unchecked_load(_Rg&& __r, const __load_mask_type_t<_Vp,
ranges::range_value_t<_Rg>>& __mask,
flags<_Flags...> __f = {})
{
- using _RV = __vec_load_return_t<_Vp, ranges::range_value_t<_Rg>>;
+ using _Tp = ranges::range_value_t<_Rg>;
+ using _RV = __vec_load_return_t<_Vp, _Tp>;
using _Rp = typename _RV::value_type;
- static_assert(__vectorizable<ranges::range_value_t<_Rg>>);
- static_assert(__explicitly_convertible_to<ranges::range_value_t<_Rg>,
_Rp>);
- static_assert(__loadstore_convertible_to<ranges::range_value_t<_Rg>,
_Rp, _Flags...>,
+ static_assert(__vectorizable<_Tp>);
+ static_assert(__explicitly_convertible_to<_Tp, _Rp>);
+ static_assert(__loadstore_convertible_to<_Tp, _Rp, _Flags...>,
"'flag_convert' must be used for conversions that are not
value-preserving");
constexpr bool __allow_out_of_bounds =
__f._S_test(__allow_partial_loadstore);
@@ -135,8 +136,13 @@ __glibcxx_simd_precondition(
const size_t __rg_size = ranges::size(__r);
if consteval
{
- return _RV([&](size_t __i) {
- return __i < __rg_size && __mask[int(__i)] ? __r[__i] :
_Rp();
+ return _RV([&](size_t __i) -> _Rp {
+ if (__i >= __rg_size || !__mask[int(__i)])
+ return _Rp();
+ else if constexpr (__complex_like<_Rp> && !
__complex_like<_Tp>)
+ return static_cast<typename _Rp::value_type>(__r[__i]);
+ else
+ return static_cast<_Rp>(__r[__i]);
});
}
else
diff --git c/libstdc++-v3/include/bits/simd_x86.h w/libstdc++-v3/include/bits/simd_x86.h
index 65c1c02e5f33..db4017c94fb6 100644
--- c/libstdc++-v3/include/bits/simd_x86.h
+++ w/libstdc++-v3/include/bits/simd_x86.h
@@ -1117,7 +1117,7 @@ __x86_masked_load(const _Up* __mem, unsigned_integral auto __k)
{
return __vec_concat(
__x86_masked_load<__vec_builtin_type<_Tp, __n / 2>>(__mem,
__k),
- __x86_masked_load<__vec_builtin_type<_Tp, __n / 2>>(__mem,
__k >> __n / 2)
+ __x86_masked_load<__vec_builtin_type<_Tp, __n / 2>>(__mem +
__n / 2, __k >> __n / 2)
);
}
else if constexpr (sizeof(_TV) == 64)
diff --git c/libstdc++-v3/testsuite/std/simd/loads.cc w/libstdc++-v3/testsuite/std/simd/loads.cc
index e4e67ab98791..c632383f76dc 100644
--- c/libstdc++-v3/testsuite/std/simd/loads.cc
+++ w/libstdc++-v3/testsuite/std/simd/loads.cc
@@ -91,12 +91,16 @@ make_iota_array()
static constexpr V ref_k_2 = select(M([](int i) { return i < 2; }),
ref_k, T());
ADD_TEST(masked_loads) {
- std::tuple {make_iota_array<T>(), alternating, M(true), M(false)},
- [](auto& t, auto mem, M k, M tr, M fa) {
+ std::tuple {make_iota_array<T>(), make_iota_array<int>(), alternating,
M(true), M(false)},
+ [](auto& t, auto mem, auto ints, M k, M tr, M fa) {
t.verify_equal(simd::unchecked_load<V>(mem, tr), ref);
t.verify_equal(simd::unchecked_load<V>(mem, fa), V());
t.verify_equal(simd::unchecked_load<V>(mem, k), ref_k);
+ t.verify_equal(simd::unchecked_load<V>(ints, tr, simd::flag_convert),
ref);
+ t.verify_equal(simd::unchecked_load<V>(ints, fa, simd::flag_convert),
V());
+ t.verify_equal(simd::unchecked_load<V>(ints, k, simd::flag_convert),
ref_k);
+
t.verify_equal(simd::partial_load<V>(mem, tr), ref);
t.verify_equal(simd::partial_load<V>(mem, fa), V());
t.verify_equal(simd::partial_load<V>(mem, k), ref_k);
@@ -104,6 +108,13 @@ make_iota_array()
t.verify_equal(simd::partial_load<V>(mem.begin(), mem.begin() + 2,
tr), ref_2);
t.verify_equal(simd::partial_load<V>(mem.begin(), mem.begin() + 2,
fa), V());
t.verify_equal(simd::partial_load<V>(mem.begin(), mem.begin() + 2, k),
ref_k_2);
+
+ t.verify_equal(simd::partial_load<V>(ints.begin(), ints.begin() + 2,
tr,
+ simd::flag_convert), ref_2);
+ t.verify_equal(simd::partial_load<V>(ints.begin(), ints.begin() + 2,
fa,
+ simd::flag_convert), V());
+ t.verify_equal(simd::partial_load<V>(ints.begin(), ints.begin() + 2,
k,
+ simd::flag_convert), ref_k_2);
}
};
};
Matthias Kretz [Monday, 2 March 2026 11:15:49 CET]:
> Signed-off-by: Matthias Kretz <[email protected]>
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/simd_loadstore.h (unchecked_load): Scalar partial
> masked loads are either a simple load or nothing.
> * include/bits/simd_mask.h (_S_partial_mask_of_n): Conversion
> from bool or unsigned integer to basic_mask needs explicit
> constructor call. Scalar masks don't need a recursion to
> _S_partial_mask_of_n.
> * include/bits/simd_vec.h (_S_masked_load): Add case for size 1.
> This is needed for recursion from _ScalarAbi<N> with N > 1.
> * testsuite/std/simd/loads.cc: Add tests for masked loads.
> ---
> libstdc++-v3/include/bits/simd_loadstore.h | 11 ++--
> libstdc++-v3/include/bits/simd_mask.h | 15 ++++--
> libstdc++-v3/include/bits/simd_vec.h | 6 ++-
> libstdc++-v3/testsuite/std/simd/loads.cc | 63 ++++++++++++++++------
> 4 files changed, 70 insertions(+), 25 deletions(-)
--
──────────────────────────────────────────────────────────────────────────
Dr. Matthias Kretz https://mattkretz.github.io
GSI Helmholtz Center for Heavy Ion Research https://gsi.de
std::simd
──────────────────────────────────────────────────────────────────────────
signature.asc
Description: This is a digitally signed message part.
