https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113166

--- Comment #3 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
#include <cstdint>
#include <riscv_vector.h>

// Primary template, parameterized on the byte length to load from `data`.
// It is only declared in this snippet; the sole definition visible here is
// the explicit specialization for sizeof(uint64_t).
template <size_t length>
inline vuint8m1_t tail_load(void const* data);

// Specialization for an 8-byte tail: reads one uint64_t through `data` and
// returns it as a vuint8m1_t. Four alternative implementations are kept
// behind the preprocessor; because the first condition is `#if 1`, only the
// first branch is compiled and the `#elif`/`#else` branches are skipped.
template<>
inline vuint8m1_t tail_load<sizeof(uint64_t)>(void const* data) {
    // View the untyped pointer as a pointer to a single 64-bit value.
    uint64_t const* ptr64 = reinterpret_cast<uint64_t const*>(data);
#if 1
    // Active variant: splat 0 across an e64/m1 vector, then slide it up by
    // one element while inserting *ptr64 (per the RVV vslide1up definition
    // the scalar lands in element 0, leaving the remaining elements zero).
    const vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0,
__riscv_vsetvlmax_e64m1());
    vuint64m1_t v64 = __riscv_vslide1up(zero, *ptr64,
__riscv_vsetvlmax_e64m1());
    // Reinterpret the 64-bit element vector as bytes for the return type.
    return __riscv_vreinterpret_u8m1(v64);
#elif 1
    // Disabled variant: move the scalar into a vector with vl = 1, then
    // slide the zero vector up by one element on top of it.
    vuint64m1_t v64 = __riscv_vmv_s_x_u64m1(*ptr64, 1);
    const vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0,
__riscv_vsetvlmax_e64m1());
    // NOTE(review): vl is taken from __riscv_vsetvlmax_e8m1() although the
    // operands are u64m1 vectors -- confirm this is intended.
    v64 = __riscv_vslideup(v64, zero, 1, __riscv_vsetvlmax_e8m1());
    return __riscv_vreinterpret_u8m1(v64);
#elif 1
    // Disabled variant: same as the previous one, but the value is obtained
    // with a unit-stride vector load of one 64-bit element (vl = 1).
    vuint64m1_t v64 = __riscv_vle64_v_u64m1(ptr64, 1);
    const vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0,
__riscv_vsetvlmax_e64m1());
    // NOTE(review): same e8m1 vl with u64m1 operands as above -- confirm.
    v64 = __riscv_vslideup(v64, zero, 1, __riscv_vsetvlmax_e8m1());
    return __riscv_vreinterpret_u8m1(v64);
#else
    // Disabled variant: load one 64-bit element, switch to the byte view
    // first, then slide a zero byte vector up by sizeof(uint64_t) bytes.
    vuint8m1_t v = __riscv_vreinterpret_u8m1(__riscv_vle64_v_u64m1(ptr64, 1));
    const vuint8m1_t zero = __riscv_vmv_v_x_u8m1(0, __riscv_vsetvlmax_e8m1());
    return __riscv_vslideup(v, zero, sizeof(uint64_t),
__riscv_vsetvlmax_e8m1());
#endif
}

// Test entry point whose generated code is shown in the listings below.
// Calls tail_load with length == sizeof(data), i.e. sizeof(uint64_t), on the
// address of the by-value parameter.
vuint8m1_t test2(uint64_t data) {
    return tail_load<sizeof(data)>(&data);
}

GCC ASM:

# GCC output for test2: four vector instructions before the return.
test2(unsigned long):
        vsetvli a5,zero,e64,m1,ta,ma    # select 64-bit elements, LMUL=1 (matches the vsetvlmax_e64m1 calls)
        vmv.v.i v8,0                    # v8 = all-zero vector (the `zero` operand)
        vmv1r.v v9,v8                   # whole-register copy of the zero vector into v9; the LLVM listing has no such copy
        vslide1up.vx    v8,v9,a0        # v8 = v9 slid up by one element with scalar a0 inserted
        ret

LLVM ASM:

# LLVM output for test2: the zero vector is built directly in v9, so no
# register copy is needed before the slide.
test2(unsigned long):                              # @test2(unsigned long)
        vsetvli a1, zero, e64, m1, ta, ma   # select 64-bit elements, LMUL=1 (matches the vsetvlmax_e64m1 calls)
        vmv.v.i v9, 0                       # v9 = all-zero vector (the `zero` operand)
        vslide1up.vx    v8, v9, a0          # v8 = v9 slid up by one element with scalar a0 inserted
        ret

Reply via email to