https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113166
--- Comment #3 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
// Reduced test case quoted in the report. The exact intrinsic sequence is
// what produces the assembly listed at the bottom, so the code is kept
// verbatim; only line structure and comments were added.
#include <cstdint>
#include <riscv_vector.h>

// Primary template: declared only. The only definition visible here is the
// specialization for length == sizeof(uint64_t) below.
template <size_t length>
inline vuint8m1_t tail_load(void const* data);

// Specialization for length == sizeof(uint64_t).
// Reads one uint64_t through `data` and returns a vuint8m1_t built from it.
// Four alternative implementations are selected by the preprocessor; because
// the first condition is `#if 1`, only the first branch is compiled and the
// `#elif 1` / `#else` branches are inactive.
template<>
inline vuint8m1_t tail_load<sizeof(uint64_t)>(void const* data) {
  uint64_t const* ptr64 = reinterpret_cast<uint64_t const*>(data);
#if 1
  // Active variant: all-zero u64m1 vector at VLMAX, then vslide1up with the
  // loaded scalar, then reinterpret the result as u8m1.
  // Expected result per RVV vslide1up semantics: element 0 = *ptr64 and the
  // remaining elements zero -- TODO confirm against the intrinsic spec.
  const vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0, __riscv_vsetvlmax_e64m1());
  vuint64m1_t v64 = __riscv_vslide1up(zero, *ptr64, __riscv_vsetvlmax_e64m1());
  return __riscv_vreinterpret_u8m1(v64);
#elif 1
  // Inactive variant: scalar move into a vector with vl = 1, then vslideup
  // of a zero vector by one element.
  // NOTE(review): the vl passed to __riscv_vslideup is
  // __riscv_vsetvlmax_e8m1() although both operands are u64m1 -- confirm
  // whether that is intended.
  vuint64m1_t v64 = __riscv_vmv_s_x_u64m1(*ptr64, 1);
  const vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0, __riscv_vsetvlmax_e64m1());
  v64 = __riscv_vslideup(v64, zero, 1, __riscv_vsetvlmax_e8m1());
  return __riscv_vreinterpret_u8m1(v64);
#elif 1
  // Inactive variant: same as the previous one, but the first element comes
  // from a unit-stride load with vl = 1 instead of a scalar move.
  // NOTE(review): same e8m1 vl on u64m1 operands as above -- confirm.
  vuint64m1_t v64 = __riscv_vle64_v_u64m1(ptr64, 1);
  const vuint64m1_t zero = __riscv_vmv_v_x_u64m1(0, __riscv_vsetvlmax_e64m1());
  v64 = __riscv_vslideup(v64, zero, 1, __riscv_vsetvlmax_e8m1());
  return __riscv_vreinterpret_u8m1(v64);
#else
  // Inactive variant: load with vl = 1, reinterpret as u8m1 first, then
  // vslideup of a zero u8m1 vector by sizeof(uint64_t) byte elements.
  vuint8m1_t v = __riscv_vreinterpret_u8m1(__riscv_vle64_v_u64m1(ptr64, 1));
  const vuint8m1_t zero = __riscv_vmv_v_x_u8m1(0, __riscv_vsetvlmax_e8m1());
  return __riscv_vslideup(v, zero, sizeof(uint64_t), __riscv_vsetvlmax_e8m1());
#endif
}

// Entry point whose generated code is compared below. `data` is a uint64_t
// taken by value, so sizeof(data) selects the sizeof(uint64_t)
// specialization, which is called with the address of the parameter.
vuint8m1_t test2(uint64_t data) {
  return tail_load<sizeof(data)>(&data);
}

// Compiler output quoted from the report. The GCC listing contains a
// `vmv1r.v v9,v8` register copy that does not appear in the LLVM listing.
GCC ASM:
test2(unsigned long):
        vsetvli a5,zero,e64,m1,ta,ma
        vmv.v.i v8,0
        vmv1r.v v9,v8
        vslide1up.vx v8,v9,a0
        ret

LLVM ASM:
test2(unsigned long): # @test2(unsigned long)
        vsetvli a1, zero, e64, m1, ta, ma
        vmv.v.i v9, 0
        vslide1up.vx v8, v9, a0
        ret