Issue     | 148422
Summary   | RVV intrinsics not the best register allocation
Labels    | new issue
Assignees |
Reporter  | camel-cdr
While already much better than GCC, LLVM currently produces a fair number of redundant register moves for the following RVV function: https://godbolt.org/z/hnYKedzM9 (43 vs. 32 instructions).
```c
#include <riscv_vector.h>

// Transpose 8x8 blocks of 16-bit elements using masked vslide1up/vslide1down.
vuint16m8_t
trans8x8_vslide(vuint16m8_t v)
{
	size_t VL = __riscv_vsetvlmax_e64m4();

	// Masks selecting the odd and even element positions.
	vbool16_t modd = __riscv_vreinterpret_b16(
		__riscv_vmv_v_x_u8m1(0b10101010, __riscv_vsetvlmax_e8m1()));
	vbool16_t meven = __riscv_vmnot(modd, VL);
	vbool16_t m;

	// 64-bit stage: masked slides between the two m4 halves.
	vuint64m4_t v4l = __riscv_vreinterpret_u64m4(__riscv_vget_u16m4(v, 0));
	vuint64m4_t v4h = __riscv_vreinterpret_u64m4(__riscv_vget_u16m4(v, 1));
	vuint64m4_t v4lt = v4l;
	m = modd;
	v4l = __riscv_vslide1up_mu(m, v4l, v4h, 0, VL);
	m = meven;
	v4h = __riscv_vslide1down_mu(m, v4h, v4lt, 0, VL);

	// 32-bit stage: masked slides between m2 pairs.
	vuint32m2_t v2ll = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4l, 0));
	vuint32m2_t v2lh = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4l, 1));
	vuint32m2_t v2hl = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4h, 0));
	vuint32m2_t v2hh = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4h, 1));
	vuint32m2_t v2llt = v2lh, v2hlt = v2hh;
	v2lh = __riscv_vslide1down_mu(m, v2lh, v2ll, 0, VL);
	v2hh = __riscv_vslide1down_mu(m, v2hh, v2hl, 0, VL);
	m = modd;
	v2ll = __riscv_vslide1up_mu(m, v2ll, v2llt, 0, VL);
	v2hl = __riscv_vslide1up_mu(m, v2hl, v2hlt, 0, VL);

	// 16-bit stage: masked slides between m1 pairs.
	vuint16m1_t v1lll = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2ll, 0));
	vuint16m1_t v1llh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2ll, 1));
	vuint16m1_t v1lhl = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2lh, 0));
	vuint16m1_t v1lhh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2lh, 1));
	vuint16m1_t v1hll = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hl, 0));
	vuint16m1_t v1hlh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hl, 1));
	vuint16m1_t v1hhl = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hh, 0));
	vuint16m1_t v1hhh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hh, 1));
	vuint16m1_t v1lllt = v1lll, v1lhlt = v1lhl, v1hllt = v1hll, v1hhlt = v1hhl;
	v1lll = __riscv_vslide1up_mu(m, v1lll, v1llh, 0, VL);
	v1lhl = __riscv_vslide1up_mu(m, v1lhl, v1lhh, 0, VL);
	v1hll = __riscv_vslide1up_mu(m, v1hll, v1hlh, 0, VL);
	v1hhl = __riscv_vslide1up_mu(m, v1hhl, v1hhh, 0, VL);
	m = meven;
	v1llh = __riscv_vslide1down_mu(m, v1llh, v1lllt, 0, VL);
	v1lhh = __riscv_vslide1down_mu(m, v1lhh, v1lhlt, 0, VL);
	v1hlh = __riscv_vslide1down_mu(m, v1hlh, v1hllt, 0, VL);
	v1hhh = __riscv_vslide1down_mu(m, v1hhh, v1hhlt, 0, VL);

	// Reassemble the eight m1 registers into one m8 group.
	return __riscv_vcreate_v_u16m1_u16m8(
		v1lll, v1llh, v1lhl, v1lhh,
		v1hll, v1hlh, v1hhl, v1hhh);
}
```
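For context, a minimal usage sketch (not part of the reproducer above): it assumes VLEN = 128, so that a single u16 LMUL=8 register group holds exactly one row-major 8x8 block of 64 elements; the wrapper name `trans8x8_block` is hypothetical.
```c
#include <stdint.h>
#include <riscv_vector.h>

/* Hypothetical wrapper, assuming VLEN = 128 so one u16 LMUL=8 register
 * group holds exactly one row-major 8x8 block (64 elements). */
void
trans8x8_block(uint16_t dst[64], const uint16_t src[64])
{
	size_t vl = __riscv_vsetvl_e16m8(64);           /* request all 64 elements */
	vuint16m8_t v = __riscv_vle16_v_u16m8(src, vl); /* load the 8x8 block */
	__riscv_vse16_v_u16m8(dst, trans8x8_vslide(v), vl); /* transpose and store */
}
```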
This is probably quite a hard problem, but I thought I'd share it anyway. Maybe even slight improvements on this example could have a compounding effect on mask register allocation.