--- Comment #11 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
Hi, I think this RVV compiler codegen is the optimal codegen we want for RVV:

# Strip-mined RVV loop over 64-bit floating-point elements (e64, LMUL=1).
# Register roles as used in this loop:
#   a3 = element index processed so far, t0 = loop bound (total element count)
#   t2 = byte stride used by every strided load/store
#   t3, a6, a0 = base addresses of the three loaded streams
#   a1, a7     = base addresses of the two stored streams
#   fa1..fa5   = scalar floating-point operands (set outside this view)
.LBB0_5:                                # %vector.body
        sub     a4, t0, a3              # a4 = elements remaining (bound - index)
        vsetvli t1, a4, e64, m1, ta, mu # t1 = vl for this pass; SEW=64, LMUL=1, tail-agnostic, mask-undisturbed
        mul     a2, a3, t2              # a2 = byte offset of current index (index * stride)
        add     a5, t3, a2              # a5 = t3 + offset (kept live for the reload below)
        vlse64.v        v8, (a5), t2    # v8 = strided load from the t3 stream
        add     a4, a6, a2              # a4 = a6 + offset
        vlse64.v        v9, (a4), t2    # v9 = strided load from the a6 stream
        add     a4, a0, a2              # a4 = a0 + offset
        vlse64.v        v10, (a4), t2   # v10 = strided load from the a0 stream
        vfadd.vv        v8, v8, v9      # v8 = v8 + v9
        vfmul.vf        v8, v8, fa5     # v8 = v8 * fa5
        vfadd.vf        v9, v10, fa4    # v9 = v10 + fa4
        vfmadd.vf       v9, fa3, v10    # v9 = fa3 * v9 + v10 (destination is the multiplicand)
        vlse64.v        v10, (a5), t2   # v10 = t3 stream again: same address as the first load, whose v8 copy was overwritten
        add     a4, a1, a2              # a4 = a1 + offset
        vsse64.v        v9, (a4), t2    # strided store of v9 to the a1 stream
        vfadd.vf        v8, v8, fa2     # v8 = v8 + fa2
        vfmadd.vf       v8, fa3, v10    # v8 = fa3 * v8 + v10
        vfadd.vf        v8, v8, fa1     # v8 = v8 + fa1
        add     a2, a2, a7              # a2 = a7 + offset (offset register reused as the store address)
        add     a3, a3, t1              # advance index by the vl processed this pass
        vsse64.v        v8, (a2), t2    # strided store of v8 to the a7 stream
        bne     a3, t0, .LBB0_5         # repeat until index reaches the bound

Reply via email to