https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111926

            Bug ID: 111926
           Summary: RISC-V: Use vsetvl insn replace csrr vlenb insn
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: lehua.ding at rivai dot ai
  Target Milestone: ---

We can use:
        vsetvli a5, zero, e8, mf8, ta, ma
to replace:
        csrr    a4,vlenb
        srli    a4,a4,3

The reason for this is that hardware implementations tend to optimise the
vsetvli instruction better than the csrr instruction.

#include <riscv_vector.h>

/* Emit an asm statement whose template is just "#" but which lists every
   vector register v0-v31 as clobbered, so that no vector value can stay in
   a register across this point.  The expansion already ends in ';'.  */
#define exhaust_vector_regs()                                                  \
  asm volatile("#" ::                                                          \
                 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
                   "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",     \
                   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",     \
                   "v26", "v27", "v28", "v29", "v30", "v31");

/* Reproducer for the report: read one vint8mf8_t from IN, keep it live
   across exhaust_vector_regs (), then write it to OUT.  */
void
spill_1 (int8_t *in, int8_t *out)
{
  /* Load the vector value through a pointer cast of IN.  */
  vint8mf8_t v1 = *(vint8mf8_t*)in;
  /* Every vector register is clobbered here while v1 is still needed;
     presumably this is what forces the stack spill/reload seen in the
     generated code -- confirm against the listing.  */
  exhaust_vector_regs ();
  /* Store the preserved value through a pointer cast of OUT.  */
  *(vint8mf8_t*)out = v1;
}

spill_1(signed char*, signed char*):
        csrr    a4,vlenb
        srli    a4,a4,3
        csrr    t0,vlenb
        slli    a3,a4,3
        sub     sp,sp,t0
        sub     a3,a3,a4
        add     a3,a3,sp
        vsetvli a5,zero,e8,mf8,ta,ma
        vle8.v  v1,0(a0)
        vse8.v  v1,0(a3)
        csrr    a4,vlenb
        srli    a4,a4,3
        slli    a3,a4,3
        sub     a3,a3,a4
        add     a3,a3,sp
        vle8.v  v1,0(a3)
        csrr    t0,vlenb
        vse8.v  v1,0(a1)
        add     sp,sp,t0
        jr      ra


https://godbolt.org/z/TcKxbjnoh

Reply via email to