Hi,

Is there any reason why 3 loads are issued for these bitfield accesses? Two of 
the loads are bytes and one is a half-word, and the compiler appears to know 
the structure is aligned on a half-word boundary, so a single half-word load 
should suffice (see the hand-coded version below). Secondly, the RISC-V code 
uses a mixture of 32-bit and 64-bit adds and shifts. Thirdly, with -Os the 
RISC-V code size is the same, but the schedule is less than optimal, i.e. the 
3rd load is issued much later.

- https://cx.rv8.io/g/2YDLTA

code:

        /* Three consecutive 5-bit unsigned bit-fields: 15 bits of payload in total. */
        struct foo {
          unsigned int a : 5;  /* 5 bits wide, holds 0..31 */
          unsigned int b : 5;  /* 5 bits wide, holds 0..31 */
          unsigned int c : 5;  /* 5 bits wide, holds 0..31 */
        };

        /*
         * Returns the sum of the three bit-fields of *p.
         * Each field is 5 bits wide, so the result is at most 3 * 31 = 93.
         * p is dereferenced unconditionally and must point to a valid struct foo.
         */
        unsigned int proc_foo(struct foo *p)
        {
            return p->a + p->b + p->c;
        }

riscv asm:

        # Compiler-generated RISC-V code: a0 = p on entry, a0 = result on return.
        # Issues three loads (one half-word, two bytes) that all cover the same 2 bytes.
        proc_foo(foo*):
          lhu a3,0(a0)      # a3 = zero-extended half-word at p+0
          lbu a4,0(a0)      # a4 = zero-extended byte at p+0
          lbu a5,1(a0)      # a5 = zero-extended byte at p+1
          srliw a3,a3,5     # word (32-bit) shift: discard the low 5 bits of the half-word
          andi a0,a4,31     # a0 = bits 0..4 of byte 0
          srli a5,a5,2      # full-register shift: discard the low 2 bits of byte 1
          andi a4,a3,31     # a4 = bits 5..9 of the half-word
          addw a0,a0,a4     # word (32-bit) add of the first two 5-bit values
          andi a5,a5,31     # a5 = bits 2..6 of byte 1 (bits 10..14 of the little-endian half-word)
          add a0,a0,a5      # full-register add of the third 5-bit value
          ret

x86_64 asm:

        # Compiler-generated x86_64 code (Intel syntax): rdi = p on entry, eax = result.
        # Also issues three loads; the third is scheduled after the first partial sum.
        proc_foo(foo*):
          movzx edx, BYTE PTR [rdi]     # edx = zero-extended byte at p+0
          movzx eax, WORD PTR [rdi]     # eax = zero-extended 16-bit word at p+0
          mov ecx, edx                  # ecx = copy of byte 0
          shr ax, 5                     # discard the low 5 bits of the word
          and eax, 31                   # eax = bits 5..9 of the word
          and ecx, 31                   # ecx = bits 0..4 of byte 0
          lea edx, [rcx+rax]            # edx = sum of the first two 5-bit values
          movzx eax, BYTE PTR [rdi+1]   # third load: zero-extended byte at p+1
          shr al, 2                     # discard the low 2 bits of byte 1
          and eax, 31                   # eax = bits 2..6 of byte 1 (bits 10..14 of the word)
          add eax, edx                  # eax = total of the three 5-bit values
          ret

hand coded riscv asm:

        # Hand-written alternative: one half-word load, then each 5-bit value is
        # extracted from a1 by shift and mask. a0 = p on entry, a0 = result on return.
        proc_foo(foo*):
          lhu a1,0(a0)      # a1 = zero-extended half-word at p+0 (the only load)
          srli a2,a1,5      # move bits 5..9 down to bits 0..4
          srli a3,a1,10     # move bits 10..14 down to bits 0..4
          andi a0,a1,31     # a0 = bits 0..4 of the half-word
          andi a2,a2,31     # a2 = bits 5..9 of the half-word
          andi a3,a3,31     # a3 = bits 10..14 of the half-word
          add a0,a0,a2      # accumulate the second value
          add a0,a0,a3      # accumulate the third value; sum is at most 93
          ret

Michael

Reply via email to