https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60884

--- Comment #5 from Oleg Endo <olegendo at gcc dot gnu.org> ---
The test case gcc.target/sh/memset.c:

void
test00(char *dstb)
{
  __builtin_memset (dstb, 0, 15);
}


compiles to:
        mov     r4,r0
        tst     #3,r0
        mov     #0,r1
        bf/s    .L5
        mov     #15,r2
        mov     #3,r2
.L3:
        mov.l   r1,@r4   << this loop runs 3x;
        dt      r2       << it would be better to emit 3x mov.l instead
        bf/s    .L3
        add     #4,r4

        mov.b   r1,@r4
        add     #1,r4
        mov.b   r1,@r4
        add     #1,r4
        rts
        mov.b   r1,@r4
        .align 1
.L5:
        mov.b   r1,@r4
        dt      r2
        bf/s    .L5
        add     #1,r4
        rts
        nop

Especially when the number of iterations is known at compile time, we should
try to unroll the loops.

Reply via email to