https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109874

            Bug ID: 109874
           Summary: [SH] GCC 13's -Os code is 50% bigger than GCC 4's
           Product: gcc
           Version: 13.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: paul at crapouillou dot net
  Target Milestone: ---

Using the following C code snippet:

------
unsigned int CHRmask1,CHRmask2,CHRmask4,CHRmask8;

void SetupCartCHRMapping(unsigned int size)
{
#if 0
    CHRmask1 = (size >> 10) - 1;
    CHRmask2 = (size >> 11) - 1;
    CHRmask4 = (size >> 12) - 1;
    CHRmask8 = (size >> 13) - 1;
#else
    size >>= 10;

    CHRmask1 = size - 1;
    size >>= 1;
    CHRmask2 = size - 1;
    size >>= 1;
    CHRmask4 = size - 1;
    size >>= 1;
    CHRmask8 = size - 1;
#endif
}
------

Compiling with -Os, GCC 13.1 generates exactly the same code for both variants,
as it correctly detects that they are functionally equivalent:

------
_SetupCartCHRMapping:
        mov.l   r12,@-r15
        mova    .L3,r0
        mov.l   .L3,r12
        mov     r4,r1
        shlr8   r1
        add     r0,r12
        mov.l   .L4,r0
        shlr2   r1
        add     #-1,r1
        mov.l   r1,@(r0,r12)
        mov     r4,r1
        shlr8   r1
        mov.l   .L5,r0
        shlr    r1
        shlr2   r1
        add     #-1,r1
        mov.l   r1,@(r0,r12)
        mov     r4,r1
        shlr8   r1
        mov.l   .L6,r0
        shlr2   r1
        shlr2   r1
        shlr8   r4
        add     #-1,r1
        shlr2   r4
        mov.l   r1,@(r0,r12)
        shlr    r4
        mov.l   .L7,r0
        shlr2   r4
        add     #-1,r4
        mov.l   r4,@(r0,r12)
        rts     
        mov.l   @r15+,r12
.L3:
        .long   _GLOBAL_OFFSET_TABLE_
.L4:
        .long   _CHRmask1@GOTOFF
.L5:
        .long   _CHRmask2@GOTOFF
.L6:
        .long   _CHRmask4@GOTOFF
.L7:
        .long   _CHRmask8@GOTOFF
_CHRmask8:
        .zero   4
_CHRmask4:
        .zero   4
_CHRmask2:
        .zero   4
_CHRmask1:
        .zero   4
------

The code part (excluding labels and data fields) is 33 instructions.

GCC 4.9.4 does not detect that the two versions of the code are equivalent, and
generates different machine code for them. The second version (the #else
branch) produces the smallest code, at only 21 instructions:

------
_SetupCartCHRMapping:
        shlr8   r4
        shlr2   r4
        mov.l   .L2,r1
        mov     r4,r2
        add     #-1,r2
        mov.l   r2,@r1
        mov     r4,r1
        mov.l   .L3,r2
        shlr    r1
        add     #-1,r1
        mov.l   r1,@r2
        shlr2   r4
        mov.l   .L4,r1
        mov     r4,r2
        add     #-1,r2
        mov.l   r2,@r1
        shlr    r4
        mov.l   .L5,r1
        add     #-1,r4
        rts     
        mov.l   r4,@r1
.L2:
        .long   _CHRmask1
.L3:
        .long   _CHRmask2
.L4:
        .long   _CHRmask4
.L5:
        .long   _CHRmask8
------

So GCC 13.1 at -Os generates code that is more than 50% bigger (33 instructions
versus 21) than what GCC 4 would generate for a functionally equivalent
algorithm.

Reply via email to