https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116542

            Bug ID: 116542
           Summary: [avr] Missed post increment optimization
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: gjl at gcc dot gnu.org
  Target Milestone: ---

Created attachment 59027
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=59027&action=edit
incdec.c: GNU-C99 test case

The attached test case produces very expensive code.  For example, take the
following function (the comments show the instruction count that decent code
would need for each step):

$ avr-gcc incdec.c -mmcu=avr4 -S -Os -dp

uint8_t add1 (const uint8_t *bb, uint8_t nn)
{
    // Set Z or X = bb (1 MOVW or 2 MOVs)
    uint8_t sum = 0;   // 1 instruction
    do
    {
        sum += *bb++;  // 2 instructions: POST_INC load + add
    } while (--nn);    // 2 instructions: decrement + branch
    return sum;
}

add1:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
        mov r20,r24      ;  47  [c=4 l=1]  movqi_insn/0
        mov r18,r24      ;  48  [c=4 l=1]  movqi_insn/0
        mov r19,r25      ;  49  [c=4 l=1]  movqi_insn/0
        ldi r24,0                ;  50  [c=4 l=1]  movqi_insn/0
.L5:
        movw r30,r18     ;  38  [c=4 l=1]  *movhi/0
        subi r18,-1      ;  39  [c=4 l=2]  *addhi3_clobber/1
        sbci r19,-1
        ld r25,Z                 ;  40  [c=4 l=1]  movqi_insn/3
        add r24,r25      ;  41  [c=4 l=1]  *addqi3/0
        mov r25,r22      ;  42  [c=4 l=1]  movqi_insn/0
        sub r25,r18      ;  43  [c=4 l=1]  *subqi3/0
        add r25,r20      ;  55  [c=4 l=1]  *op8.for.cczn.plus/1
        brne .L5                 ;  56  [c=4 l=1]  branch_ZN
/* epilogue start */
        ret              ;  53  [c=0 l=1]  return

The loop body uses R18/R19 to hold and advance the address and copies it to
R30/R31 (Z) for each access, instead of just using Z with post-increment
(LD reg, Z+).

Then there is this fancy loop exit condition of start_address - current_address
+ nn == 0 instead of just decrementing nn (an -fivopts issue / its assumptions,
I guess).

Target: avr
Configured with: ../../source/gcc-master/configure --target=avr --disable-nls
--with-dwarf2 --with-gnu-as --with-gnu-ld --disable-shared
--with-long-double=64 --enable-languages=c,c++
gcc version 15.0.0 20240829 (experimental) (GCC)

Reply via email to