https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108778
Bug ID: 108778
Summary: Missing optimization with direct register access
instead of structure mapping
Product: gcc
Version: 12.2.1
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: klaus.doldinger64 at googlemail dot com
Target Milestone: ---
The following example uses two functionally identical ways to save the SREG of an
AVR µC, in this case an avr128da32.
SREG is accessed in two different ways: directly via the SREG macro, and via a
structure mapping (the Cpu_t structure is not actually included in the avr
headers, so a definition of it is given here).
When the SREG macro is used, the optimization of the variable g is missed (see
the assembler code below).
With the structure mapping, the load / store of g is correctly moved out of
the loop.
With the SREG macro, unfortunately, it is not!
#include <stdint.h>
#include <util/atomic.h>
#include <avr/interrupt.h>
#include <avr/cpufunc.h>
// Yields a volatile-qualified lvalue for x: takes x's address, casts it to a
// pointer to volatile of x's own type, and dereferences it. Usable for both
// reads and writes (func() below does both on flag).
// Relies on the typeof keyword (a GNU extension; standard only from C23).
#define ACCESS_ONCE(x) (*(volatile typeof(x)*)&(x))
// Hand-written register block that the CPU macro below overlays onto address
// 0x0030. Every member is volatile.
// Thirteen one-byte members precede the 16-bit sp, so sreg sits at byte
// offset 15 provided the compiler inserts no padding (TODO confirm for the
// target ABI). With base 0x0030 that is address 0x003F, i.e. the 63 shown
// for the SREG macro access in the second assembly listing of this report.
typedef struct Cpu { // this is missing in avr headers
// r0..r3: presumably unused filler slots that only fix the offsets — confirm
// against the device datasheet.
volatile uint8_t r0;
volatile uint8_t r1;
volatile uint8_t r2;
volatile uint8_t r3;
volatile uint8_t ccp;
// r5..ra: presumably filler slots as well — confirm.
volatile uint8_t r5;
volatile uint8_t r6;
volatile uint8_t r7;
volatile uint8_t r8;
volatile uint8_t r9;
volatile uint8_t ra;
volatile uint8_t rampz;
volatile uint8_t rc;
// Only 16-bit member of the block.
volatile uint16_t sp;
// The member count() saves and restores.
volatile uint8_t sreg;
} Cpu_t;
// Lvalue for the whole register block: the constant address 0x0030 cast to
// Cpu_t * and dereferenced.
#define CPU (*(Cpu_t *) 0x0030)
// Set to 1 by the USART0_RXC_vect handler once counter is at least 100;
// read and conditionally rewritten through ACCESS_ONCE in func().
static uint8_t flag;
// Incremented by the USART0_RXC_vect handler; read through ACCESS_ONCE in count().
static uint16_t counter;
// Running sum of count() results, updated in func(). Where its loads and
// stores end up (inside or outside the loop) is what differs between the two
// assembly listings in this report.
static uint16_t g;
// Returns one volatile read of counter, taken between saving the status
// register and writing the saved value back.
// Two interchangeable ways of touching the status register are shown: the
// active CPU.sreg lines and the commented-out SREG lines. Per this report,
// switching to the SREG lines keeps GCC from holding g in registers across
// the loop in func().
static inline uint16_t count() {
// Remember the current status register value.
const uint8_t save = CPU.sreg;
// const uint8_t save = SREG; // suppresses optimization
// AVR "cli" clears the global interrupt enable bit. The asm statement
// declares no outputs, inputs, or clobbers.
asm volatile("cli" : : :);
// Single volatile 16-bit read of counter.
const uint16_t t = ACCESS_ONCE(counter);
// SREG = save; // suppresses optimization
// Write the saved value back, restoring whatever interrupt-enable state it held.
CPU.sreg = save;
return t;
}
// Runs 20 iterations. Each one adds count() to g, then performs a volatile
// read of flag and, if the value is nonzero, a volatile store of 1 to flag.
// NOTE(review): the store writes 1, not 0, so flag is never cleared here —
// presumably intentional for the reproducer (both listings store 1); confirm.
static void func(void) {
for(uint8_t i = 0; i < 20; i++) {
// g is a plain (non-volatile) static; its accesses are the ones the report
// expects to be moved out of the loop.
g += count();
if (ACCESS_ONCE(flag)) {
ACCESS_ONCE(flag) = 1;
}
}
}
// Handler body for USART0_RXC_vect, declared through the ISR() macro
// (presumably provided by <avr/interrupt.h> — confirm). Each invocation
// increments counter and sets flag to 1 once counter is at least 100.
ISR(USART0_RXC_vect) {
// NOTE(review): _MemoryBarrier() is not defined in this file (presumably it
// comes from <avr/cpufunc.h>); assumed to be a compiler-level memory
// barrier — confirm.
_MemoryBarrier();
counter += 1;
// Nothing in the code shown resets counter, so once reached this condition
// holds until the uint16_t wraps around.
if (counter >= 100) {
flag = 1;
}
}
// Entry point: calls func() once. No return statement is written; both
// assembly listings in this report load 0 into r24/r25 before ret.
int main() {
func();
}
The generated assembly should be (as produced with the structure mapping):
main:
lds r24,g ; g_lsm.16, g
lds r25,g+1 ; g_lsm.16, g
ldi r18,lo8(20) ; ivtmp_7,
ldi r19,lo8(1) ; tmp56,
.L5:
in r22,__SREG__ ; save, MEM[(struct Cpu_t *)48B].sreg
cli
lds r20,counter ; t, MEM[(volatile uint16_t *)&counter]
lds r21,counter+1 ; t, MEM[(volatile uint16_t *)&counter]
out __SREG__,r22 ; MEM[(struct Cpu_t *)48B].sreg, save
add r24,r20 ; g_lsm.16, t
adc r25,r21 ; g_lsm.16, t
lds r20,flag ; _6, MEM[(volatile uint8_t *)&flag]
cpse r20,__zero_reg__ ; _6
sts flag,r19 ; MEM[(volatile uint8_t *)&flag], tmp56
.L4:
subi r18,lo8(-(-1)) ; ivtmp_7,
cpse r18,__zero_reg__ ; ivtmp_7,
rjmp .L5 ;
sts g,r24 ; g, g_lsm.16
sts g+1,r25 ; g, g_lsm.16
ldi r24,0 ;
ldi r25,0 ;
ret
But when the SREG macro is used, the generated assembly is:
main:
ldi r24,lo8(20) ; ivtmp_12,
ldi r25,lo8(1) ; tmp59,
.L5:
in r18,__SREG__ ; save, MEM[(volatile uint8_t *)63B]
cli
lds r20,counter ; t, MEM[(volatile uint16_t *)&counter]
lds r21,counter+1 ; t, MEM[(volatile uint16_t *)&counter]
out __SREG__,r18 ; MEM[(struct Cpu_t *)48B].sreg, save
lds r18,g ; g, g
lds r19,g+1 ; g, g
add r18,r20 ; tmp53, t
adc r19,r21 ; , t
sts g,r18 ; g, tmp53
sts g+1,r19 ; g, tmp53
lds r18,flag ; _6, MEM[(volatile uint8_t *)&flag]
cpse r18,__zero_reg__ ; _6
sts flag,r25 ; MEM[(volatile uint8_t *)&flag], tmp59
.L4:
subi r24,lo8(-(-1)) ; ivtmp_12,
cpse r24,__zero_reg__ ; ivtmp_12,
rjmp .L5 ;
ldi r24,0 ;
ldi r25,0 ;
ret