I'm porting GCC 4.6.2 to a 16-bit CPU that has four general-purpose (GP)
registers. I've chosen to use R3 as the frame pointer when one is needed.
In line with the GCC Internals documentation on FIXED_REGISTERS ("except on
machines where that can be used as a general register when no frame pointer is
needed"), I have not marked R3 as fixed. The problem described below does not
occur when the frame pointer (R3) is marked as fixed.
#define FIXED_REGISTERS
{
/* r0 r1 r2 r3 sp pc cc .....*/
0, 0, 0, 0, 1, 1, 1, 1
}
and the frame pointer (FP) is marked as eliminable via ELIMINABLE_REGS:
#define ELIMINABLE_REGS \
{ \
{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
{ ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \
}
TARGET_FUNCTION_ARG only allows parameters to be passed in R0 and R1.
This works correctly at all optimization levels (-O0 ... -O3) with both
-fomit-frame-pointer and -fno-omit-frame-pointer, except for the combination
"-Os -fno-omit-frame-pointer", where the following test snippet
struct ts
{
int x,z,y;
};
int f( struct ts *s)
{
while( --s->y )
{
s->x *= s->z/s->x;
}
return s->x;
}
produces ..
...
.L3:
mov r1,r2 ;# 13 *movhihi/1 [length = 2]
--> mov r3,[r3+2] ;# 51 *movhihi/3 [length = 4]
mov r0,[r3+2] ;# 14 *movhihi/3 [length = 4]
call ___divhi3 ;# 15 call_value_label/1 [length = 4]
mul r2,r0 ;# 17 mulhi3/2 [length = 2]
mov r0,[r3+2] ;# 52 *movhihi/3 [length = 4]
str r2,[r0] ;# 18 *movhihi/4 [length = 4]
.L2:
inc [r3],#-1 ;# 65 inchi3_cc [length = 2]
--> mov r3,[r3+2] ;# 55 *movhihi/3 [length = 4]
mov r2,[r3] ;# 41 *movhihi/3 [length = 4]
bne .L3 ;# 24 cbranchcc4 [length = 6]
...
I have marked with --> the lines that use r3 as a GP register and clobber
it. The corresponding RTL is ...
(insn 51 13 14 3 (set (reg:HIreg:3)
(mem/c:HI (plus:HI (reg/f:HIreg:3)
(const_int 2 [0x2])) [4 %sfp+2 S2 A16])) t.c:14 88
{*movhihi}
(nil))
(insn 14 51 15 3 (set (reg:HIreg:0)
(mem/s:HI (plus:HI (reg:HIreg:3)
(const_int 2 [0x2])) [2 s_1(D)->z+0 S2 A16])) t.c:14 88
{*movhihi}
(nil))
(call_insn/u 15 14 16 3 (set (reg:HIreg:0)
(call (mem:HI (symbol_ref:HI ("__divhi3") [flags 0x41]) [0 S2 A8])
When -O2 is used instead, the same fragment is generated correctly ...
.L4:
mov r1,[r3] ;# 23 *movhihi/3 [length = 4]
mov r0,[r3+2] ;# 24 *movhihi/3 [length = 4]
call ___divhi3 ;# 25 call_value_label/1 [length = 4]
mov r1,[r3] ;# 64 *movhihi/3 [length = 4]
mul r1,r0 ;# 28 mulhi3/2 [length = 2]
str r1,[r3] ;# 65 *movhihi/4 [length = 4]
add r2,#-1 ;# 29 addhi3_cc/3 [length = 2]
bne .L4 ;# 32 cbranchcc4 [length = 6]
I wonder if anyone can provide some hints before I spend time hunting down
an optimisation bug that is really a problem with my configuration?
Cheers, Paul