Hi there,
I have implemented a move of a v16sf type like this because it is held by 4
v4sf registers:
--- snip ---
;; Expander for V16SFmode moves.  A V16SF value is held in four V4SF
;; registers, so the actual insns are generated by move_v16sf () in the
;; port's .c file rather than by this pattern's own template.
(define_expand "movv16sf"
;; Operand 0 is the destination, operand 1 is the source.
[(set (match_operand:V16SF 0 "nonimmediate_operand" "")
(match_operand:V16SF 1 "general_operand" ""))]
;; Empty condition string: no extra condition is placed on this expander.
""
;; Preparation statements.  Outside of reload, when the destination is not
;; a register and the source is not a register or constant, the source is
;; first forced into a register.  move_v16sf () then emits the move, and
;; DONE ends the expansion so the template above is not emitted itself.
" if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], V16SFmode)
&& !nonmemory_operand (operands[1], V16SFmode))
operands[1] = force_reg (V16SFmode, operands[1]);
move_v16sf( operands );
DONE;
")
--- end snip ---
and in the config's .c file:
--- snip ---
/* Emit the RTL for a V16SFmode move from operands[1] to operands[0].

   The move is split into four V4SFmode moves, one per 16-byte quarter of
   the value (byte offsets 0, 16, 32 and 48).  Three operand combinations
   are handled:

     register <- register   four SUBREG-to-SUBREG moves
     register <- memory     four loads through a copied address register
     memory   <- register   four stores through a copied address register

   Any other combination (for example a constant source or a SUBREG
   operand) aborts.  The caller treats the move as completely emitted, so
   returning without emitting anything would silently drop the move from
   the generated code.

   NOTE(review): the MEMs are built afresh with gen_rtx_MEM, so nothing
   from the original MEM rtx other than its address is carried over --
   confirm whether that matters for this port.  */
void
move_v16sf (operands)
     rtx operands[];
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  enum rtx_code code0 = GET_CODE (op0);
  enum rtx_code code1 = GET_CODE (op1);
  /* Size in bytes of one V4SF quarter, and the number of quarters.  */
  const int part_size = 16;
  const int num_parts = 4;
  int i;

  if (code0 == REG && code1 == REG)
    {
      /* Just in case, don't do anything for assigning a register
         to itself, unless we are filling a delay slot.  */
      if (REGNO (op0) == REGNO (op1) && set_nomacro == 0)
        return;

      for (i = 0; i < num_parts; i++)
        emit_move_insn (gen_rtx_SUBREG (V4SFmode, op0, i * part_size),
                        gen_rtx_SUBREG (V4SFmode, op1, i * part_size));
    }
  else if (code0 == REG && code1 == MEM)
    {
      /* Load each quarter from the source address plus its offset.  */
      rtx src_reg = copy_addr_to_reg (XEXP (op1, 0));

      for (i = 0; i < num_parts; i++)
        {
          /* The first quarter uses the bare address register.  */
          rtx addr = (i == 0
                      ? src_reg
                      : plus_constant (src_reg, i * part_size));

          emit_move_insn (gen_rtx_SUBREG (V4SFmode, op0, i * part_size),
                          gen_rtx_MEM (V4SFmode, addr));
        }
    }
  else if (code0 == MEM && code1 == REG)
    {
      /* Store each quarter to the destination address plus its offset.  */
      rtx dest_reg = copy_addr_to_reg (XEXP (op0, 0));

      for (i = 0; i < num_parts; i++)
        {
          /* The first quarter uses the bare address register.  */
          rtx addr = (i == 0
                      ? dest_reg
                      : plus_constant (dest_reg, i * part_size));

          emit_move_insn (gen_rtx_MEM (V4SFmode, addr),
                          gen_rtx_SUBREG (V4SFmode, op1, i * part_size));
        }
    }
  else
    /* Unsupported operand combination: fail loudly instead of emitting
       no insns for a move the caller believes has been generated.  */
    abort ();
}
--- end snip ---
This works OK, but it produces inefficient code. Here is some sample source
code:
--- snip ---
/* Vector types selected through the GCC mode attribute: v4 has mode V4SF
   and m4 has mode V16SF.  */
typedef int v4 __attribute__((mode(V4SF)));
typedef int m4 __attribute__((mode(V16SF)));
/* Globals used as asm operands below.  */
v4 vec1, vec2;
m4 frog;
/* Reproducer: two asm statements chained through the local V16SF value
   "blob".  Every operand uses the port-specific "j" constraint.  */
int main( int argc, char* argv[] )
{
m4 blob;
/* First asm: writes blob (early-clobber output "=&j") from vec1, vec2
   and frog.  */
asm( "some_instruction %0,%1,%2,%3" : "=&j" (blob): "j" (vec1), "j" (vec2),
"j" (frog) );
/* Second asm: writes frog (early-clobber output) from blob.  */
asm( "some_instruction2 %0,%1" : "=&j" (frog) : "j" (blob) );
return 0;
}
--- end snip ---
where j is the register class for v4sf and v16sf types.
This produces a move of the v16sf type between the two asm instructions
when it doesn't need to. Does anyone have any ideas why this move isn't
eliminated?
#APP
some_instruction r10,r22,r20,r00
#NO_APP
move r00,r10
move r01,r11
move r02,r12
move r03,r13
#APP
some_instruction2 r10, r00
r10 doesn't need to be preserved (it isn't written out), but it seems to be
making a copy anyway. Worse, if "blob" is defined in global space like
"frog", then it also writes out r10 to memory when it shouldn't.
Any ideas appreciated.
Regards
- Re: moving v16sf reg with multiple sub-regs Dylan Cuthbert
- Re: moving v16sf reg with multiple sub-regs Dylan Cuthbert
- Re: moving v16sf reg with multiple sub-regs James E Wilson
- Re: moving v16sf reg with multiple sub-regs Dylan Cuthbert
- Re: moving v16sf reg with multiple sub-regs James E Wilson
- Re: moving v16sf reg with multiple sub-regs Dylan Cuthbert
- Re: moving v16sf reg with multiple sub-regs James E Wilson
- Re: moving v16sf reg with multiple sub-... Dylan Cuthbert
- Re: moving v16sf reg with multiple... James E Wilson
- Re: moving v16sf reg with mult... Dylan Cuthbert
- Re: moving v16sf reg with multiple sub-regs Richard Sandiford