Consider the following RTL peephole from avr.md:
(define_peephole2 ; avr.md:5387
  [(match_scratch:QI 3 "d")
   (parallel [(set (match_operand:ALL4 0 "register_operand" "")
                   (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "")
                                (match_operand:QI 2 "const_int_operand" "")))
              (clobber (reg:CC REG_CC))])]
  ""
  [(parallel [(set (match_dup 0)
                   (ashift:ALL4 (match_dup 1)
                                (match_dup 2)))
              (clobber (match_dup 3))
              (clobber (reg:CC REG_CC))])])
As far as I understand, its purpose is to provide a QImode scratch
register when such a scratch is available.
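As far as I can tell, the match_scratch is what makes the generated code
(in insn-recog.cc, if I read it correctly) call
recog.cc::peep2_find_free_register to come up with a dead "d" register;
for reference, its signature is
    rtx peep2_find_free_register (int from, int to, const char *class_str,
                                  machine_mode mode, HARD_REG_SET *reg_set);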
However, in the .peephole2 RTL dump with -da I see the following:
Splitting with gen_peephole2_100 (avr.md:5387)
...
(insn 24 8 15 2 (parallel [
(set (reg:SI 22 r22 [orig:47 _3 ] [47])
(ashift:SI (reg:SI 20 r20 [orig:48 x ] [48])
(const_int 7 [0x7])))
(clobber (reg:QI 24 r24))
(clobber (reg:CC 36 cc))
])
(nil))
That is, the scratch r24:QI overlaps the output in r22:SI: all AVR hard
registers are 8 bits wide, hence r22:SI extends over r22...r25.
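What I would have expected is a scratch that does not overlap the
operands, for example (hypothetical; any dead "d" register outside
r20...r25 would do):
    (clobber (reg:QI 26 r26))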
A scratch that overlaps the operands is pretty much useless, or even
plain wrong.  recog.cc::peep2_find_free_register() has this code (note
the comment about registers set or clobbered by the insn):
    from = peep2_buf_position (peep2_current + from);
    to = peep2_buf_position (peep2_current + to);
    gcc_assert (peep2_insn_data[from].insn != NULL_RTX);
    REG_SET_TO_HARD_REG_SET (live, peep2_insn_data[from].live_before);
    while (from != to)
      {
        gcc_assert (peep2_insn_data[from].insn != NULL_RTX);
        /* Don't use registers set or clobbered by the insn.  */
        FOR_EACH_INSN_DEF (def, peep2_insn_data[from].insn)
          SET_HARD_REG_BIT (live, DF_REF_REGNO (def));
        from = peep2_buf_position (from + 1);
      }
So is this bogus in that it assumes every register extends over only
one hard reg?
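If that is indeed the problem, i.e. only the leading hard reg of a
multi-register def ends up in LIVE, then I would naively expect
something along these lines (untested sketch just to illustrate the
idea, not a proposed fix):
        /* Untested sketch: mark every hard reg covered by the def,
           not just its first one.  */
        FOR_EACH_INSN_DEF (def, peep2_insn_data[from].insn)
          add_to_hard_reg_set (&live, GET_MODE (DF_REF_REG (def)),
                               DF_REF_REGNO (def));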
FYI, the purpose of the peephole is to provide a scratch without
increasing register pressure (which a match_scratch in the insn itself
would do).  Therefore, the RTL peephole is used instead of forcing
reload to come up with a scratch.
More specifically, I see this with
$ avr-gcc bogus-peep2.c -S -Os -da
long ashl32_7 (int i, long x)
{
return x << 7;
}
with the attached WIP patch atop trunk b222ee10045d.
Johann
Target: avr
Configured with: ../../source/gcc-master/configure --target=avr
--disable-nls --with-dwarf2 --with-gnu-as --with-gnu-ld --disable-shared
--enable-languages=c,c++
Thread model: single
Supported LTO compression algorithms: zlib
gcc version 15.0.0 20241119 (experimental) (GCC)
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index d316e0182a2..f2474f46d6b 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -169,6 +169,8 @@ extern rtx cc_reg_rtx;
extern rtx ccn_reg_rtx;
extern rtx cczn_reg_rtx;
+extern bool avr_shift_is_3op;
+
#endif /* RTX_CODE */
#ifdef REAL_VALUE_TYPE
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 508e2d147bf..8b8801e44ec 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -229,6 +229,14 @@ bool avr_need_clear_bss_p = false;
bool avr_need_copy_data_p = false;
bool avr_has_rodata_p = false;
+/* Whether some shift insn alternatives are a 3-operand insn
+   or a 2-operand insn.  This is used when shift insns are
+   split by ???.  The split alternatives allow the source
+   and the destination register of the shift to be different
+   right from the start, because the split will split off
+   a byte shift, which allows 3 operands.  */
+bool avr_shift_is_3op = false;
+
/* Transform UP into lowercase and write the result to LO.
You must provide enough space for LO. Return LO. */
@@ -437,6 +445,8 @@ avr_set_core_architecture (void)
static void
avr_option_override (void)
{
+ avr_shift_is_3op = true;
+
/* caller-save.cc looks for call-clobbered hard registers that are assigned
to pseudos that cross calls and tries so save-restore them around calls
in order to reduce the number of stack slots needed.
@@ -6590,7 +6600,7 @@ avr_out_cmp_ext (rtx xop[], rtx_code code, int *plen)
/* Generate asm equivalent for various shifts. This only handles cases
- that are not already carefully hand-optimized in ?sh??i3_out.
+ that are not already carefully hand-optimized in ?sh<mode>3_out.
OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted.
OPERANDS[2] is the shift count as CONST_INT, MEM or REG.
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 04d838ef8a7..73306f6b85a 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -184,73 +184,75 @@ (define_attr "adjust_len"
;; no_xmega: non-XMEGA core xmega : XMEGA core
;; no_adiw: ISA has no ADIW, SBIW adiw : ISA has ADIW, SBIW
+;; The following ISA attributes are actually not architecture specific,
+;; but depend on (optimization) options.  This is because the "enabled"
+;; attribute can't depend on more than one other attribute.  This means
+;; that 2op and 3op must work for all ISAs, and hence a 'flat' attribute
+;; scheme can be used (as opposed to a true Cartesian product).
+
+;; 2op : insn is a 2-operand insn 3op : insn is a 3-operand insn
+
(define_attr "isa"
"mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
no_adiw,adiw,
+ 2op,3op,
standard"
(const_string "standard"))
(define_attr "enabled" ""
- (cond [(eq_attr "isa" "standard")
- (const_int 1)
+ (if_then_else
+ (ior (eq_attr "isa" "standard")
(and (eq_attr "isa" "mov")
(match_test "!AVR_HAVE_MOVW"))
- (const_int 1)
(and (eq_attr "isa" "movw")
(match_test "AVR_HAVE_MOVW"))
- (const_int 1)
(and (eq_attr "isa" "rjmp")
(match_test "!AVR_HAVE_JMP_CALL"))
- (const_int 1)
(and (eq_attr "isa" "jmp")
(match_test "AVR_HAVE_JMP_CALL"))
- (const_int 1)
(and (eq_attr "isa" "ijmp")
(match_test "!AVR_HAVE_EIJMP_EICALL"))
- (const_int 1)
(and (eq_attr "isa" "eijmp")
(match_test "AVR_HAVE_EIJMP_EICALL"))
- (const_int 1)
(and (eq_attr "isa" "lpm")
(match_test "!AVR_HAVE_LPMX"))
- (const_int 1)
(and (eq_attr "isa" "lpmx")
(match_test "AVR_HAVE_LPMX"))
- (const_int 1)
(and (eq_attr "isa" "elpm")
(match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
- (const_int 1)
(and (eq_attr "isa" "elpmx")
(match_test "AVR_HAVE_ELPMX"))
- (const_int 1)
(and (eq_attr "isa" "xmega")
(match_test "AVR_XMEGA"))
- (const_int 1)
(and (eq_attr "isa" "no_xmega")
(match_test "!AVR_XMEGA"))
- (const_int 1)
(and (eq_attr "isa" "adiw")
(match_test "AVR_HAVE_ADIW"))
- (const_int 1)
(and (eq_attr "isa" "no_adiw")
(match_test "!AVR_HAVE_ADIW"))
- (const_int 1)
- ] (const_int 0)))
+ (and (eq_attr "isa" "2op")
+ (match_test "!avr_shift_is_3op"))
+
+ (and (eq_attr "isa" "3op")
+ (match_test "avr_shift_is_3op"))
+ )
+ (const_int 1)
+ (const_int 0)))
;; Define mode iterators
@@ -5257,28 +5259,31 @@ (define_peephole2
;; "ashlsq3" "ashlusq3"
;; "ashlsa3" "ashlusa3"
(define_insn_and_split "ashl<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))]
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C31,C07,C07,n,Qm")))]
""
"#"
"&& reload_completed"
[(parallel [(set (match_dup 0)
(ashift:ALL4 (match_dup 1)
(match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "isa" "*,*,*,2op,3op,*,*")])
(define_insn "*ashl<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r ,r,r,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r ,0,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,L,P,O C31,K,n,Qm")))
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C31,C07,C07,n,Qm")))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
return ashlsi3_out (insn, operands, NULL);
}
- [(set_attr "length" "8,0,4,5,8,10,12")
- (set_attr "adjust_len" "ashlsi")])
+ [(set_attr "length" "12")
+ (set_attr "adjust_len" "ashlsi")
+ (set_attr "isa" "*,*,*,2op,3op,*,*")])
;; Optimize if a scratch register from LD_REGS happens to be available.
@@ -5396,32 +5401,35 @@ (define_peephole2
;; "*ashlsi3_const"
;; "*ashlsq3_const" "*ashlusq3_const"
;; "*ashlsa3_const" "*ashlusa3_const"
-(define_insn_and_split "*ashl<mode>3_const_split"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
- (match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))]
- "reload_completed"
- "#"
- "&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+;; (define_insn_and_split "*ashl<mode>3_const_split"
+;; [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r")
+;; (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0")
+;; (match_operand:QI 2 "const_int_operand" "LP,O C31,C07,C07,n")))
+;; (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d ,&d"))]
+;; "reload_completed"
+;; "#"
+;; "&& reload_completed"
+;; [(parallel [(set (match_dup 0)
+;; (ashift:ALL4 (match_dup 1)
+;; (match_dup 2)))
+;; (clobber (match_dup 3))
+;; (clobber (reg:CC REG_CC))])]
+;; ""
+;; [(set_attr "isa" "*,*,2op,3op,*")])
(define_insn "*ashl<mode>3_const"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r,r ,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r ,0")
- (match_operand:QI 2 "const_int_operand" "L,P,O C31,n")))
- (clobber (match_scratch:QI 3 "=X,X,X ,&d"))
+ [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0")
+ (match_operand:QI 2 "const_int_operand" "LP,O C31,C07,C07,n")))
+ (clobber (match_operand:QI 3 "scratch_or_d_register_operand" "=X ,X ,&d ,&d ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
return ashlsi3_out (insn, operands, NULL);
}
- [(set_attr "length" "0,4,5,10")
- (set_attr "adjust_len" "ashlsi")])
+ [(set_attr "length" "10")
+ (set_attr "adjust_len" "ashlsi")
+ (set_attr "isa" "*,*,2op,3op,*")])
(define_expand "ashlpsi3"
[(parallel [(set (match_operand:PSI 0 "register_operand" "")
;; Function ashl32_7 (ashl32_7, funcdef_no=0, decl_uid=1900, cgraph_uid=1,
symbol_order=0)
starting the processing of deferred insns
ending the processing of deferred insns
df_analyze called
ashl32_7
Dataflow summary:
def_info->table_size = 0, use_info->table_size = 0
;; fully invalidated by EH 0 [r0] 1 [r1] 18 [r18] 19 [r19] 20 [r20] 21
[r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25] 26 [r26] 27 [r27] 30 [r30] 31 [r31]
33 [__SP_H__] 35 [argH] 36 [cc]
;; hardware regs used 32 [__SP_L__]
;; regular block artificial uses 32 [__SP_L__]
;; eh block artificial uses 32 [__SP_L__] 34 [argL]
;; entry block defs 8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14
[r14] 15 [r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22]
23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; exit block uses 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; regs ever live 20 [r20] 21 [r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25]
36 [cc]
;; ref usage r8={1d} r9={1d} r10={1d} r11={1d} r12={1d} r13={1d} r14={1d}
r15={1d} r16={1d} r17={1d} r18={1d} r19={1d} r20={1d,1u} r21={1d,1u}
r22={2d,3u} r23={2d,3u} r24={2d,2u} r25={2d,2u} r32={1d,2u} r36={1d}
;; total ref usage 38{24d,14u,0e} in 3{3 regular + 0 call} insns.
( )->[0]->( 2 )
;; bb 0 artificial_defs: { d-1(8){ }d-1(9){ }d-1(10){ }d-1(11){ }d-1(12){
}d-1(13){ }d-1(14){ }d-1(15){ }d-1(16){ }d-1(17){ }d-1(18){ }d-1(19){ }d-1(20){
}d-1(21){ }d-1(22){ }d-1(23){ }d-1(24){ }d-1(25){ }d-1(32){ }}
;; bb 0 artificial_uses: { }
;; lr in
;; lr use
;; lr def 8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14 [r14] 15
[r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22] 23 [r23]
24 [r24] 25 [r25] 32 [__SP_L__]
;; live in
;; live gen 8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14 [r14] 15
[r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22] 23 [r23]
24 [r24] 25 [r25] 32 [__SP_L__]
;; live kill
;; lr out 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; live out 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
( 0 )->[2]->( 1 )
;; bb 2 artificial_defs: { }
;; bb 2 artificial_uses: { u-1(32){ }}
;; lr in 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr use 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr def 22 [r22] 23 [r23] 24 [r24] 25 [r25] 36 [cc]
;; live in 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; live gen 22 [r22] 23 [r23] 24 [r24] 25 [r25]
;; live kill 36 [cc]
;; lr out 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; live out 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
( 2 )->[1]->( )
;; bb 1 artificial_defs: { }
;; bb 1 artificial_uses: { u-1(22){ }u-1(23){ }u-1(24){ }u-1(25){ }u-1(32){ }}
;; lr in 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; lr use 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; lr def
;; live in 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; live gen
;; live kill
;; lr out
;; live out
Finding needed instructions:
Adding insn 20 to worklist
Adding insn 13 to worklist
Finished finding needed instructions:
processing block 2 lr out = 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
Adding insn 17 to worklist
Splitting with gen_peephole2_100 (avr.md:5388)
scanning new insn with uid = 22.
deleting insn with uid = 17.
verify found no changes in insn with uid = 22.
starting the processing of deferred insns
ending the processing of deferred insns
ashl32_7
Dataflow summary:
;; fully invalidated by EH 0 [r0] 1 [r1] 18 [r18] 19 [r19] 20 [r20] 21
[r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25] 26 [r26] 27 [r27] 30 [r30] 31 [r31]
33 [__SP_H__] 35 [argH] 36 [cc]
;; hardware regs used 32 [__SP_L__]
;; regular block artificial uses 32 [__SP_L__]
;; eh block artificial uses 32 [__SP_L__] 34 [argL]
;; entry block defs 8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14
[r14] 15 [r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22]
23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; exit block uses 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; regs ever live 20 [r20] 21 [r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25]
36 [cc]
;; ref usage r8={1d} r9={1d} r10={1d} r11={1d} r12={1d} r13={1d} r14={1d}
r15={1d} r16={1d} r17={1d} r18={1d} r19={1d} r20={1d,1u} r21={1d,1u}
r22={2d,3u} r23={2d,3u} r24={3d,2u} r25={2d,2u} r32={1d,2u} r36={1d}
;; total ref usage 39{25d,14u,0e} in 3{3 regular + 0 call} insns.
(note 1 0 5 NOTE_INSN_DELETED)
;; basic block 2, loop depth 0, count 1073741824 (estimated locally, freq
1.0000), maybe hot
;; prev block 0, next block 1, flags: (REACHABLE, RTL, MODIFIED)
;; pred: ENTRY [always] count:1073741824 (estimated locally, freq
1.0000) (FALLTHRU)
;; bb 2 artificial_defs: { }
;; bb 2 artificial_uses: { u-1(32){ }}
;; lr in 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr use 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr def 22 [r22] 23 [r23] 24 [r24] 25 [r25] 36 [cc]
;; live in 20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; live gen 22 [r22] 23 [r23] 24 [r24] 25 [r25]
;; live kill 36 [cc]
(note 5 1 18 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(note 18 5 3 2 NOTE_INSN_PROLOGUE_END)
(note 3 18 4 2 NOTE_INSN_DELETED)
(note 4 3 22 2 NOTE_INSN_FUNCTION_BEG)
(insn 22 4 13 2 (parallel [
(set (reg:SI 22 r22 [orig:46 _2 ] [46])
(ashift:SI (reg:SI 20 r20 [orig:47 x ] [47])
(const_int 7 [0x7])))
(clobber (reg:QI 24 r24))
(clobber (reg:CC 36 cc))
]) "bogus-peep2.c":3:14 -1
(nil))
(insn 13 22 19 2 (use (reg/i:SI 22 r22)) "bogus-peep2.c":4:1 -1
(nil))
(note 19 13 20 2 NOTE_INSN_EPILOGUE_BEG)
(jump_insn 20 19 21 2 (return) "bogus-peep2.c":4:1 -1
(nil)
-> return)
;; succ: EXIT [always] count:1073741824 (estimated locally, freq 1.0000)
;; lr out 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; live out 22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
(barrier 21 20 16)
(note 16 21 0 NOTE_INSN_DELETED)