Consider the following RTL peephole from avr.md:

(define_peephole2               ; avr.md:5387
  [(match_scratch:QI 3 "d")
   (parallel [(set (match_operand:ALL4 0 "register_operand" "")
(ashift:ALL4 (match_operand:ALL4 1 "register_operand" "") (match_operand:QI 2 "const_int_operand" "")))
              (clobber (reg:CC REG_CC))])]
  ""
  [(parallel [(set (match_dup 0)
                   (ashift:ALL4 (match_dup 1)
                                (match_dup 2)))
              (clobber (match_dup 3))
              (clobber (reg:CC REG_CC))])])

As far as I understand, its purpose is to provide a QImode
scratch register provided such a scratch is available.

However, in the .peephole2 RTL dump with -da I see the following:

Splitting with gen_peephole2_100 (avr.md:5387)
...
(insn 24 8 15 2 (parallel [
            (set (reg:SI 22 r22 [orig:47 _3 ] [47])
                 (ashift:SI (reg:SI 20 r20 [orig:48 x ] [48])
                            (const_int 7 [0x7])))
            (clobber (reg:QI 24 r24))
            (clobber (reg:CC 36 cc))
        ])
     (nil))

That is, the scratch r24:QI is overlapping the output in
r22:SI.  All hard registers are 8-bit regs and hence r22:SI
extends from r22...r25.

A scratch that overlaps the operands is pretty much useless
or even plain wrong.  recog.cc::peep2_find_free_register()
has this comment:  /* Don't use registers set or clobbered by the insn.  */

  from = peep2_buf_position (peep2_current + from);
  to = peep2_buf_position (peep2_current + to);

  gcc_assert (peep2_insn_data[from].insn != NULL_RTX);
  REG_SET_TO_HARD_REG_SET (live, peep2_insn_data[from].live_before);

  while (from != to)
    {
      gcc_assert (peep2_insn_data[from].insn != NULL_RTX);

      /* Don't use registers set or clobbered by the insn.  */
      FOR_EACH_INSN_DEF (def, peep2_insn_data[from].insn)
        SET_HARD_REG_BIT (live, DF_REF_REGNO (def));

      from = peep2_buf_position (from + 1);
    }

So it this bogus in that it assumes all registers extend only
over one hard reg?

FYI, the purpose is to provide a scratch without increasing the register
pressure (which "match_scratch" would do).  Therefore, the RTL peephole
is used instead of forcing reload to come up with a scratch.

More specifically, I see this with

$ avr-gcc bogus-peep2.c -S -Os -da

long ashl32_7 (int i, long x)
{
    return x << 7;
}

with the attached WIP patch atop trunk b222ee10045d.

Johann

Target: avr
Configured with: ../../source/gcc-master/configure --target=avr --disable-nls --with-dwarf2 --with-gnu-as --with-gnu-ld --disable-shared --enable-languages=c,c++
Thread model: single
Supported LTO compression algorithms: zlib
gcc version 15.0.0 20241119 (experimental) (GCC)
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index d316e0182a2..f2474f46d6b 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -169,6 +169,8 @@ extern rtx cc_reg_rtx;
 extern rtx ccn_reg_rtx;
 extern rtx cczn_reg_rtx;
 
+extern bool avr_shift_is_3op;
+
 #endif /* RTX_CODE */
 
 #ifdef REAL_VALUE_TYPE
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 508e2d147bf..8b8801e44ec 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -229,6 +229,14 @@ bool avr_need_clear_bss_p = false;
 bool avr_need_copy_data_p = false;
 bool avr_has_rodata_p = false;
 
+/* Whether some shift insn alternatives are a 3-operand insn
+   or a 2-operand insn.  This is used when shift insns are
+   split by ???.  The splitted alternatives allow the source
+   and the destination register of the shift to be different
+   right from the start, because the split will split off
+   a byte-shift which allows 3 operands.  */
+bool avr_shift_is_3op = false;
+
 
 /* Transform UP into lowercase and write the result to LO.
    You must provide enough space for LO.  Return LO.  */
@@ -437,6 +445,8 @@ avr_set_core_architecture (void)
 static void
 avr_option_override (void)
 {
+  avr_shift_is_3op = true;
+
   /* caller-save.cc looks for call-clobbered hard registers that are assigned
      to pseudos that cross calls and tries so save-restore them around calls
      in order to reduce the number of stack slots needed.
@@ -6590,7 +6600,7 @@ avr_out_cmp_ext (rtx xop[], rtx_code code, int *plen)
 
 
 /* Generate asm equivalent for various shifts.  This only handles cases
-   that are not already carefully hand-optimized in ?sh??i3_out.
+   that are not already carefully hand-optimized in ?sh<mode>3_out.
 
    OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted.
    OPERANDS[2] is the shift count as CONST_INT, MEM or REG.
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 04d838ef8a7..73306f6b85a 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -184,73 +184,75 @@ (define_attr "adjust_len"
 ;; no_xmega: non-XMEGA core              xmega : XMEGA core
 ;; no_adiw:  ISA has no ADIW, SBIW       adiw  : ISA has ADIW, SBIW
 
+;; The following ISA attributes are actually not architecture specific,
+;; but depend on (optimization) options.  This is because the "enabled"
+;; attribut can't depend on more than one other attribute.  This means
+;; that 2op and 3op must work for all ISAs, and hence a 'flat' attribue
+;; scheme can be used (as opposed to a true cartesian product).
+
+;; 2op  : insn is a 2-operand insn       3op   : insn is a 3-operand insn
+
 (define_attr "isa"
   "mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega,
    no_adiw,adiw,
+   2op,3op,
    standard"
   (const_string "standard"))
 
 (define_attr "enabled" ""
-  (cond [(eq_attr "isa" "standard")
-         (const_int 1)
+  (if_then_else
+    (ior (eq_attr "isa" "standard")
 
          (and (eq_attr "isa" "mov")
               (match_test "!AVR_HAVE_MOVW"))
-         (const_int 1)
 
          (and (eq_attr "isa" "movw")
               (match_test "AVR_HAVE_MOVW"))
-         (const_int 1)
 
          (and (eq_attr "isa" "rjmp")
               (match_test "!AVR_HAVE_JMP_CALL"))
-         (const_int 1)
 
          (and (eq_attr "isa" "jmp")
               (match_test "AVR_HAVE_JMP_CALL"))
-         (const_int 1)
 
          (and (eq_attr "isa" "ijmp")
               (match_test "!AVR_HAVE_EIJMP_EICALL"))
-         (const_int 1)
 
          (and (eq_attr "isa" "eijmp")
               (match_test "AVR_HAVE_EIJMP_EICALL"))
-         (const_int 1)
 
          (and (eq_attr "isa" "lpm")
               (match_test "!AVR_HAVE_LPMX"))
-         (const_int 1)
 
          (and (eq_attr "isa" "lpmx")
               (match_test "AVR_HAVE_LPMX"))
-         (const_int 1)
 
          (and (eq_attr "isa" "elpm")
               (match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX"))
-         (const_int 1)
 
          (and (eq_attr "isa" "elpmx")
               (match_test "AVR_HAVE_ELPMX"))
-         (const_int 1)
 
          (and (eq_attr "isa" "xmega")
               (match_test "AVR_XMEGA"))
-         (const_int 1)
 
          (and (eq_attr "isa" "no_xmega")
               (match_test "!AVR_XMEGA"))
-         (const_int 1)
 
          (and (eq_attr "isa" "adiw")
               (match_test "AVR_HAVE_ADIW"))
-         (const_int 1)
 
          (and (eq_attr "isa" "no_adiw")
               (match_test "!AVR_HAVE_ADIW"))
-         (const_int 1)
 
-         ] (const_int 0)))
+         (and (eq_attr "isa" "2op")
+              (match_test "!avr_shift_is_3op"))
+
+         (and (eq_attr "isa" "3op")
+              (match_test "avr_shift_is_3op"))
+         )
+    (const_int 1)
+    (const_int 0)))
 
 
 ;; Define mode iterators
@@ -5257,28 +5259,31 @@ (define_peephole2
 ;; "ashlsq3"  "ashlusq3"
 ;; "ashlsa3"  "ashlusa3"
 (define_insn_and_split "ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r,r,r    ,r,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0,0,r    ,0,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,L,P,O C31,K,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    ,r  ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,0  ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C31,C07,C07,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 0)
                    (ashift:ALL4 (match_dup 1)
                                 (match_dup 2)))
-              (clobber (reg:CC REG_CC))])])
+              (clobber (reg:CC REG_CC))])]
+  ""
+  [(set_attr "isa" "*,*,*,2op,3op,*,*")])
 
 (define_insn "*ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r,r,r    ,r,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0,0,r    ,0,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,L,P,O C31,K,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    ,r  ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,0  ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C31,C07,C07,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
     return ashlsi3_out (insn, operands, NULL);
   }
-  [(set_attr "length" "8,0,4,5,8,10,12")
-   (set_attr "adjust_len" "ashlsi")])
+  [(set_attr "length" "12")
+   (set_attr "adjust_len" "ashlsi")
+   (set_attr "isa" "*,*,*,2op,3op,*,*")])
 
 ;; Optimize if a scratch register from LD_REGS happens to be available.
 
@@ -5396,32 +5401,35 @@ (define_peephole2
 ;; "*ashlsi3_const"
 ;; "*ashlsq3_const"  "*ashlusq3_const"
 ;; "*ashlsa3_const"  "*ashlusa3_const"
-(define_insn_and_split "*ashl<mode>3_const_split"
-  [(set (match_operand:ALL4 0 "register_operand"              "=r,r,r    ,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"  "0,0,r    ,0")
-                     (match_operand:QI 2 "const_int_operand"   "L,P,O C31,n")))
-   (clobber (match_scratch:QI 3                               "=X,X,X    ,&d"))]
-  "reload_completed"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 0)
-                   (ashift:ALL4 (match_dup 1)
-                                (match_dup 2)))
-              (clobber (match_dup 3))
-              (clobber (reg:CC REG_CC))])])
+;; (define_insn_and_split "*ashl<mode>3_const_split"
+;;   [(set (match_operand:ALL4 0 "register_operand"              "=r ,r    ,r  ,r  ,r")
+;;         (ashift:ALL4 (match_operand:ALL4 1 "register_operand"  "0 ,r    ,0  ,r  ,0")
+;;                      (match_operand:QI 2 "const_int_operand"   "LP,O C31,C07,C07,n")))
+;;    (clobber (match_scratch:QI 3                               "=X ,X    ,&d ,&d ,&d"))]
+;;   "reload_completed"
+;;   "#"
+;;   "&& reload_completed"
+;;   [(parallel [(set (match_dup 0)
+;;                    (ashift:ALL4 (match_dup 1)
+;;                                 (match_dup 2)))
+;;               (clobber (match_dup 3))
+;;               (clobber (reg:CC REG_CC))])]
+;;   ""
+;;   [(set_attr "isa" "*,*,2op,3op,*")])
 
 (define_insn "*ashl<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"              "=r,r,r    ,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"  "0,0,r    ,0")
-                     (match_operand:QI 2 "const_int_operand"   "L,P,O C31,n")))
-   (clobber (match_scratch:QI 3                               "=X,X,X    ,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"                "=r ,r    ,r  ,r  ,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0 ,r    ,0  ,r  ,0")
+                     (match_operand:QI 2 "const_int_operand"     "LP,O C31,C07,C07,n")))
+   (clobber (match_operand:QI 3 "scratch_or_d_register_operand" "=X ,X    ,&d ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
     return ashlsi3_out (insn, operands, NULL);
   }
-  [(set_attr "length" "0,4,5,10")
-   (set_attr "adjust_len" "ashlsi")])
+  [(set_attr "length" "10")
+   (set_attr "adjust_len" "ashlsi")
+   (set_attr "isa" "*,*,2op,3op,*")])
 
 (define_expand "ashlpsi3"
   [(parallel [(set (match_operand:PSI 0 "register_operand"             "")
;; Function ashl32_7 (ashl32_7, funcdef_no=0, decl_uid=1900, cgraph_uid=1, 
symbol_order=0)

starting the processing of deferred insns
ending the processing of deferred insns
df_analyze called


ashl32_7

Dataflow summary:
def_info->table_size = 0, use_info->table_size = 0
;;  fully invalidated by EH      0 [r0] 1 [r1] 18 [r18] 19 [r19] 20 [r20] 21 
[r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25] 26 [r26] 27 [r27] 30 [r30] 31 [r31] 
33 [__SP_H__] 35 [argH] 36 [cc]
;;  hardware regs used   32 [__SP_L__]
;;  regular block artificial uses        32 [__SP_L__]
;;  eh block artificial uses     32 [__SP_L__] 34 [argL]
;;  entry block defs     8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14 
[r14] 15 [r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22] 
23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;;  exit block uses      22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;;  regs ever live       20 [r20] 21 [r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25] 
36 [cc]
;;  ref usage   r8={1d} r9={1d} r10={1d} r11={1d} r12={1d} r13={1d} r14={1d} 
r15={1d} r16={1d} r17={1d} r18={1d} r19={1d} r20={1d,1u} r21={1d,1u} 
r22={2d,3u} r23={2d,3u} r24={2d,2u} r25={2d,2u} r32={1d,2u} r36={1d} 
;;    total ref usage 38{24d,14u,0e} in 3{3 regular + 0 call} insns.

( )->[0]->( 2 )
;; bb 0 artificial_defs: { d-1(8){ }d-1(9){ }d-1(10){ }d-1(11){ }d-1(12){ 
}d-1(13){ }d-1(14){ }d-1(15){ }d-1(16){ }d-1(17){ }d-1(18){ }d-1(19){ }d-1(20){ 
}d-1(21){ }d-1(22){ }d-1(23){ }d-1(24){ }d-1(25){ }d-1(32){ }}
;; bb 0 artificial_uses: { }
;; lr  in       
;; lr  use      
;; lr  def       8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14 [r14] 15 
[r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22] 23 [r23] 
24 [r24] 25 [r25] 32 [__SP_L__]
;; live  in     
;; live  gen     8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14 [r14] 15 
[r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22] 23 [r23] 
24 [r24] 25 [r25] 32 [__SP_L__]
;; live  kill   
;; lr  out       20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; live  out     20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]

( 0 )->[2]->( 1 )
;; bb 2 artificial_defs: { }
;; bb 2 artificial_uses: { u-1(32){ }}
;; lr  in        20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr  use       20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr  def       22 [r22] 23 [r23] 24 [r24] 25 [r25] 36 [cc]
;; live  in      20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; live  gen     22 [r22] 23 [r23] 24 [r24] 25 [r25]
;; live  kill    36 [cc]
;; lr  out       22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; live  out     22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]

( 2 )->[1]->( )
;; bb 1 artificial_defs: { }
;; bb 1 artificial_uses: { u-1(22){ }u-1(23){ }u-1(24){ }u-1(25){ }u-1(32){ }}
;; lr  in        22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; lr  use       22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; lr  def      
;; live  in      22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; live  gen    
;; live  kill   
;; lr  out      
;; live  out    

Finding needed instructions:
  Adding insn 20 to worklist
  Adding insn 13 to worklist
Finished finding needed instructions:
processing block 2 lr out =  22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
  Adding insn 17 to worklist
Splitting with gen_peephole2_100 (avr.md:5388)
scanning new insn with uid = 22.
deleting insn with uid = 17.
verify found no changes in insn with uid = 22.
starting the processing of deferred insns
ending the processing of deferred insns


ashl32_7

Dataflow summary:
;;  fully invalidated by EH      0 [r0] 1 [r1] 18 [r18] 19 [r19] 20 [r20] 21 
[r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25] 26 [r26] 27 [r27] 30 [r30] 31 [r31] 
33 [__SP_H__] 35 [argH] 36 [cc]
;;  hardware regs used   32 [__SP_L__]
;;  regular block artificial uses        32 [__SP_L__]
;;  eh block artificial uses     32 [__SP_L__] 34 [argL]
;;  entry block defs     8 [r8] 9 [r9] 10 [r10] 11 [r11] 12 [r12] 13 [r13] 14 
[r14] 15 [r15] 16 [r16] 17 [r17] 18 [r18] 19 [r19] 20 [r20] 21 [r21] 22 [r22] 
23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;;  exit block uses      22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;;  regs ever live       20 [r20] 21 [r21] 22 [r22] 23 [r23] 24 [r24] 25 [r25] 
36 [cc]
;;  ref usage   r8={1d} r9={1d} r10={1d} r11={1d} r12={1d} r13={1d} r14={1d} 
r15={1d} r16={1d} r17={1d} r18={1d} r19={1d} r20={1d,1u} r21={1d,1u} 
r22={2d,3u} r23={2d,3u} r24={3d,2u} r25={2d,2u} r32={1d,2u} r36={1d} 
;;    total ref usage 39{25d,14u,0e} in 3{3 regular + 0 call} insns.
(note 1 0 5 NOTE_INSN_DELETED)
;; basic block 2, loop depth 0, count 1073741824 (estimated locally, freq 
1.0000), maybe hot
;;  prev block 0, next block 1, flags: (REACHABLE, RTL, MODIFIED)
;;  pred:       ENTRY [always]  count:1073741824 (estimated locally, freq 
1.0000) (FALLTHRU)
;; bb 2 artificial_defs: { }
;; bb 2 artificial_uses: { u-1(32){ }}
;; lr  in        20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr  use       20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; lr  def       22 [r22] 23 [r23] 24 [r24] 25 [r25] 36 [cc]
;; live  in      20 [r20] 21 [r21] 22 [r22] 23 [r23] 32 [__SP_L__]
;; live  gen     22 [r22] 23 [r23] 24 [r24] 25 [r25]
;; live  kill    36 [cc]
(note 5 1 18 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
(note 18 5 3 2 NOTE_INSN_PROLOGUE_END)
(note 3 18 4 2 NOTE_INSN_DELETED)
(note 4 3 22 2 NOTE_INSN_FUNCTION_BEG)
(insn 22 4 13 2 (parallel [
            (set (reg:SI 22 r22 [orig:46 _2 ] [46])
                (ashift:SI (reg:SI 20 r20 [orig:47 x ] [47])
                    (const_int 7 [0x7])))
            (clobber (reg:QI 24 r24))
            (clobber (reg:CC 36 cc))
        ]) "bogus-peep2.c":3:14 -1
     (nil))
(insn 13 22 19 2 (use (reg/i:SI 22 r22)) "bogus-peep2.c":4:1 -1
     (nil))
(note 19 13 20 2 NOTE_INSN_EPILOGUE_BEG)
(jump_insn 20 19 21 2 (return) "bogus-peep2.c":4:1 -1
     (nil)
 -> return)
;;  succ:       EXIT [always]  count:1073741824 (estimated locally, freq 1.0000)
;; lr  out       22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]
;; live  out     22 [r22] 23 [r23] 24 [r24] 25 [r25] 32 [__SP_L__]

(barrier 21 20 16)
(note 16 21 0 NOTE_INSN_DELETED)

Reply via email to