As it turns out, logical 32-bit shifts with an offset of 25..30 can
be performed in 7 instructions or fewer.  This beats the 7 instructions
required for the default code of a shift loop.
Plus, with zero overhead, these cases can be 3-operand.

This is only relevant for -Oz because with -Os, 3op shifts are
split with -msplit-bit-shift (which is not performed with -Oz).

Passes without new regressions.  Ok for trunk?

Johann

--

AVR: PR117726 - Tweak 32-bit logical shifts of 25...30 for -Oz.

As it turns out, logical 32-bit shifts with an offset of 25..30 can
be performed in 7 instructions or fewer.  This beats the 7 instructions
required for the default code of a shift loop.
Plus, with zero overhead, these cases can be 3-operand.

This is only relevant for -Oz because with -Os, 3op shifts are
split with -msplit-bit-shift (which is not performed with -Oz).

        PR target/117726
gcc/
        * config/avr/avr.cc (avr_ld_regno_p): New function.
        (ashlsi3_out) [case 25,26,27,28,29,30]: Handle and tweak.
        (lshrsi3_out): Same.
        (avr_rtx_costs_1) [SImode, ASHIFT, LSHIFTRT]: Adjust costs.
        * config/avr/avr.md (ashlsi3, *ashlsi3, *ashlsi3_const):
        Add "r,r,C4L" alternative.
        (lshrsi3, *lshrsi3, *lshrsi3_const): Add "r,r,C4R" alternative.
        * config/avr/constraints.md (C4R, C4L): New constraints.
gcc/testsuite/
        * gcc.target/avr/torture/avr-torture.exp (AVR_TORTURE_OPTIONS):
        Turn one option variant into -Oz.
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index e5a5aa34ec0..8628a438ab5 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -418,6 +418,15 @@ avr_adiw_reg_p (rtx reg)
 }
 
 
+/* Return true iff REGNO is in register class LD_REGS, i.e. R16...R31.  */
+
+static bool
+avr_ld_regno_p (int regno)
+{
+  return TEST_HARD_REG_CLASS (LD_REGS, regno);
+}
+
+
 static bool
 ra_in_progress ()
 {
@@ -7397,17 +7406,20 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int off = INTVAL (operands[2]);
       int reg0 = true_regnum (operands[0]);
       int reg1 = true_regnum (operands[1]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+      bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+			&& XVECLEN (PATTERN (insn), 0) == 3
+			&& REG_P (operands[3]));
       if (plen)
 	*plen = 0;
 
-      switch (INTVAL (operands[2]))
+      switch (off)
 	{
 	default:
-	  if (INTVAL (operands[2]) < 32)
+	  if (off < 32)
 	    break;
 
 	  return AVR_HAVE_MOVW
@@ -7461,11 +7473,58 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 			   "mov %D0,%B1"  CR_TAB
 			   "clr %B0"      CR_TAB
 			   "clr %A0", operands, plen, 4);
+	case 30:
+	  if (AVR_HAVE_MUL && scratch_p)
+	    return avr_asm_len ("ldi %3,1<<6"       CR_TAB
+				"mul %3,%A1"        CR_TAB
+				"mov %D0,r0"        CR_TAB
+				"clr __zero_reg__"  CR_TAB
+				"clr %C0"           CR_TAB
+				"clr %B0"           CR_TAB
+				"clr %A0", operands, plen, 7);
+	  // Fallthrough
+
+	case 28:
+	case 29:
+	  {
+	    const bool ld_reg0_p = avr_ld_regno_p (reg0 + 3); // %D0
+	    const bool ld_reg1_p = avr_ld_regno_p (reg1 + 0); // %A1
+	    if (ld_reg0_p
+		|| (ld_reg1_p && reg1_unused_after)
+		|| scratch_p)
+	      {
+		if (ld_reg0_p)
+		  avr_asm_len ("mov %D0,%A1"    CR_TAB
+			       "swap %D0"       CR_TAB
+			       "andi %D0,0xf0", operands, plen, 3);
+		else if (ld_reg1_p && reg1_unused_after)
+		  avr_asm_len ("swap %A1"       CR_TAB
+			       "andi %A1,0xf0"  CR_TAB
+			       "mov %D0,%A1", operands, plen, 3);
+		else
+		  avr_asm_len ("mov %D0,%A1"    CR_TAB
+			       "swap %D0"       CR_TAB
+			       "ldi %3,0xf0"    CR_TAB
+			       "and %D0,%3", operands, plen, 4);
+		for (int i = 28; i < off; ++i)
+		  avr_asm_len ("lsl %D0", operands, plen, 1);
+		return avr_asm_len ("clr %C0"  CR_TAB
+				    "clr %B0"  CR_TAB
+				    "clr %A0", operands, plen, 3);
+	      }
+	  }
+	  // Fallthrough
+
 	case 24:
-	  return avr_asm_len ("mov %D0,%A1"  CR_TAB
-			      "clr %C0"      CR_TAB
+	case 25:
+	case 26:
+	case 27:
+	  avr_asm_len ("mov %D0,%A1", operands, plen, 1);
+	  for (int i = 24; i < off; ++i)
+	    avr_asm_len ("lsl %D0", operands, plen, 1);
+	  return avr_asm_len ("clr %C0"      CR_TAB
 			      "clr %B0"      CR_TAB
-			      "clr %A0", operands, plen, 4);
+			      "clr %A0", operands, plen, 3);
 	case 31:
 	  return AVR_HAVE_MOVW
 	    ? avr_asm_len ("bst %A1,0"    CR_TAB
@@ -8298,17 +8357,20 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int off = INTVAL (operands[2]);
       int reg0 = true_regnum (operands[0]);
       int reg1 = true_regnum (operands[1]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+      bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+			&& XVECLEN (PATTERN (insn), 0) == 3
+			&& REG_P (operands[3]));
       if (plen)
 	*plen = 0;
 
-      switch (INTVAL (operands[2]))
+      switch (off)
 	{
 	default:
-	  if (INTVAL (operands[2]) < 32)
+	  if (off < 32)
 	    break;
 
 	  return AVR_HAVE_MOVW
@@ -8362,11 +8424,58 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 			   "mov %A0,%C1" CR_TAB
 			   "clr %C0"     CR_TAB
 			   "clr %D0", operands, plen, 4);
+	case 30:
+	  if (AVR_HAVE_MUL && scratch_p)
+	    return avr_asm_len ("ldi %3,1<<2"       CR_TAB
+				"mul %3,%D1"        CR_TAB
+				"mov %A0,r1"        CR_TAB
+				"clr __zero_reg__"  CR_TAB
+				"clr %B0"           CR_TAB
+				"clr %C0"           CR_TAB
+				"clr %D0", operands, plen, 7);
+	  // Fallthrough
+
+	case 29:
+	case 28:
+	  {
+	    const bool ld_reg0_p = avr_ld_regno_p (reg0 + 0); // %A0
+	    const bool ld_reg1_p = avr_ld_regno_p (reg1 + 3); // %D1
+	    if (ld_reg0_p
+		|| (ld_reg1_p && reg1_unused_after)
+		|| scratch_p)
+	      {
+		if (ld_reg0_p)
+		  avr_asm_len ("mov %A0,%D1"    CR_TAB
+			       "swap %A0"       CR_TAB
+			       "andi %A0,0x0f", operands, plen, 3);
+		else if (ld_reg1_p && reg1_unused_after)
+		  avr_asm_len ("swap %D1"       CR_TAB
+			       "andi %D1,0x0f"  CR_TAB
+			       "mov %A0,%D1", operands, plen, 3);
+		else
+		  avr_asm_len ("mov %A0,%D1"    CR_TAB
+			       "swap %A0"       CR_TAB
+			       "ldi %3,0x0f"    CR_TAB
+			       "and %A0,%3", operands, plen, 4);
+		for (int i = 28; i < off; ++i)
+		  avr_asm_len ("lsr %A0", operands, plen, 1);
+		return avr_asm_len ("clr %B0"  CR_TAB
+				    "clr %C0"  CR_TAB
+				    "clr %D0", operands, plen, 3);
+	      }
+	  }
+	  // Fallthrough
+
+	case 27:
+	case 26:
+	case 25:
 	case 24:
-	  return avr_asm_len ("mov %A0,%D1" CR_TAB
-			      "clr %B0"     CR_TAB
+	  avr_asm_len ("mov %A0,%D1", operands, plen, 1);
+	  for (int i = 24; i < off; ++i)
+	    avr_asm_len ("lsr %A0", operands, plen, 1);
+	  return avr_asm_len ("clr %B0"     CR_TAB
 			      "clr %C0"     CR_TAB
-			      "clr %D0", operands, plen, 4);
+			      "clr %D0", operands, plen, 3);
 	case 31:
 	  return AVR_HAVE_MOVW
 	    ? avr_asm_len ("bst %D1,7"    CR_TAB
@@ -13037,9 +13146,6 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	      case 0:
 		*total = 0;
 		break;
-	      case 24:
-		*total = COSTS_N_INSNS (3);
-		break;
 	      case 1:
 	      case 8:
 		*total = COSTS_N_INSNS (4);
@@ -13050,6 +13156,19 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	      case 16:
 		*total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
 		break;
+	      case 24:
+	      case 25:
+	      case 26:
+	      case 27:
+		*total = COSTS_N_INSNS (4 + val1 - 24);
+		break;
+	      case 28:
+	      case 29:
+		*total = COSTS_N_INSNS (6 + val1 - 28);
+		break;
+	      case 30:
+		*total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
+		break;
 	      case 31:
 		*total = COSTS_N_INSNS (6);
 		break;
@@ -13346,6 +13465,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 		*total = 0;
 		break;
 	      case 1:
+	      case 8:
 		*total = COSTS_N_INSNS (4);
 		break;
 	      case 2:
@@ -13357,9 +13477,18 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	      case 16:
 		*total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
 		break;
-	      case 8:
 	      case 24:
-		*total = COSTS_N_INSNS (4);
+	      case 25:
+	      case 26:
+	      case 27:
+		*total = COSTS_N_INSNS (4 + val1 - 24);
+		break;
+	      case 28:
+	      case 29:
+		*total = COSTS_N_INSNS (6 + val1 - 28);
+		break;
+	      case 30:
+		*total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
 		break;
 	      case 31:
 		*total = COSTS_N_INSNS (6);
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 594940c6781..6550fadd101 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -5363,9 +5363,9 @@ (define_peephole2
 ;; "ashlsq3"  "ashlusq3"
 ;; "ashlsa3"  "ashlusa3"
 (define_insn_and_split "ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r        ,r  ,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4l,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4L,C4l,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
@@ -5377,9 +5377,9 @@ (define_insn_and_split "ashl<mode>3"
   [(set_attr "isa" "*,*,*,3op,*,*")])
 
 (define_insn "*ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r        ,r  ,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4l,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4L,C4l,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -5564,10 +5564,10 @@ (define_peephole2 ; *ashrsi3_const  *lshrsi3_const  *ashlsi3_const
 ;; "*ashlsq3_const"  "*ashlusq3_const"
 ;; "*ashlsa3_const"  "*ashlusa3_const"
 (define_insn "*ashl<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"             "=r ,r        ,r  ,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r        ,r  ,0")
-                     (match_operand:QI 2 "const_int_operand"  "LP,O C15 C31,C4l,n")))
-   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"    "=X ,X        ,&d ,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"             "=r ,r    ,r  ,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r    ,r  ,0")
+                     (match_operand:QI 2 "const_int_operand"  "LP,O C4L,C4l,n")))
+   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"    "=X ,X    ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -5955,9 +5955,9 @@ (define_insn "*lshrpsi3"
 ;; "lshrsq3"  "lshrusq3"
 ;; "lshrsa3"  "lshrusa3"
 (define_insn_and_split "lshr<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r        ,r  ,r,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4r,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    ,r  ,r,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4R,C4r,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
@@ -5969,9 +5969,9 @@ (define_insn_and_split "lshr<mode>3"
   [(set_attr "isa" "*,*,*,3op,*,*")])
 
 (define_insn "*lshr<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r        ,r  ,r,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4r,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    ,r  ,r,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4R,C4r,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -6059,10 +6059,10 @@ (define_insn "*lshr<mode>3_const"
 ;; "*lshrsq3_const"  "*lshrusq3_const"
 ;; "*lshrsa3_const"  "*lshrusa3_const"
 (define_insn "*lshr<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"               "=r ,r        ,r  ,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r        ,r  ,0")
-                       (match_operand:QI 2 "const_int_operand"  "LP,O C15 C31,C4r,n")))
-   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"      "=X ,X        ,&d ,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"               "=r ,r    ,r  ,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r    ,r  ,0")
+                       (match_operand:QI 2 "const_int_operand"  "LP,O C4R,C4r,n")))
+   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"      "=X ,X    ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md
index fc8d4d56a66..2ca9cc3d88d 100644
--- a/gcc/config/avr/constraints.md
+++ b/gcc/config/avr/constraints.md
@@ -328,6 +328,15 @@ (define_constraint "C4l"
   (and (match_code "const_int")
        (match_test "avr_split_shift_p (4, ival, ASHIFT)")))
 
+(define_constraint "C4R"
+  "A constant integer shift offset for a 4-byte LSHIFTRT that's a 3-operand insn independent of options."
+  (and (match_code "const_int")
+       (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
+
+(define_constraint "C4L"
+  "A constant integer shift offset for a 4-byte ASHIFT that's a 3-operand insn independent of options."
+  (and (match_code "const_int")
+       (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
 
 ;; CONST_FIXED is no element of 'n' so cook our own.
 ;; "i" or "s" would match but because the insn uses iterators that cover
diff --git a/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp b/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp
index 158ef2d4c5a..91bbe26f10b 100644
--- a/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp
+++ b/gcc/testsuite/gcc.target/avr/torture/avr-torture.exp
@@ -45,7 +45,7 @@ dg-init
 	{ -Os -fomit-frame-pointer } \
 	{ -Os -fomit-frame-pointer -finline-functions } \
 	{ -O3 -g } \
-	{ -Os -mcall-prologues} ]
+	{ -Oz -mcall-prologues} ]
 
 
 #Initialize use of torture lists. 

Reply via email to