This patch adds {u|s}mul{qi|hi}3_highpart patterns, which can help when dividing by a constant if MUL is available, e.g.
int8_t sdiv1 (int8_t a) { return a / 3; } uint8_t udiv1 (uint8_t a) { return a / 3; } uint16_t udiv2 (uint16_t a) { return a / 10; } compiles with -O2 -mmcu=atmega8 to sdiv1: ldi r25,lo8(86) ; 6 *movqi/2 [length = 1] muls r24,r25 ; 7 smulqi3_highpart [length = 3] mov r25,r1 clr __zero_reg__ sbrc r24,7 ; 14 *subqi3.ashiftrt7 [length = 2] inc r25 mov r24,r25 ; 22 *movqi/1 [length = 1] ret ; 25 return [length = 1] udiv1: ldi r25,lo8(-85) ; 6 *movqi/2 [length = 1] mul r24,r25 ; 7 umulqi3_highpart [length = 3] mov r24,r1 clr __zero_reg__ lsr r24 ; 13 *lshrqi3/3 [length = 1] ret ; 22 return [length = 1] udiv2: movw r18,r24 ; 2 *movhi/1 [length = 1] ldi r26,lo8(-13107) ; 7 *movhi/4 [length = 2] ldi r27,hi8(-13107) call __umulhisi3 ; 8 *umulhi3_highpart_call [length = 2] lsr r25 ; 30 *lshrhi3_const/5 [length = 6] ror r24 lsr r25 ror r24 lsr r25 ror r24 ret ; 28 return [length = 1] For -Os these patterns are too expensive and the code is unchanged, i.e. __[u]divmod is called. Tested without regressions. Ok to commit? Johann PR target/49687 * config/avr/avr.md (smulqi3_highpart): New insn. (umulqi3_highpart): New insn. (*subqi3.ashiftrt7): New insn. (smulhi3_highpart): New expander. (umulhi3_highpart): New expander. (*smulhi3_highpart_call): New insn. (*umulhi3_highpart_call): New insn. (extend_u): New code attribute. (extend_prefix): Rename code attribute to extend_su. * config/avr/avr.c (avr_rtx_costs): Report costs of highpart of widening QI/HI multiply.
Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 177616) +++ config/avr/avr.md (working copy) @@ -141,10 +141,14 @@ (define_code_iterator any_extend [sign_ (define_code_iterator any_extend2 [sign_extend zero_extend]) ;; Define code attributes -(define_code_attr extend_prefix +(define_code_attr extend_su [(sign_extend "s") (zero_extend "u")]) +(define_code_attr extend_u + [(sign_extend "") + (zero_extend "u")]) + ;;======================================================================== ;; The following is used by nonlocal_goto and setjmp. @@ -1015,6 +1019,43 @@ (define_insn "*mulqi3_call" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) +(define_insn "smulqi3_highpart" + [(set (match_operand:QI 0 "register_operand" "=r") + (truncate:QI + (lshiftrt:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "d"))) + (const_int 8))))] + "AVR_HAVE_MUL" + "muls %1,%2 + mov %0,r1 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "umulqi3_highpart" + [(set (match_operand:QI 0 "register_operand" "=r") + (truncate:QI + (lshiftrt:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))) + (const_int 8))))] + "AVR_HAVE_MUL" + "mul %1,%2 + mov %0,r1 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;; Used when expanding div or mod inline for some special values +(define_insn "*subqi3.ashiftrt7" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 1 "register_operand" "0") + (ashiftrt:QI (match_operand:QI 2 "register_operand" "r") + (const_int 7))))] + "" + "sbrc %2,7\;inc %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + (define_insn "mulqihi3" [(set (match_operand:HI 0 "register_operand" "=r") (mult:HI (sign_extend:HI 
(match_operand:QI 1 "register_operand" "d")) @@ -1367,9 +1408,7 @@ (define_insn "*mulhi3_call" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) -;; Operand 2 (reg:SI 18) not clobbered on the enhanced core. -;; All call-used registers clobbered otherwise - normal library call. -;; To support widening multiplicatioon with constant we postpone +;; To support widening multiplicatioon with constant we postpone ;; expanding to the implicit library call until post combine and ;; prior to register allocation. Clobber all hard registers that ;; might be used by the (widening) multiply until it is split and @@ -1535,19 +1574,12 @@ (define_insn_and_split "mulohisi3" (reg:SI 22))] "") -(define_expand "mulhisi3" +;; "mulhisi3" +;; "umulhisi3" +(define_expand "<extend_u>mulhisi3" [(parallel [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "")) - (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) - (clobber (reg:HI 26)) - (clobber (reg:DI 18))])] - "AVR_HAVE_MUL" - "") - -(define_expand "umulhisi3" - [(parallel [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "")) - (zero_extend:SI (match_operand:HI 2 "register_operand" "")))) + (mult:SI (any_extend:SI (match_operand:HI 1 "register_operand" "")) + (any_extend:SI (match_operand:HI 2 "register_operand" "")))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] "AVR_HAVE_MUL" @@ -1567,7 +1599,7 @@ (define_expand "usmulhisi3" ;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3" ;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3" (define_insn_and_split - "*<any_extend:extend_prefix><any_extend2:extend_prefix>mul<QIHI:mode><QIHI2:mode>si3" + "*<any_extend:extend_su><any_extend2:extend_su>mul<QIHI:mode><QIHI2:mode>si3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) 
(any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r")))) @@ -1618,6 +1650,24 @@ (define_insn_and_split } }) +;; "smulhi3_highpart" +;; "umulhi3_highpart" +(define_expand "<extend_su>mulhi3_highpart" + [(set (reg:HI 18) + (match_operand:HI 1 "nonmemory_operand" "")) + (set (reg:HI 26) + (match_operand:HI 2 "nonmemory_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))) + (const_int 16)))) + (clobber (reg:HI 22))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "AVR_HAVE_MUL" + "") + + (define_insn "*mulsi3_call" [(set (reg:SI 22) (mult:SI (reg:SI 22) @@ -1628,21 +1678,27 @@ (define_insn "*mulsi3_call" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) -(define_insn "*mulhisi3_call" +;; "*mulhisi3_call" +;; "*umulhisi3_call" +(define_insn "*<extend_u>mulhisi3_call" [(set (reg:SI 22) - (mult:SI (sign_extend:SI (reg:HI 18)) - (sign_extend:SI (reg:HI 26))))] + (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))))] "AVR_HAVE_MUL" - "%~call __mulhisi3" + "%~call __<extend_u>mulhisi3" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) -(define_insn "*umulhisi3_call" - [(set (reg:SI 22) - (mult:SI (zero_extend:SI (reg:HI 18)) - (zero_extend:SI (reg:HI 26))))] +;; "*umulhi3_highpart_call" +;; "*smulhi3_highpart_call" +(define_insn "*<extend_su>mulhi3_highpart_call" + [(set (reg:HI 24) + (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))) + (const_int 16)))) + (clobber (reg:HI 22))] "AVR_HAVE_MUL" - "%~call __umulhisi3" + "%~call __<extend_u>mulhisi3" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) @@ -1655,21 +1711,12 @@ (define_insn "*usmulhisi3_call" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) -(define_insn "*muluhisi3_call" +(define_insn "*mul<extend_su>hisi3_call" [(set (reg:SI 22) - (mult:SI (zero_extend:SI (reg:HI 26)) - (reg:SI 18)))] - "AVR_HAVE_MUL" - "%~call 
__muluhisi3" - [(set_attr "type" "xcall") - (set_attr "cc" "clobber")]) - -(define_insn "*mulshisi3_call" - [(set (reg:SI 22) - (mult:SI (sign_extend:SI (reg:HI 26)) + (mult:SI (any_extend:SI (reg:HI 26)) (reg:SI 18)))] "AVR_HAVE_MUL" - "%~call __mulshisi3" + "%~call __mul<extend_su>hisi3" [(set_attr "type" "xcall") (set_attr "cc" "clobber")]) @@ -2269,7 +2316,7 @@ (define_insn "ashlhi3" ;; "*ashluqihiqi3" ;; "*ashlsqihiqi3" -(define_insn_and_split "*ashl<extend_prefix>qihiqi3" +(define_insn_and_split "*ashl<extend_su>qihiqi3" [(set (match_operand:QI 0 "register_operand" "=r") (subreg:QI (ashift:HI (any_extend:HI (match_operand:QI 1 "register_operand" "0")) (match_operand:QI 2 "register_operand" "r")) @@ -2287,7 +2334,7 @@ (define_insn_and_split "*ashl<extend_pre ;; "*ashluqihiqi3.mem" ;; "*ashlsqihiqi3.mem" -(define_insn_and_split "*ashl<extend_prefix>qihiqi3.mem" +(define_insn_and_split "*ashl<extend_su>qihiqi3.mem" [(set (match_operand:QI 0 "memory_operand" "=m") (subreg:QI (ashift:HI (any_extend:HI (match_operand:QI 1 "register_operand" "r")) (match_operand:QI 2 "register_operand" "r")) Index: config/avr/avr.c =================================================================== --- config/avr/avr.c (revision 177558) +++ config/avr/avr.c (working copy) @@ -5954,6 +5954,20 @@ avr_rtx_costs (rtx x, int codearg, int o *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed); return true; + case TRUNCATE: + if (AVR_HAVE_MUL + && LSHIFTRT == GET_CODE (XEXP (x, 0)) + && MULT == GET_CODE (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + if (QImode == mode || HImode == mode) + { + *total = COSTS_N_INSNS (2); + return true; + } + } + break; + default: break; }