This patch improves 16=8*8 and 16*16*8 multiplication. mulhi3 is extended to care for small, signed 9-bit constants that can be handled more efficiently than loading the constants and performing a 16-bit multiplication.
Some combine patterns are added to match if one operand is a small constant which is then split in the remainder. Cost computation adapted to the new patterns. Test cases attached to the PRs compile to good code now. Tested without regressions. Ok to commit? PR target/36467 PR target/49687 * config/avr/avr.md (mulhi3): Use register_or_s9_operand for operand2 and expand appropriately if there is a CONST_INT in operand2. (usmulqihi3): New insn. (*sumulqihi3): New insn. (*osmulqihi3): New insn. (*oumulqihi3): New insn. (*muluqihi3.uconst): New insn_and_split. (*muluqihi3.sconst): New insn_and_split. (*mulsqihi3.sconst): New insn_and_split. (*mulsqihi3.uconst): New insn_and_split. (*mulsqihi3.oconst): New insn_and_split. (*ashifthi3.signx.const): New insn_and_split. (*ashifthi3.signx.const7): New insn_and_split. (*ashifthi3.zerox.const): New insn_and_split. (mulsqihi3): New insn. (muluqihi3): New insn. (muloqihi3): New insn. * config/avr/avr.c (avr_rtx_costs): Report costs of above insns. (avr_gate_split1): New function. * config/avr/avr-protos.h (avr_gate_split1): New prototype. * config/avr/predicates.md (const_2_to_7_operand): New. (const_2_to_6_operand): New. (u8_operand): New. (s8_operand): New. (o8_operand): New. (s9_operand): New. (register_or_s9_operand): New.
Index: config/avr/predicates.md =================================================================== --- config/avr/predicates.md (revision 176136) +++ config/avr/predicates.md (working copy) @@ -73,6 +73,16 @@ (define_predicate "const_0_to_7_operand" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) +;; Return 1 if OP is constant integer 2..7 for MODE. +(define_predicate "const_2_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 7)"))) + +;; Return 1 if OP is constant integer 2..6 for MODE. +(define_predicate "const_2_to_6_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 6)"))) + ;; Returns true if OP is either the constant zero or a register. (define_predicate "reg_or_0_operand" (ior (match_operand 0 "register_operand") @@ -156,3 +166,26 @@ (define_predicate "const_8_16_24_operand (and (match_code "const_int") (match_test "8 == INTVAL(op) || 16 == INTVAL(op) || 24 == INTVAL(op)"))) +;; Unsigned CONST_INT that fits in 8 bits, i.e. 0..255. +(define_predicate "u8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 255)"))) + +;; Signed CONST_INT that fits in 8 bits, i.e. -128..127. +(define_predicate "s8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -128, 127)"))) + +;; One-extended CONST_INT that fits in 8 bits, i.e. -256..-1. +(define_predicate "o8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -256, -1)"))) + +;; Signed CONST_INT that fits in 9 bits, i.e. -256..255. +(define_predicate "s9_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -256, 255)"))) + +(define_predicate "register_or_s9_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "s9_operand"))) Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 176276) +++ config/avr/avr.md (working copy) @@ -1017,19 +1017,309 @@ (define_insn "umulqihi3" [(set_attr "length" "3") (set_attr "cc" "clobber")]) +(define_insn "usmulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %2,%1 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;; Above insn is not canonicalized by insn combine, so here is a version with +;; operands swapped. + +(define_insn "*sumulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (zero_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;; One-extend operand 1 + +(define_insn "*osmulqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "a")))) + (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %2,%1 + movw %0,r0 + sub %B0,%2 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "*oumulqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "r")))) + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %2,%1 + movw %0,r0 + sub %B0,%2 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + + +;****************************************************************************** +; mul HI: $1 = sign/zero-extend, $2 = small constant +;****************************************************************************** + +(define_insn_and_split "*muluqihi3.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "u8_operand" "M")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*muluqihi3.sconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "s8_operand" "n")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*mulsqihi3.sconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "s8_operand" "n")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; mulqihi3 + (set (match_dup 0) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*mulsqihi3.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 3)) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*mulsqihi3.oconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "o8_operand" "n")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; *osmulqihi3 + (set (match_dup 0) + (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 3)))) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + operands[3] = gen_reg_rtx (QImode); + }) + +;; The EXTEND of $1 only appears in combine, we don't see it in expand so that +;; expand decides to use ASHIFT instead of MUL because ASHIFT costs are cheaper +;; at that time. Fix that. + +(define_insn_and_split "*ashifthi3.signx.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "const_2_to_6_operand" "I")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; mulqihi3 + (set (match_dup 0) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*ashifthi3.signx.const7" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (const_int 7)))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 3)) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = gen_int_mode (1 << 7, QImode); + operands[3] = gen_reg_rtx (QImode); + }) + +(define_insn_and_split "*ashifthi3.zerox.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "const_2_to_7_operand" "I")))] + "AVR_HAVE_MUL + && avr_gate_split1()" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); + operands[3] = gen_reg_rtx (QImode); + }) + +;****************************************************************************** +; mul HI: $1 = sign-/zero-/one-extend, $2 = reg +;****************************************************************************** + +(define_insn "mulsqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "register_operand" "a")))] + "AVR_HAVE_MUL" + "mulsu %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "muluqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +;; one-extend operand 1 + +(define_insn "muloqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "r")))) + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + sub %B0,%A2 + clr __zero_reg__" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +;****************************************************************************** + (define_expand "mulhi3" [(set (match_operand:HI 0 "register_operand" "") (mult:HI (match_operand:HI 1 "register_operand" "") - (match_operand:HI 2 "register_operand" "")))] + (match_operand:HI 2 "register_or_s9_operand" "")))] "" - " -{ - if (!AVR_HAVE_MUL) - { - emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2])); - DONE; - } -}") + { + if (!AVR_HAVE_MUL) + { + if (!register_operand (operands[2], HImode)) + operands[2] = force_reg (HImode, operands[2]); + + emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2])); + DONE; + } + + /* For small constants we can do better by extending them on the fly. + The constant can be loaded in one instruction and the widening + multiplication is shorter. First try the unsigned variant because it + allows constraint "d" instead of "a" for the signed version. */ + + if (s9_operand (operands[2], HImode)) + { + rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); + + if (u8_operand (operands[2], HImode)) + { + emit_insn (gen_muluqihi3 (operands[0], reg, operands[1])); + } + else if (s8_operand (operands[2], HImode)) + { + emit_insn (gen_mulsqihi3 (operands[0], reg, operands[1])); + } + else + { + emit_insn (gen_muloqihi3 (operands[0], reg, operands[1])); + } + + DONE; + } + + if (!register_operand (operands[2], HImode)) + operands[2] = force_reg (HImode, operands[2]); + }) (define_insn "*mulhi3_enh" [(set (match_operand:HI 0 "register_operand" "=&r") Index: config/avr/avr-protos.h =================================================================== --- config/avr/avr-protos.h (revision 176136) +++ config/avr/avr-protos.h (working copy) @@ -117,3 +117,4 @@ extern int class_max_nregs (enum reg_cla #ifdef REAL_VALUE_TYPE extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n); #endif +extern bool avr_gate_split1(void); Index: config/avr/avr.c =================================================================== --- config/avr/avr.c (revision 176276) +++ config/avr/avr.c (working copy) @@ -5473,7 +5473,42 @@ avr_rtx_costs (rtx x, int codearg, int o case HImode: if (AVR_HAVE_MUL) - *total = COSTS_N_INSNS (!speed ? 7 : 10); + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + enum rtx_code code0 = GET_CODE (op0); + enum rtx_code code1 = GET_CODE (op1); + bool ex0 = SIGN_EXTEND == code0 || ZERO_EXTEND == code0; + bool ex1 = SIGN_EXTEND == code1 || ZERO_EXTEND == code1; + + if (ex0 + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + if (ex0 + && register_operand (op1, HImode)) + { + *total = COSTS_N_INSNS (!speed ? 5 : 8); + return true; + } + else if (ex0 || ex1) + { + *total = COSTS_N_INSNS (!speed ? 3 : 5); + return true; + } + else if (register_operand (op0, HImode) + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 9); + return true; + } + else + *total = COSTS_N_INSNS (!speed ? 7 : 10); + } else if (!speed) *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); else @@ -5556,6 +5591,17 @@ avr_rtx_costs (rtx x, int codearg, int o break; case HImode: + if (AVR_HAVE_MUL) + { + if (const_2_to_7_operand (XEXP (x, 1), HImode) + && (SIGN_EXTEND == GET_CODE (XEXP (x, 0)) + || ZERO_EXTEND == GET_CODE (XEXP (x, 0)))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + } + if (GET_CODE (XEXP (x, 1)) != CONST_INT) { *total = COSTS_N_INSNS (!speed ? 5 : 41); @@ -6888,4 +6934,29 @@ avr_expand_builtin (tree exp, rtx target } +/* FIXME: We compose some insns by means of insn combine + and split them in split1. We don't want IRA/reload + to combine them to the original insns again because + that avoid some CSE optimizations if constants are + involved. If IRA/reload combines, the recombined + insns get split again after reload, but then CSE + does not take place. + It appears that at present there is no other way + to take away the insn from IRA. Notice that split1 + runs unconditionally so that all our insns will get + split no matter of command line options. */ + +#include "tree-pass.h" + +bool +avr_gate_split1 (void) +{ + if (current_pass->static_pass_number + < pass_match_asm_constraints.pass.static_pass_number) + return true; + + return false; +} + + #include "gt-avr.h"