Rework the (u)maddhisi4 patterns and use VMAC2H(U) instruction instead of the 64bit MAC(U) instruction. This fixes the next execute.exp failures: arith-rand-ll.c -O2 execution test arith-rand-ll.c -O3 execution test pr78726.c -O2 execution test pr78726.c -O3 execution test
Backported to gcc11 too. gcc/ 2021-06-09 Claudiu Zissulescu <claz...@synopsys.com> * config/arc/arc.md (maddhisi4): Use VMAC2H instruction. (machi): New pattern. (umaddhisi4): Use VMAC2HU instruction. (umachi): New pattern. Signed-off-by: Claudiu Zissulescu <claz...@synopsys.com> --- gcc/config/arc/arc.md | 66 +++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 6f13b3a01d8..aed0b40728b 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -6025,48 +6025,64 @@ (define_insn "stack_irq_dwarf" ;; MAC and DMPY instructions -; Use MAC instruction to emulate 16bit mac. +; Use VMAC2H(U) instruction to emulate scalar 16bit mac. (define_expand "maddhisi4" [(match_operand:SI 0 "register_operand" "") (match_operand:HI 1 "register_operand" "") (match_operand:HI 2 "extend_operand" "") (match_operand:SI 3 "register_operand" "")] - "TARGET_PLUS_DMPY" + "TARGET_PLUS_MACD" "{ - rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); - rtx tmp1 = gen_reg_rtx (SImode); - rtx tmp2 = gen_reg_rtx (SImode); - rtx accl = gen_lowpart (SImode, acc_reg); - - emit_move_insn (accl, operands[3]); - emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1]))); - emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode, operands[2]))); - emit_insn (gen_mac (tmp1, tmp2)); - emit_move_insn (operands[0], accl); + rtx acc_reg = gen_rtx_REG (SImode, ACC_REG_FIRST); + + emit_move_insn (acc_reg, operands[3]); + emit_insn (gen_machi (operands[1], operands[2])); + emit_move_insn (operands[0], acc_reg); DONE; }") -; The same for the unsigned variant, but using MACU instruction. +(define_insn "machi" + [(set (reg:SI ARCV2_ACC) + (plus:SI + (mult:SI (sign_extend:SI (match_operand:HI 0 "register_operand" "%r")) + (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))) + (reg:SI ARCV2_ACC)))] + "TARGET_PLUS_MACD" + "vmac2h\\t0,%0,%1" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "no") + (set_attr "cond" "nocond")]) + +; The same for the unsigned variant, but using VMAC2HU instruction. (define_expand "umaddhisi4" [(match_operand:SI 0 "register_operand" "") (match_operand:HI 1 "register_operand" "") - (match_operand:HI 2 "extend_operand" "") + (match_operand:HI 2 "register_operand" "") (match_operand:SI 3 "register_operand" "")] - "TARGET_PLUS_DMPY" + "TARGET_PLUS_MACD" "{ - rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); - rtx tmp1 = gen_reg_rtx (SImode); - rtx tmp2 = gen_reg_rtx (SImode); - rtx accl = gen_lowpart (SImode, acc_reg); - - emit_move_insn (accl, operands[3]); - emit_insn (gen_rtx_SET (tmp1, gen_rtx_ZERO_EXTEND (SImode, operands[1]))); - emit_insn (gen_rtx_SET (tmp2, gen_rtx_ZERO_EXTEND (SImode, operands[2]))); - emit_insn (gen_macu (tmp1, tmp2)); - emit_move_insn (operands[0], accl); + rtx acc_reg = gen_rtx_REG (SImode, ACC_REG_FIRST); + + emit_move_insn (acc_reg, operands[3]); + emit_insn (gen_umachi (operands[1], operands[2])); + emit_move_insn (operands[0], acc_reg); DONE; }") +(define_insn "umachi" + [(set (reg:SI ARCV2_ACC) + (plus:SI + (mult:SI (zero_extend:SI (match_operand:HI 0 "register_operand" "%r")) + (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))) + (reg:SI ARCV2_ACC)))] + "TARGET_PLUS_MACD" + "vmac2hu\\t0,%0,%1" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "no") + (set_attr "cond" "nocond")]) + (define_expand "maddsidi4" [(match_operand:DI 0 "register_operand" "") (match_operand:SI 1 "register_operand" "") -- 2.31.1