Gentle PING.
On Fri, Oct 9, 2020 at 5:24 PM Claudiu Zissulescu <claz...@gmail.com> wrote: > > From: Claudiu Zissulescu <claz...@gmail.com> > > ARC MPY7+ instructions add MAC instructions for vector and scalar data > types. This patch adds a madd pattern for 16bit datum that is using the > 32bit MAC instruction, and dot_prod patterns for v4hi vector > types. The 64bit moves are also upgraded by using vadd2 instruction. > > gcc/ > xxxx-xx-xx Claudiu Zissulescu <claz...@synopsys.com> > > * config/arc/arc.c (arc_split_move): Recognize vadd2 instructions. > * config/arc/arc.md (movdi_insn): Update pattern to use vadd2 > instructions. > (movdf_insn): Likewise. > (maddhisi4): New pattern. > (umaddhisi4): Likewise. > * config/arc/simdext.md (mov<mode>_int): Update pattern to use > vadd2. > (sdot_prodv4hi): New pattern. > (udot_prodv4hi): Likewise. > (arc_vec_<V_US>mac_hi_v4hi): Update/renamed to > arc_vec_<V_US>mac_v2hiv2si. > (arc_vec_<V_US>mac_v2hiv2si_zero): New pattern. > > Signed-off-by: Claudiu Zissulescu <claz...@gmail.com> > --- > gcc/config/arc/arc.c | 8 ++++ > gcc/config/arc/arc.md | 71 ++++++++++++++++++++++++--- > gcc/config/arc/constraints.md | 5 ++ > gcc/config/arc/simdext.md | 90 +++++++++++++++++++++++++++-------- > 4 files changed, 147 insertions(+), 27 deletions(-) > > diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c > index ec55cfde87a9..d5b521e75e67 100644 > --- a/gcc/config/arc/arc.c > +++ b/gcc/config/arc/arc.c > @@ -10202,6 +10202,14 @@ arc_split_move (rtx *operands) > return; > } > > + if (TARGET_PLUS_QMACW > + && even_register_operand (operands[0], mode) > + && even_register_operand (operands[1], mode)) > + { > + emit_move_insn (operands[0], operands[1]); > + return; > + } > + > if (TARGET_PLUS_QMACW > && GET_CODE (operands[1]) == CONST_VECTOR) > { > diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md > index f9fc11e51a85..1720e8cd2f6f 100644 > --- a/gcc/config/arc/arc.md > +++ b/gcc/config/arc/arc.md > @@ -1345,8 +1345,8 @@ archs4x, archs4xd" > ") 
> > (define_insn_and_split "*movdi_insn" > - [(set (match_operand:DI 0 "move_dest_operand" "=w, w,r, m") > - (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))] > + [(set (match_operand:DI 0 "move_dest_operand" "=r, r,r, m") > + (match_operand:DI 1 "move_double_src_operand" "r,Hi,m,rCm3"))] > "register_operand (operands[0], DImode) > || register_operand (operands[1], DImode) > || (satisfies_constraint_Cm3 (operands[1]) > @@ -1358,6 +1358,13 @@ archs4x, archs4xd" > default: > return \"#\"; > > + case 0: > + if (TARGET_PLUS_QMACW > + && even_register_operand (operands[0], DImode) > + && even_register_operand (operands[1], DImode)) > + return \"vadd2\\t%0,%1,0\"; > + return \"#\"; > + > case 2: > if (TARGET_LL64 > && memory_operand (operands[1], DImode) > @@ -1374,7 +1381,7 @@ archs4x, archs4xd" > return \"#\"; > } > }" > - "reload_completed" > + "&& reload_completed" > [(const_int 0)] > { > arc_split_move (operands); > @@ -1420,15 +1427,24 @@ archs4x, archs4xd" > "if (prepare_move_operands (operands, DFmode)) DONE;") > > (define_insn_and_split "*movdf_insn" > - [(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m") > - (match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))] > - "register_operand (operands[0], DFmode) || register_operand (operands[1], > DFmode)" > + [(set (match_operand:DF 0 "move_dest_operand" "=D,r,r,r,r,m") > + (match_operand:DF 1 "move_double_src_operand" "r,D,r,E,m,r"))] > + "register_operand (operands[0], DFmode) > + || register_operand (operands[1], DFmode)" > "* > { > switch (which_alternative) > { > default: > return \"#\"; > + > + case 2: > + if (TARGET_PLUS_QMACW > + && even_register_operand (operands[0], DFmode) > + && even_register_operand (operands[1], DFmode)) > + return \"vadd2\\t%0,%1,0\"; > + return \"#\"; > + > case 4: > if (TARGET_LL64 > && ((even_register_operand (operands[0], DFmode) > @@ -6177,6 +6193,49 @@ archs4x, archs4xd" > [(set_attr "length" "0")]) > > ;; MAC and DMPY instructions > + > +; Use MAC 
instruction to emulate 16bit mac. > +(define_expand "maddhisi4" > + [(match_operand:SI 0 "register_operand" "") > + (match_operand:HI 1 "register_operand" "") > + (match_operand:HI 2 "extend_operand" "") > + (match_operand:SI 3 "register_operand" "")] > + "TARGET_PLUS_DMPY" > + "{ > + rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); > + rtx tmp1 = gen_reg_rtx (SImode); > + rtx tmp2 = gen_reg_rtx (SImode); > + rtx accl = gen_lowpart (SImode, acc_reg); > + > + emit_move_insn (accl, operands[3]); > + emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1]))); > + emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode, operands[2]))); > + emit_insn (gen_mac (tmp1, tmp2)); > + emit_move_insn (operands[0], accl); > + DONE; > + }") > + > +; The same for the unsigned variant, but using MACU instruction. > +(define_expand "umaddhisi4" > + [(match_operand:SI 0 "register_operand" "") > + (match_operand:HI 1 "register_operand" "") > + (match_operand:HI 2 "extend_operand" "") > + (match_operand:SI 3 "register_operand" "")] > + "TARGET_PLUS_DMPY" > + "{ > + rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST); > + rtx tmp1 = gen_reg_rtx (SImode); > + rtx tmp2 = gen_reg_rtx (SImode); > + rtx accl = gen_lowpart (SImode, acc_reg); > + > + emit_move_insn (accl, operands[3]); > + emit_insn (gen_rtx_SET (tmp1, gen_rtx_ZERO_EXTEND (SImode, operands[1]))); > + emit_insn (gen_rtx_SET (tmp2, gen_rtx_ZERO_EXTEND (SImode, operands[2]))); > + emit_insn (gen_macu (tmp1, tmp2)); > + emit_move_insn (operands[0], accl); > + DONE; > + }") > + > (define_expand "maddsidi4" > [(match_operand:DI 0 "register_operand" "") > (match_operand:SI 1 "register_operand" "") > diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md > index b7a563a72ada..a2a8e84ac45f 100644 > --- a/gcc/config/arc/constraints.md > +++ b/gcc/config/arc/constraints.md > @@ -493,6 +493,11 @@ > Condition Codes" > (and (match_code "reg") (match_test "cc_register (op, VOIDmode)"))) > > 
+(define_constraint "Ral" > + "@internal > + Accumulator register @code{ACCL} - do not reload into its class" > + (and (match_code "reg") > + (match_test "REGNO (op) == ACCL_REGNO"))) > > (define_constraint "Q" > "@internal > diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md > index 0e88b3dd815b..d2fc309ea876 100644 > --- a/gcc/config/arc/simdext.md > +++ b/gcc/config/arc/simdext.md > @@ -1400,8 +1400,7 @@ > (define_insn_and_split "*mov<mode>_insn" > [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m") > (match_operand:VWH 1 "general_operand" "i,r,m,r"))] > - "TARGET_PLUS_QMACW > - && (register_operand (operands[0], <MODE>mode) > + "(register_operand (operands[0], <MODE>mode) > || register_operand (operands[1], <MODE>mode))" > "* > { > @@ -1411,7 +1410,11 @@ > return \"#\"; > > case 1: > - return \"vadd2 %0, %1, 0\"; > + if (TARGET_PLUS_QMACW > + && even_register_operand (operands[0], <MODE>mode) > + && even_register_operand (operands[1], <MODE>mode)) > + return \"vadd2\\t%0,%1,0\"; > + return \"#\"; > > case 2: > if (TARGET_LL64) > @@ -1430,7 +1433,7 @@ > arc_split_move (operands); > DONE; > } > - [(set_attr "type" "move,move,load,store") > + [(set_attr "type" "move,multi,load,store") > (set_attr "predicable" "yes,no,no,no") > (set_attr "iscompact" "false,false,false,false") > ]) > @@ -1612,6 +1615,44 @@ > DONE; > }) > > +(define_expand "sdot_prodv4hi" > + [(match_operand:V2SI 0 "register_operand" "") > + (match_operand:V4HI 1 "register_operand" "") > + (match_operand:V4HI 2 "register_operand" "") > + (match_operand:V2SI 3 "register_operand" "")] > + "TARGET_PLUS_MACD" > +{ > + rtx acc_reg = gen_rtx_REG (V2SImode, ACC_REG_FIRST); > + rtx op1_low = gen_lowpart (V2HImode, operands[1]); > + rtx op1_high = gen_highpart (V2HImode, operands[1]); > + rtx op2_low = gen_lowpart (V2HImode, operands[2]); > + rtx op2_high = gen_highpart (V2HImode, operands[2]); > + > + emit_move_insn (acc_reg, operands[3]); > + emit_insn (gen_arc_vec_smac_v2hiv2si_zero 
(op1_low, op2_low)); > + emit_insn (gen_arc_vec_smac_v2hiv2si (operands[0], op1_high, op2_high)); > + DONE; > +}) > + > +(define_expand "udot_prodv4hi" > + [(match_operand:V2SI 0 "register_operand" "") > + (match_operand:V4HI 1 "register_operand" "") > + (match_operand:V4HI 2 "register_operand" "") > + (match_operand:V2SI 3 "register_operand" "")] > + "TARGET_PLUS_MACD" > +{ > + rtx acc_reg = gen_rtx_REG (V2SImode, ACC_REG_FIRST); > + rtx op1_low = gen_lowpart (V2HImode, operands[1]); > + rtx op1_high = gen_highpart (V2HImode, operands[1]); > + rtx op2_low = gen_lowpart (V2HImode, operands[2]); > + rtx op2_high = gen_highpart (V2HImode, operands[2]); > + > + emit_move_insn (acc_reg, operands[3]); > + emit_insn (gen_arc_vec_umac_v2hiv2si_zero (op1_low, op2_low)); > + emit_insn (gen_arc_vec_umac_v2hiv2si (operands[0], op1_high, op2_high)); > + DONE; > +}) > + > (define_insn "arc_vec_<V_US>mult_lo_v4hi" > [(set (match_operand:V2SI 0 "even_register_operand" > "=r,r") > (mult:V2SI (SE:V2SI (vec_select:V2HI > @@ -1704,30 +1745,37 @@ > } > ) > > -(define_insn "arc_vec_<V_US>mac_hi_v4hi" > - [(set (match_operand:V2SI 0 "even_register_operand" > "=r,r") > +(define_insn "arc_vec_<V_US>mac_v2hiv2si" > + [(set (match_operand:V2SI 0 "even_register_operand" > "=r,Ral,r") > (plus:V2SI > - (reg:V2SI ARCV2_ACC) > - (mult:V2SI (SE:V2SI (vec_select:V2HI > - (match_operand:V4HI 1 "even_register_operand" > "0,r") > - (parallel [(const_int 2) (const_int 3)]))) > - (SE:V2SI (vec_select:V2HI > - (match_operand:V4HI 2 "even_register_operand" > "r,r") > - (parallel [(const_int 2) (const_int 3)])))))) > + (mult:V2SI (SE:V2SI (match_operand:V2HI 1 "register_operand" "0, > r,r")) > + (SE:V2SI (match_operand:V2HI 2 "register_operand" "r, > r,r"))) > + (reg:V2SI ARCV2_ACC))) > (set (reg:V2SI ARCV2_ACC) > (plus:V2SI > - (reg:V2SI ARCV2_ACC) > - (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) > - (parallel [(const_int 2) > (const_int 3)]))) > - (SE:V2SI (vec_select:V2HI (match_dup 2) > - 
(parallel [(const_int 2) > (const_int 3)])))))) > + (mult:V2SI (SE:V2SI (match_dup 1)) > + (SE:V2SI (match_dup 2))) > + (reg:V2SI ARCV2_ACC))) > ] > "TARGET_PLUS_MACD" > - "vmac2h<V_US_suffix>%? %0, %R1, %R2" > + "@ > + vmac2h<V_US_suffix>%?\\t%0,%1,%2 > + vmac2h<V_US_suffix>%?\\t0,%1,%2 > + vmac2h<V_US_suffix>%?\\t%0,%1,%2" > [(set_attr "length" "4") > (set_attr "type" "multi") > - (set_attr "predicable" "yes,no") > - (set_attr "cond" "canuse,nocond")]) > + (set_attr "predicable" "yes,no,no")]) > + > +(define_insn "arc_vec_<V_US>mac_v2hiv2si_zero" > + [(set (reg:V2SI ARCV2_ACC) > + (plus:V2SI > + (mult:V2SI (SE:V2SI (match_operand:V2HI 0 "register_operand" "r")) > + (SE:V2SI (match_operand:V2HI 1 "register_operand" "r"))) > + (reg:V2SI ARCV2_ACC)))] > + "TARGET_PLUS_MACD" > + "vmac2h<V_US_suffix>%?\\t0,%0,%1" > + [(set_attr "length" "4") > + (set_attr "type" "multi")]) > > ;; Builtins > (define_insn "dmach" > -- > 2.26.2 >