LGTM. I'll merge it once stage 1 is open. Cheers, Claudiu
On Tue, Mar 18, 2025 at 6:22 PM Luis Silva <luis.sil...@synopsys.com> wrote: > > This patch introduces two new instruction patterns: > > `*mulsi3_cmp0`: This pattern performs a multiplication > and sets the CC_Z register based on the result, while > also storing the result of the multiplication in a > general-purpose register. > > `*mulsi3_cmp0_noout`: This pattern performs a > multiplication and sets the CC_Z register based on the > result without storing the result in a general-purpose > register. > > These patterns are optimized to generate code using the `mpy.f` > instruction, specifically used where the result is compared to zero. > > In addition, the previous commutative multiplication implementation > was removed. It incorrectly took into account the negative flag, > which is wrong. This new implementation only considers the zero > flag. > > A test case has been added to verify the correctness of these > changes. > > gcc/ChangeLog: > > * config/arc/arc.cc (arc_select_cc_mode): Handle multiplication > results compared against zero, selecting CC_Zmode. > * config/arc/arc.md (*mulsi3_cmp0): New define_insn. > (*mulsi3_cmp0_noout): New define_insn. > > gcc/testsuite/ChangeLog: > > * gcc.target/arc/mult-cmp0.c: New test. > > Signed-off-by: Luis Silva <lu...@synopsys.com> > --- > gcc/config/arc/arc.cc | 7 +++ > gcc/config/arc/arc.md | 34 ++++++++++-- > gcc/testsuite/gcc.target/arc/mult-cmp0.c | 66 ++++++++++++++++++++++++ > 3 files changed, 103 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/arc/mult-cmp0.c > > diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc > index e3d53576768..8ad5649adc0 100644 > --- a/gcc/config/arc/arc.cc > +++ b/gcc/config/arc/arc.cc > @@ -1555,6 +1555,13 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y) > machine_mode mode = GET_MODE (x); > rtx x1; > > + /* Matches all instructions which can do .f and clobbers only Z flag. 
*/ > + if (GET_MODE_CLASS (mode) == MODE_INT > + && y == const0_rtx > + && GET_CODE (x) == MULT > + && (op == EQ || op == NE)) > + return CC_Zmode; > + > /* For an operation that sets the condition codes as a side-effect, the > C and V flags is not set as for cmp, so we can only use comparisons > where > this doesn't matter. (For LT and GE we can use "mi" and "pl" > diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md > index 49dfc9d35af..bc2e8fadd91 100644 > --- a/gcc/config/arc/arc.md > +++ b/gcc/config/arc/arc.md > @@ -253,7 +253,7 @@ > simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc, > simd_valign, simd_valign_with_acc, simd_vcontrol, > simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem, > - fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block" > + fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block, mpy" > (cond [(eq_attr "is_sfunc" "yes") > (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS > || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call") > (match_test "flag_pic") (const_string "sfunc")] > @@ -1068,11 +1068,37 @@ archs4x, archs4xd" > (set_attr "cond" "set_zn") > (set_attr "length" "*,4,4,4,8")]) > > -;; The next two patterns are for plos, ior, xor, and, and mult. 
> +(define_insn "*mulsi3_cmp0" > + [(set (reg:CC_Z CC_REG) > + (compare:CC_Z > + (mult:SI > + (match_operand:SI 1 "register_operand" "%r,0,r") > + (match_operand:SI 2 "nonmemory_operand" "rL,I,i")) > + (const_int 0))) > + (set (match_operand:SI 0 "register_operand" "=r,r,r") > + (mult:SI (match_dup 1) (match_dup 2)))] > + "TARGET_MPY" > + "mpy%?.f\\t%0,%1,%2" > + [(set_attr "length" "4,4,8") > + (set_attr "type" "mpy")]) > + > +(define_insn "*mulsi3_cmp0_noout" > + [(set (reg:CC_Z CC_REG) > + (compare:CC_Z > + (mult:SI > + (match_operand:SI 0 "register_operand" "%r,r,r") > + (match_operand:SI 1 "nonmemory_operand" "rL,I,i")) > + (const_int 0)))] > + "TARGET_MPY" > + "mpy%?.f\\t0,%0,%1" > + [(set_attr "length" "4,4,8") > + (set_attr "type" "mpy")]) > + > +;; The next two patterns are for plus, ior, xor, and. > (define_insn "*commutative_binary_cmp0_noout" > [(set (match_operand 0 "cc_set_register" "") > (match_operator 4 "zn_compare_operator" > - [(match_operator:SI 3 "commutative_operator" > + [(match_operator:SI 3 "commutative_operator_sans_mult" > [(match_operand:SI 1 "register_operand" "%r,r") > (match_operand:SI 2 "nonmemory_operand" "rL,Cal")]) > (const_int 0)]))] > @@ -1085,7 +1111,7 @@ archs4x, archs4xd" > (define_insn "*commutative_binary_cmp0" > [(set (match_operand 3 "cc_set_register" "") > (match_operator 5 "zn_compare_operator" > - [(match_operator:SI 4 "commutative_operator" > + [(match_operator:SI 4 "commutative_operator_sans_mult" > [(match_operand:SI 1 "register_operand" "%0, 0,r,r") > (match_operand:SI 2 "nonmemory_operand" "rL,rI,r,Cal")]) > (const_int 0)])) > diff --git a/gcc/testsuite/gcc.target/arc/mult-cmp0.c > b/gcc/testsuite/gcc.target/arc/mult-cmp0.c > new file mode 100644 > index 00000000000..680c72eaa6d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arc/mult-cmp0.c > @@ -0,0 +1,66 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1" } */ > + > +/* mpy.f r1,r0,r1 > + mov_s r0,5 ;3 > + j_s.d [blink] > + mov.ne r0,r1 */ > +unsigned int > 
+ubar (unsigned int a, unsigned int b) > +{ > + unsigned int c = a * b; > + if (c == 0) > + { > + return 5; > + } > + return c; > +} > + > +/* mpy.f r1,r0,r1 > + mov_s r0,5 ;3 > + j_s.d [blink] > + mov.ne r0,r1 */ > +signed int > +bar (signed int a, signed int b) > +{ > + signed int c = a * b; > + if (c == 0) > + { > + return 5; > + } > + return c; > +} > + > +/* mpy.f 0,r0,r1 > + mov_s r0,1 ;3 > + j_s.d [blink] > + mov.eq r0,5 */ > +unsigned int > +ufoo (unsigned int a, unsigned int b) > +{ > + if (a * b == 0) > + { > + return 5; > + } > + return 1; > +} > + > +/* mpy.f 0,r0,r1 > + mov_s r0,1 ;3 > + j_s.d [blink] > + mov.eq r0,5 */ > +unsigned int > +foo (signed int a, signed int b) > +{ > + if (a * b == 0) > + { > + return 5; > + } > + return 1; > +} > + > +/* { dg-final { scan-assembler-times "mpy\\.f\\s+0" 2 } } */ > +/* { dg-final { scan-assembler-times "mov\\.ne\\s+" 2 } } */ > +/* { dg-final { scan-assembler-times "mpy\\.f\\s+r" 2 } } */ > +/* { dg-final { scan-assembler-times "mov\\.eq\\s+" 2 } } */ > + > -- > 2.37.1 >