LGTM, I'll merge it once stage 1 is open.

Cheers,
Claudiu

On Tue, Mar 18, 2025 at 6:22 PM Luis Silva <luis.sil...@synopsys.com> wrote:
>
> This patch introduces two new instruction patterns:
>
>     `*mulsi3_cmp0`:  This pattern performs a multiplication
>     and sets the CC_Z register based on the result, while
>     also storing the result of the multiplication in a
>     general-purpose register.
>
>     `*mulsi3_cmp0_noout`:  This pattern performs a
>     multiplication and sets the CC_Z register based on the
>     result without storing the result in a general-purpose
>     register.
>
> These patterns generate the `mpy.f` instruction in cases where the
> result of the multiplication is compared against zero.
>
> In addition, the previous commutative multiplication implementation
> was removed because it took the negative flag into account, which is
> wrong for a multiplication.  The new implementation only considers
> the zero flag.
>
> A test case has been added to verify the correctness of these
> changes.
>
> gcc/ChangeLog:
>
>         * config/arc/arc.cc (arc_select_cc_mode): Handle multiplication
>         results compared against zero, selecting CC_Zmode.
>         * config/arc/arc.md (*mulsi3_cmp0): New define_insn.
>         (*mulsi3_cmp0_noout): New define_insn.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/arc/mult-cmp0.c: New test.
>
> Signed-off-by: Luis Silva <lu...@synopsys.com>
> ---
>  gcc/config/arc/arc.cc                    |  7 +++
>  gcc/config/arc/arc.md                    | 34 ++++++++++--
>  gcc/testsuite/gcc.target/arc/mult-cmp0.c | 66 ++++++++++++++++++++++++
>  3 files changed, 103 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arc/mult-cmp0.c
>
> diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
> index e3d53576768..8ad5649adc0 100644
> --- a/gcc/config/arc/arc.cc
> +++ b/gcc/config/arc/arc.cc
> @@ -1555,6 +1555,13 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
>    machine_mode mode = GET_MODE (x);
>    rtx x1;
>
> +  /* Matches all instructions which can do .f and clobbers only Z flag.  */
> +  if (GET_MODE_CLASS (mode) == MODE_INT
> +      && y == const0_rtx
> +      && GET_CODE (x) == MULT
> +      && (op == EQ || op == NE))
> +    return CC_Zmode;
> +
>    /* For an operation that sets the condition codes as a side-effect, the
>       C and V flags is not set as for cmp, so we can only use comparisons where
>       this doesn't matter.  (For LT and GE we can use "mi" and "pl"
> diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
> index 49dfc9d35af..bc2e8fadd91 100644
> --- a/gcc/config/arc/arc.md
> +++ b/gcc/config/arc/arc.md
> @@ -253,7 +253,7 @@
>     simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc,
>     simd_valign, simd_valign_with_acc, simd_vcontrol,
>     simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem,
> -   fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block"
> +   fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block, mpy"
>    (cond [(eq_attr "is_sfunc" "yes")
>          (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call")
>                 (match_test "flag_pic") (const_string "sfunc")]
> @@ -1068,11 +1068,37 @@ archs4x, archs4xd"
>     (set_attr "cond" "set_zn")
>     (set_attr "length" "*,4,4,4,8")])
>
> -;; The next two patterns are for plos, ior, xor, and, and mult.
> +(define_insn "*mulsi3_cmp0"
> +  [(set (reg:CC_Z CC_REG)
> +       (compare:CC_Z
> +        (mult:SI
> +         (match_operand:SI 1 "register_operand"  "%r,0,r")
> +         (match_operand:SI 2 "nonmemory_operand" "rL,I,i"))
> +        (const_int 0)))
> +   (set (match_operand:SI 0 "register_operand"    "=r,r,r")
> +       (mult:SI (match_dup 1) (match_dup 2)))]
> + "TARGET_MPY"
> + "mpy%?.f\\t%0,%1,%2"
> + [(set_attr "length" "4,4,8")
> +  (set_attr "type" "mpy")])
> +
> +(define_insn "*mulsi3_cmp0_noout"
> +  [(set (reg:CC_Z CC_REG)
> +       (compare:CC_Z
> +        (mult:SI
> +         (match_operand:SI 0 "register_operand"   "%r,r,r")
> +         (match_operand:SI 1 "nonmemory_operand"  "rL,I,i"))
> +        (const_int 0)))]
> + "TARGET_MPY"
> + "mpy%?.f\\t0,%0,%1"
> + [(set_attr "length" "4,4,8")
> +  (set_attr "type" "mpy")])
> +
> +;; The next two patterns are for plus, ior, xor, and.
>  (define_insn "*commutative_binary_cmp0_noout"
>    [(set (match_operand 0 "cc_set_register" "")
>         (match_operator 4 "zn_compare_operator"
> -         [(match_operator:SI 3 "commutative_operator"
> +         [(match_operator:SI 3 "commutative_operator_sans_mult"
>              [(match_operand:SI 1 "register_operand" "%r,r")
>               (match_operand:SI 2 "nonmemory_operand" "rL,Cal")])
>            (const_int 0)]))]
> @@ -1085,7 +1111,7 @@ archs4x, archs4xd"
>  (define_insn "*commutative_binary_cmp0"
>    [(set (match_operand 3 "cc_set_register" "")
>         (match_operator 5 "zn_compare_operator"
> -         [(match_operator:SI 4 "commutative_operator"
> +         [(match_operator:SI 4 "commutative_operator_sans_mult"
>              [(match_operand:SI 1 "register_operand"  "%0, 0,r,r")
>               (match_operand:SI 2 "nonmemory_operand" "rL,rI,r,Cal")])
>            (const_int 0)]))
> diff --git a/gcc/testsuite/gcc.target/arc/mult-cmp0.c b/gcc/testsuite/gcc.target/arc/mult-cmp0.c
> new file mode 100644
> index 00000000000..680c72eaa6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arc/mult-cmp0.c
> @@ -0,0 +1,66 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1" } */
> +
> +/* mpy.f   r1,r0,r1
> +   mov_s   r0,5    ;3
> +   j_s.d   [blink]
> +   mov.ne  r0,r1 */
> +unsigned int
> +ubar (unsigned int a, unsigned int b)
> +{
> +       unsigned int c = a * b;
> +       if (c == 0)
> +       {
> +               return 5;
> +       }
> +       return c;
> +}
> +
> +/*  mpy.f   r1,r0,r1
> +    mov_s   r0,5    ;3
> +    j_s.d   [blink]
> +    mov.ne  r0,r1 */
> +signed int
> +bar (signed int a, signed int b)
> +{
> +       signed int c = a * b;
> +       if (c == 0)
> +       {
> +               return 5;
> +       }
> +       return c;
> +}
> +
> +/* mpy.f   0,r0,r1
> +   mov_s   r0,1    ;3
> +   j_s.d   [blink]
> +   mov.eq  r0,5 */
> +unsigned int
> +ufoo (unsigned int a, unsigned int b)
> +{
> +       if (a * b == 0)
> +       {
> +               return 5;
> +       }
> +       return 1;
> +}
> +
> +/*  mpy.f   0,r0,r1
> +    mov_s   r0,1    ;3
> +    j_s.d   [blink]
> +    mov.eq  r0,5 */
> +unsigned int
> +foo (signed int a, signed int b)
> +{
> +       if (a * b == 0)
> +       {
> +               return 5;
> +       }
> +       return 1;
> +}
> +
> +/* { dg-final { scan-assembler-times "mpy\\.f\\s+0" 2 } } */
> +/* { dg-final { scan-assembler-times "mov\\.ne\\s+" 2 } } */
> +/* { dg-final { scan-assembler-times "mpy\\.f\\s+r" 2 } } */
> +/* { dg-final { scan-assembler-times "mov\\.eq\\s+" 2 } } */
> +
> --
> 2.37.1
>

Reply via email to