On Tue, Apr 1, 2025 at 4:40 PM Hongyu Wang <hongyu.w...@intel.com> wrote:
>
> Hi,
>
> For the splitter after <rotate_insn><mode>3_mask, it now splits the pattern
> to *<rotate_insn><mode>3_mask, so the splitter doesn't generate the
> nf variant. Add the corresponding nf counterpart for define_insn_and_split
> to make the splitter also work for nf insns.
>
> Bootstrapped & regtested on x86-64-pc-linux-gnu.
>
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/119539
>         * config/i386/i386.md (*<insn><mode>3_mask_nf): New
>         define_insn_and_split.
>         (*<insn><mode>3_mask_1_nf): Likewise.
>         (*<insn><mode>3_mask): Use force_lowpart_subreg.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/119539
>         * gcc.target/i386/apx-nf-pr119539.c: New test.
> ---
>  gcc/config/i386/i386.md                       | 46 ++++++++++++++++++-
>  .../gcc.target/i386/apx-nf-pr119539.c         |  6 +++
>  2 files changed, 50 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index f7f790d2aeb..42312f0c330 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -18131,6 +18131,30 @@ (define_expand "<insn><mode>3"
>    DONE;
>  })
>
> +;; Avoid useless masking of count operand.
> +(define_insn_and_split "*<insn><mode>3_mask_nf"
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
> +         (subreg:QI
> +           (and
> +             (match_operand 2 "int248_register_operand" "c")
> +             (match_operand 3 "const_int_operand")) 0)))]
> +  "TARGET_APX_NF
> +   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
> +      == GET_MODE_BITSIZE (<MODE>mode)-1
> +   && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (any_rotate:SWI (match_dup 1)
> +                       (match_dup 2)))]
> +{
> +  operands[2] = force_lowpart_subreg (QImode, operands[2],
> +                                     GET_MODE (operands[2]));
> +})
Can we just change the output in the original pattern? I think combine
will still match the pattern even with the flags clobber.

like

@@ -17851,8 +17851,17 @@ (define_insn_and_split "*<insn><mode>3_mask"
                            (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
-  operands[2] = gen_lowpart (QImode, operands[2]);
+  if (TARGET_APX_F)
+    {
+      emit_move_insn (operands[0],
+                    gen_rtx_<code> (<MODE>mode, operands[1], operands[2]));
+      DONE;
+    }
+  else
+    {
+      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
+      operands[2] = gen_lowpart (QImode, operands[2]);
+    }
 }

Also, we can remove the "c" constraint in the original pattern.

> +
>  ;; Avoid useless masking of count operand.
>  (define_insn_and_split "*<insn><mode>3_mask"
>    [(set (match_operand:SWI 0 "nonimmediate_operand")
> @@ -18153,8 +18177,8 @@ (define_insn_and_split "*<insn><mode>3_mask"
>                            (match_dup 2)))
>        (clobber (reg:CC FLAGS_REG))])]
>  {
> -  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
> -  operands[2] = gen_lowpart (QImode, operands[2]);
> +  operands[2] = force_lowpart_subreg (QImode, operands[2],
> +                                     GET_MODE (operands[2]));
>  })
>
>  (define_split
> @@ -18174,6 +18198,24 @@ (define_split
>                          (and:SI (match_dup 2) (match_dup 3)) 0)))]
>   "operands[4] = gen_reg_rtx (<MODE>mode);")
>
> +(define_insn_and_split "*<insn><mode>3_mask_1_nf"
> +  [(set (match_operand:SWI 0 "nonimmediate_operand")
> +       (any_rotate:SWI
> +         (match_operand:SWI 1 "nonimmediate_operand")
> +         (and:QI
> +           (match_operand:QI 2 "register_operand" "c")
> +           (match_operand:QI 3 "const_int_operand"))))]
> +  "TARGET_APX_NF
> +   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
> +      == GET_MODE_BITSIZE (<MODE>mode)-1
> +   && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (any_rotate:SWI (match_dup 1)
> +                       (match_dup 2)))])
> +
>  (define_insn_and_split "*<insn><mode>3_mask_1"
>    [(set (match_operand:SWI 0 "nonimmediate_operand")
>         (any_rotate:SWI
> diff --git a/gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c b/gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c
> new file mode 100644
> index 00000000000..5dfec55ed76
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mapx-features=nf -march=x86-64 -O2" } */
> +/* { dg-final { scan-assembler-times "\{nf\} rol" 2 } } */
> +
> +long int f1 (int x) { return ~(1ULL << (x & 0x3f)); }
> +long int f2 (char x) { return ~(1ULL << (x & 0x3f)); }
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to