On Tue, Apr 1, 2025 at 4:40 PM Hongyu Wang <hongyu.w...@intel.com> wrote: > > Hi, > > For the splitter after <rotate_insn><mode>3_mask, it now splits the pattern > to *<rotate_insn><mode>3_mask, so the splitter doesn't generate > the nf variant. Add a corresponding nf counterpart for define_insn_and_split > to make the splitter also work for nf insns. > > Bootstrapped & regtested on x86-64-pc-linux-gnu. > > Ok for trunk? > > gcc/ChangeLog: > > PR target/119539 > * config/i386/i386.md (*<insn><mode>3_mask_nf): New > define_insn_and_split. > (*<insn><mode>3_mask_1_nf): Likewise. > (*<insn><mode>3_mask): Use force_lowpart_subreg. > > gcc/testsuite/ChangeLog: > > PR target/119539 > * gcc.target/i386/apx-nf-pr119539.c: New test. > --- > gcc/config/i386/i386.md | 46 ++++++++++++++++++- > .../gcc.target/i386/apx-nf-pr119539.c | 6 +++ > 2 files changed, 50 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index f7f790d2aeb..42312f0c330 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -18131,6 +18131,30 @@ (define_expand "<insn><mode>3" > DONE; > }) > > +;; Avoid useless masking of count operand. 
> +(define_insn_and_split "*<insn><mode>3_mask_nf" > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > + (subreg:QI > + (and > + (match_operand 2 "int248_register_operand" "c") > + (match_operand 3 "const_int_operand")) 0)))] > + "TARGET_APX_NF > + && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) > + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) > + == GET_MODE_BITSIZE (<MODE>mode)-1 > + && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (any_rotate:SWI (match_dup 1) > + (match_dup 2)))] > +{ > + operands[2] = force_lowpart_subreg (QImode, operands[2], > + GET_MODE (operands[2])); > +}) Can we just change the output in the original pattern? I think combine will still match the pattern even with the flags clobber.
like @@ -17851,8 +17851,17 @@ (define_insn_and_split "*<insn><mode>3_mask" (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] { - operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); - operands[2] = gen_lowpart (QImode, operands[2]); + if (TARGET_APX_F) + { + emit_move_insn (operands[0], + gen_rtx_<code> (<MODE>mode, operands[1], operands[2])); + DONE; + } + else + { + operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); + operands[2] = gen_lowpart (QImode, operands[2]); + } } Also we can remove constraint "c" in the original pattern. > + > ;; Avoid useless masking of count operand. > (define_insn_and_split "*<insn><mode>3_mask" > [(set (match_operand:SWI 0 "nonimmediate_operand") > @@ -18153,8 +18177,8 @@ (define_insn_and_split "*<insn><mode>3_mask" > (match_dup 2))) > (clobber (reg:CC FLAGS_REG))])] > { > - operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); > - operands[2] = gen_lowpart (QImode, operands[2]); > + operands[2] = force_lowpart_subreg (QImode, operands[2], > + GET_MODE (operands[2])); > }) > > (define_split > @@ -18174,6 +18198,24 @@ (define_split > (and:SI (match_dup 2) (match_dup 3)) 0)))] > "operands[4] = gen_reg_rtx (<MODE>mode);") > > +(define_insn_and_split "*<insn><mode>3_mask_1_nf" > + [(set (match_operand:SWI 0 "nonimmediate_operand") > + (any_rotate:SWI > + (match_operand:SWI 1 "nonimmediate_operand") > + (and:QI > + (match_operand:QI 2 "register_operand" "c") > + (match_operand:QI 3 "const_int_operand"))))] > + "TARGET_APX_NF > + && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) > + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) > + == GET_MODE_BITSIZE (<MODE>mode)-1 > + && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (any_rotate:SWI (match_dup 1) > + (match_dup 2)))]) > + > (define_insn_and_split "*<insn><mode>3_mask_1" > [(set (match_operand:SWI 0 "nonimmediate_operand") > (any_rotate:SWI > diff --git a/gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c > 
b/gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c > new file mode 100644 > index 00000000000..5dfec55ed76 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-nf-pr119539.c > @@ -0,0 +1,6 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-mapx-features=nf -march=x86-64 -O2" } */ > +/* { dg-final { scan-assembler-times "\{nf\} rol" 2 } } */ > + > +long int f1 (int x) { return ~(1ULL << (x & 0x3f)); } > +long int f2 (char x) { return ~(1ULL << (x & 0x3f)); } > -- > 2.31.1 > -- BR, Hongtao