Hi, This patch continues resolving the andn regression case in the stv pass (see https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01017.html). It adds a new andn pattern, similar to the other DImode bit-operation patterns we have for the stv pass.
This improves the performance of the 462.libquantum benchmark on Haswell (+2.6% on -O2, +1% on -O3 -flto). Unfortunately, this patch doesn't enable generation of pandn when the target doesn't have BMI. Perhaps a peephole could be used for such targets? Or we could allow andn for them and then split it back to and + xor. Bootstrapped and regtested on x86_64-unknown-linux-gnu. OK for trunk? Thanks, Ilya -- gcc/ 2016-01-15 Ilya Enkovich <enkovich....@gmail.com> * config/i386/i386.c (scalar_to_vector_candidate_p): Support andnot instruction. (scalar_chain::convert_op): Likewise. * config/i386/i386.md (*andndi3_doubleword): New. gcc/testsuite/ 2016-01-15 Ilya Enkovich <enkovich....@gmail.com> * gcc.target/i386/pr65105-5.c: Adjust to andn generation. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index de41477..a0b0d68 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2815,7 +2815,11 @@ scalar_to_vector_candidate_p (rtx_insn *insn) return false; } - if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))) + if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)) + /* Check for andnot case. 
*/ + && (GET_CODE (src) != AND + || GET_CODE (XEXP (src, 0)) != NOT + || !REG_P (XEXP (XEXP (src, 0), 0)))) return false; if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) @@ -3383,7 +3387,12 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn) { *op = copy_rtx_if_shared (*op); - if (MEM_P (*op)) + if (GET_CODE (*op) == NOT) + { + convert_op (&XEXP (*op, 0), insn); + PUT_MODE (*op, V2DImode); + } + else if (MEM_P (*op)) { rtx tmp = gen_reg_rtx (DImode); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 71941d0..f16b42a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8645,6 +8645,23 @@ (clobber (reg:CC FLAGS_REG))])] "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") +(define_insn_and_split "*andndi3_doubleword" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (and:DI + (not:DI (match_operand:DI 1 "register_operand" "r,r")) + (match_operand:DI 2 "nonimmediate_operand" "r,m"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (and:SI (not:SI (match_dup 1)) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 3) + (and:SI (not:SI (match_dup 4)) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") + (define_insn "*<code>hi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k") (any_or:HI diff --git a/gcc/testsuite/gcc.target/i386/pr65105-5.c b/gcc/testsuite/gcc.target/i386/pr65105-5.c index 5818c1c..639bbe1 100644 --- a/gcc/testsuite/gcc.target/i386/pr65105-5.c +++ b/gcc/testsuite/gcc.target/i386/pr65105-5.c @@ -1,7 +1,7 @@ /* PR target/pr65105 */ /* { dg-do compile { target { ia32 } } } */ /* { dg-options "-O2 -march=core-avx2" } */ -/* { dg-final { scan-assembler "pand" } } */ +/* { dg-final { scan-assembler "pandn" } } */ /* { dg-final { scan-assembler "pxor" } } */ /* { 
dg-final { scan-assembler "ptest" } } */