Hello!

The attached patch fixes the STV cost function to better model the gain
of the pandn insn on non-BMI targets. As explained in the PR, STV converts
four scalar arithmetic insns (2 * not and 2 * and) to one (pandn). The
patch increases the gain for non-BMI targets by 2 * ix86_cost->add, to a
total of 3 * ix86_cost->add.

2016-12-13  Uros Bizjak  <ubiz...@gmail.com>

    PR target/78794
    * config/i386/i386.c (dimode_scalar_chain::compute_convert_gain):
    Calculate additional gain for andnot for targets without BMI.

testsuite/ChangeLog:

2016-12-13  Uros Bizjak  <ubiz...@gmail.com>

    PR target/78794
    * gcc.target/i386/pr78794.c: New test.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 243611)
+++ config/i386/i386.c  (working copy)
@@ -3419,6 +3419,10 @@ dimode_scalar_chain::compute_convert_gain ()
               || GET_CODE (src) == AND)
        {
          gain += ix86_cost->add;
+         /* Additional gain for andnot for targets without BMI.  */
+         if (GET_CODE (XEXP (src, 0)) == NOT
+             && !TARGET_BMI)
+           gain += 2 * ix86_cost->add;
          if (CONST_INT_P (XEXP (src, 0)))
            gain -= vector_const_cost (XEXP (src, 0));
          if (CONST_INT_P (XEXP (src, 1)))
Index: testsuite/gcc.target/i386/pr78794.c
===================================================================
--- testsuite/gcc.target/i386/pr78794.c (nonexistent)
+++ testsuite/gcc.target/i386/pr78794.c (working copy)
@@ -0,0 +1,32 @@
+/* PR target/78794 */
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2 -march=slm -mno-bmi -mno-stackrealign" } */
+/* { dg-final { scan-assembler "pandn" } } */
+
+typedef unsigned long long ull;
+
+struct S1
+{
+  float x;
+  ull y;  /* The member whose bits foo tests and toggles.  */
+};
+
+
+struct S2
+{
+  int a1;  /* Upper bound of foo's loop index.  */
+  struct S1 *node;  /* Accessed by foo as node[i].  */
+  int *a2;
+};
+
+void
+foo(int c1, int c2, int c3, struct S2 *reg)
+{  /* For each node, flip bit c3 of y when bits c1 and c2 are both set.  */
+  int i;
+  for(i=0; i<reg->a1; i++)  /* Visits node[0] .. node[a1 - 1].  */
+    if(reg->node[i].y & ((ull) 1 << c1))  /* Bit c1 of y is set...  */
+      {
+       if(reg->node[i].y & ((ull) 1 << c2))  /* ...and bit c2 as well.  */
+         reg->node[i].y ^= ((ull) 1 << c3);  /* Toggle bit c3 of y.  */
+      }
+}

Reply via email to