Hi!

The clzl-like integral unary builtins at least from my short testing
on x86_64-linux and aarch64-linux usually benefit from
factor_out_conditional_conversion not being performed, i.e. the
sign-extension (or zero-extension) being performed closer to the actual
builtin, because both library calls and inline code usually computes
the result in the same mode as the single argument has, so the artificial
cast to int for the return value and back can be optimized away if it is
adjacent, but not if it happens in some other bb.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Or shall I also check that gimple_bb (SSA_NAME_DEF_STMT (new_arg0))
is equal to gimple_bb (arg0_def_stmt)?

2017-01-02  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/71016
        * tree-ssa-phiopt.c: Include case-cfn-macros.h.
        (factor_out_conditional_conversion): Don't optimize if new_arg0 is
        result of unary integral builtin, new_arg1 is a constant and
        arg0 has the same precision as builtin's argument.  Formatting fix.

--- gcc/tree-ssa-phiopt.c.jj    2017-01-01 12:45:37.000000000 +0100
+++ gcc/tree-ssa-phiopt.c       2017-01-02 17:01:52.885447496 +0100
@@ -45,6 +45,7 @@ along with GCC; see the file COPYING3.
 #include "tree-scalar-evolution.h"
 #include "tree-inline.h"
 #include "params.h"
+#include "case-cfn-macros.h"
 
 static unsigned int tree_ssa_phiopt_worker (bool, bool);
 static bool conditional_replacement (basic_block, basic_block,
@@ -490,6 +491,36 @@ factor_out_conditional_conversion (edge
   if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
     return NULL;
 
+  /* Don't factor out cases where new_arg0 is result of an unary builtin
+     like clzl and arg0's type has the same precision as the builtin's
+     argument.  The reason for that is that the inline expansion of
+     those builtins typically computes the result in the same mode as
+     the argument and then just artificially casts it to int, so such
+     conversions undo that artificial casts and are for free.  See PR71016.  */
+  if (TREE_CODE (new_arg0) == SSA_NAME
+      && is_gimple_call (SSA_NAME_DEF_STMT (new_arg0))
+      && TREE_CODE (new_arg1) == INTEGER_CST)
+    switch (gimple_call_combined_fn (SSA_NAME_DEF_STMT (new_arg0)))
+      {
+       tree barg;
+      CASE_CFN_CLZ:
+      CASE_CFN_CTZ:
+      CASE_CFN_CLRSB:
+      CASE_CFN_FFS:
+      CASE_CFN_PARITY:
+      CASE_CFN_POPCOUNT:
+       barg = gimple_call_arg (SSA_NAME_DEF_STMT (new_arg0), 0);
+       if (INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+           && TREE_CODE (barg) == SSA_NAME
+           && INTEGRAL_TYPE_P (TREE_TYPE (barg))
+           && (TYPE_PRECISION (TREE_TYPE (arg0))
+               == TYPE_PRECISION (TREE_TYPE (barg))))
+         return NULL;
+       break;
+      default:
+       break;
+      }
+
   /* Create a new PHI stmt.  */
   result = PHI_RESULT (phi);
   temp = make_ssa_name (TREE_TYPE (new_arg0), NULL);
@@ -524,7 +555,7 @@ factor_out_conditional_conversion (edge
   /* Create the conversion stmt and insert it.  */
   if (convert_code == VIEW_CONVERT_EXPR)
     temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp);
-  new_stmt = gimple_build_assign (result,  convert_code, temp);
+  new_stmt = gimple_build_assign (result, convert_code, temp);
   gsi = gsi_after_labels (gimple_bb (phi));
   gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 

        Jakub

Reply via email to