Hi,
I have a possibly silly question: is there any *standard* or *option*
(like -ffast-math) for GCC that allows double-to-float demotion as an
optimization? For example,
1) from PR22326:
#include <math.h>
float foo(float f, float x, float y) {
return (fabs(f)*x+y);
}
Here fabs returns a double result, but it could actually be demoted to
float, since the function ultimately returns float.
2) From PR90070:
double temp1 = (double)r->red;
double temp2 = (double)aggregate.red;
double temp3 = temp2 + (temp1 * 5.0);
aggregate.red = (float) temp3;
The final data type is also float, so here too we could replace the
double-precision calculation with a single-precision one.
So, for a fast-math build, is it OK to use float instead of double in
all such cases, or may we ONLY replace the cases where there is an
explicit double-to-float conversion in the source code?
However, the fast-math option doesn't directly mean that we can ignore
precision.
@item -ffast-math
@opindex ffast-math
Sets the options @option{-fno-math-errno}, @option{-funsafe-math-optimizations},
@option{-ffinite-math-only}, @option{-fno-rounding-math},
@option{-fno-signaling-nans}, @option{-fcx-limited-range} and
@option{-fexcess-precision=fast}.
For background: I wrote a patch that tracks all the double<->float
conversion instructions backwards in the backprop pass and, after a type
check, optimizes these instructions (including assignments within a
basic block and phi instructions across basic blocks) from double to
float. Although most regression test cases pass, a few gfortran cases
report an IEEE_INVALID_FLAG run-time error; I haven't yet root-caused
where the error happens.
I have doubts about whether this kind of optimization is *legal* under
fast math. Also, if there are many conversions in different
blocks/regions, how should they be split up to avoid interference?
The patch is attached. Sorry that the code contains many hacks and is not
well refined; it is still a very early version and only works
functionally for most cases.
Thanks,
Xionghu
From 8cd4e2ad438466db87bce5535af1847d3d1ef844 Mon Sep 17 00:00:00 2001
From: Xionghu Luo <luo...@linux.ibm.com>
Date: Wed, 25 Nov 2020 20:39:47 -0600
Subject: [PATCH] Implement double promotion remove in backprop
(float) ((double) abs (x) * (double) y + (double) z) could be optimized
to (float)(abs(x) * y + z) if x, y, z are all float types with fast-math
mode.
---
gcc/gimple-ssa-backprop.c | 247 +++++++++++++++++++++++++++++++++++++-
1 file changed, 243 insertions(+), 4 deletions(-)
diff --git a/gcc/gimple-ssa-backprop.c b/gcc/gimple-ssa-backprop.c
index ced0e6ed83c..e1fb09a8fb3 100644
--- a/gcc/gimple-ssa-backprop.c
+++ b/gcc/gimple-ssa-backprop.c
@@ -103,6 +103,8 @@ along with GCC; see the file COPYING3. If not see
#include "alloc-pool.h"
#include "tree-hash-traits.h"
#include "case-cfn-macros.h"
+#include "stor-layout.h"
+#include "flags.h"
namespace {
@@ -113,6 +115,8 @@ public:
usage_info () : flag_word (0) {}
usage_info &operator &= (const usage_info &);
usage_info operator & (const usage_info &) const;
+ usage_info &operator |= (const usage_info &);
+ usage_info operator | (const usage_info &) const;
bool operator == (const usage_info &) const;
bool operator != (const usage_info &) const;
bool is_useful () const;
@@ -125,6 +129,8 @@ public:
{
/* True if the uses treat x and -x in the same way. */
unsigned int ignore_sign : 1;
+ /* True if the uses treat double and float in the same way. */
+ unsigned int ignore_convert : 1;
} flags;
/* All the flag bits as a single int. */
unsigned int flag_word;
@@ -163,6 +169,21 @@ usage_info::operator & (const usage_info &other) const
return info;
}
+usage_info &
+usage_info::operator |= (const usage_info &other)
+{
+ flag_word |= other.flag_word;
+ return *this;
+}
+
+usage_info
+usage_info::operator | (const usage_info &other) const
+{
+ usage_info info (*this);
+ info |= other;
+ return info;
+}
+
bool
usage_info::operator == (const usage_info &other) const
{
@@ -203,6 +224,11 @@ dump_usage_info (FILE *file, tree var, usage_info *info)
dump_usage_prefix (file, var);
fprintf (file, "sign bit not important\n");
}
+ if (info->flags.ignore_convert)
+ {
+ dump_usage_prefix (file, var);
+ fprintf (file, "convert from float to double not important\n");
+ }
}
/* Represents one execution of the pass. */
@@ -257,6 +283,10 @@ private:
along with information that describes all uses. */
auto_vec <var_info_pair, 128> m_vars;
+ int m_converts;
+
+ bool m_start;
+
/* A bitmap of blocks that we have finished processing in the initial
post-order walk. */
auto_sbitmap m_visited_blocks;
@@ -279,7 +309,7 @@ backprop::backprop (function *fn)
: m_fn (fn),
m_info_pool ("usage_info"),
m_visited_blocks (last_basic_block_for_fn (m_fn)),
- m_worklist_names (BITMAP_ALLOC (NULL))
+ m_worklist_names (BITMAP_ALLOC (NULL)), m_converts(0), m_start(false)
{
bitmap_clear (m_visited_blocks);
}
@@ -413,10 +443,26 @@ backprop::process_assign_use (gassign *assign, tree rhs,
usage_info *info)
tree lhs = gimple_assign_lhs (assign);
switch (gimple_assign_rhs_code (assign))
{
+ CASE_CONVERT:
+ {
+ tree rhs = gimple_assign_rhs1 (assign);
+ if (TREE_TYPE (lhs) == float_type_node
+ && TREE_TYPE (rhs) == double_type_node)
+ m_start = true;
+ if (m_start && FLOAT_TYPE_P (TREE_TYPE (rhs)))
+ {
+ info->flags.ignore_convert = true;
+ m_converts++;
+ }
+ }
+ break;
+
case ABS_EXPR:
case ABSU_EXPR:
/* The sign of the input doesn't matter. */
info->flags.ignore_sign = true;
+ if (m_start && FLOAT_TYPE_P (TREE_TYPE (rhs)))
+ info->flags.ignore_convert = true;
break;
case COND_EXPR:
@@ -446,6 +492,18 @@ backprop::process_assign_use (gassign *assign, tree rhs,
usage_info *info)
const usage_info *lhs_info = lookup_operand (lhs);
if (lhs_info)
info->flags.ignore_sign = lhs_info->flags.ignore_sign;
+ if (m_start && lhs_info)
+ info->flags.ignore_convert = lhs_info->flags.ignore_convert;
+ }
+ break;
+
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ if (FLOAT_TYPE_P (TREE_TYPE (rhs)))
+ {
+ const usage_info *lhs_info = lookup_operand (lhs);
+ if (m_start && lhs_info)
+ info->flags.ignore_convert = lhs_info->flags.ignore_convert;
}
break;
@@ -595,8 +653,8 @@ backprop::process_var (tree var)
else if (info != *map_info)
{
/* Recording information that is less optimistic than before. */
- gcc_checking_assert ((info & *map_info) == info);
- *map_info = info;
+ //gcc_checking_assert ((info & *map_info) == info);
+ *map_info |= info;
if (dump_file && (dump_flags & TDF_DETAILS))
dump_var_info (var, map_info, "Updating information");
reprocess_inputs (stmt);
@@ -658,7 +716,9 @@ remove_unused_var (tree var)
}
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
gsi_remove (&gsi, true);
- release_defs (stmt);
+ gphi *phi = dyn_cast<gphi *> (stmt);
+ if (!phi)
+ release_defs (stmt);
}
/* Note that we're replacing OLD_RHS with NEW_RHS in STMT. */
@@ -808,8 +868,32 @@ backprop::replace_assign_rhs (gassign *assign, tree lhs,
tree rhs1,
void
backprop::optimize_assign (gassign *assign, tree lhs, const usage_info *info)
{
+ bool changed = false;
switch (gimple_assign_rhs_code (assign))
{
+ CASE_CONVERT:
+ if (m_converts && info->flags.ignore_convert
+ && fast_math_flags_set_p (&global_options))
+ {
+ tree rhs = gimple_assign_rhs1 (assign);
+ if (TREE_TYPE (lhs) == double_type_node
+ && TREE_TYPE (rhs) == float_type_node)
+ {
+ /* _2 = (double) _1; replace lhs with rhs. */
+ replace_uses_by (lhs, rhs);
+ changed = true;
+ }
+ else if (TREE_TYPE (lhs) == double_type_node
+ && TREE_TYPE (rhs) == float_type_node)
+ {
+ /* _10 = (floatD.26) _6; replace rhs with lhs. */
+ replace_uses_by (rhs, lhs);
+ changed = true;
+ }
+ }
+ break;
+ case PLUS_EXPR:
+ case MINUS_EXPR:
case MULT_EXPR:
case RDIV_EXPR:
/* If the sign of the result doesn't matter, strip sign operations
@@ -819,6 +903,26 @@ backprop::optimize_assign (gassign *assign, tree lhs,
const usage_info *info)
strip_sign_op (gimple_assign_rhs1 (assign)),
strip_sign_op (gimple_assign_rhs2 (assign)),
NULL_TREE);
+ if (m_converts && info->flags.ignore_convert
+ && fast_math_flags_set_p (&global_options))
+ {
+ tree rhs1 = gimple_assign_rhs1 (assign);
+ tree rhs2 = gimple_assign_rhs2 (assign);
+ if (TREE_TYPE (lhs) == double_type_node
+ && TREE_TYPE (rhs1) == float_type_node
+ && TREE_TYPE (rhs2) == float_type_node)
+ {
+ /* _4 = _2 +-* _3; replace lhs with new float type lhs. */
+ tree new_lhs = make_ssa_name (float_type_node);
+ gassign *stmt
+ = gimple_build_assign (new_lhs, gimple_assign_rhs_code (assign),
+ rhs1, rhs2);
+ gimple_stmt_iterator gsi = gsi_for_stmt (assign);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ replace_uses_by (lhs, new_lhs);
+ changed = true;
+ }
+ }
break;
case COND_EXPR:
@@ -834,6 +938,78 @@ backprop::optimize_assign (gassign *assign, tree lhs,
const usage_info *info)
default:
break;
}
+
+ if (!changed && m_converts && info->flags.ignore_convert
+ && fast_math_flags_set_p (&global_options))
+ {
+ enum tree_code code = gimple_assign_rhs_code (assign);
+ tree rhs1 = gimple_assign_rhs1 (assign);
+ tree rhs2 = gimple_assign_rhs2 (assign);
+ tree rhs3 = gimple_assign_rhs3 (assign);
+ location_t loc = gimple_location (assign);
+ tree new_rhs = NULL;
+
+ if (TREE_TYPE (lhs) == double_type_node)
+ {
+ /* _2 = ABS_EXPR <x_7(D)>; */
+ if (!rhs2)
+ {
+ if (gimple_assign_rhs_code (assign) == NEGATE_EXPR
+ && TREE_TYPE (rhs1) == float_type_node
+ && TREE_TYPE (lhs) == double_type_node)
+ {
+ /* rsrhoa_166 = -_265; */
+ tree lhs2 = make_ssa_name (float_type_node);
+ gassign *stmt = gimple_build_assign (lhs2, rhs1);
+ gimple_stmt_iterator gsi = gsi_for_stmt (assign);
+ replace_uses_by (lhs, lhs2);
+ gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
+ return;
+ }
+ if (TREE_TYPE (rhs1) != float_type_node)
+ rhs1 = fold_convert_loc (loc, float_type_node, rhs1);
+ tree new_lhs1 = make_ssa_name (float_type_node);
+ gassign *stmt1 = gimple_build_assign (new_lhs1, rhs1);
+ gimple_stmt_iterator gsi1 = gsi_for_stmt (assign);
+ gsi_insert_after (&gsi1, stmt1, GSI_SAME_STMT);
+
+ tree new_lhs = make_ssa_name (float_type_node);
+ gimple *stmt;
+ if (code != FLOAT_EXPR)
+ stmt = gimple_build_assign (new_lhs, code, new_lhs1);
+ else
+ stmt = gimple_build_assign (new_lhs, new_lhs1);
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
+ gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
+
+ replace_uses_by (lhs, new_lhs);
+ }
+ else
+ {
+ /* _6 = _5 * 2.0e+0; */
+ if (TREE_TYPE (rhs1) != float_type_node)
+ rhs1 = fold_convert_loc (loc, float_type_node, rhs1);
+ tree new_lhs1 = make_ssa_name (float_type_node);
+ gassign *stmt1 = gimple_build_assign (new_lhs1, rhs1);
+ gimple_stmt_iterator gsi1 = gsi_for_stmt (assign);
+ gsi_insert_after (&gsi1, stmt1, GSI_SAME_STMT);
+
+ if (TREE_TYPE (rhs2) != float_type_node)
+ rhs2 = fold_convert_loc (loc, float_type_node, rhs2);
+ tree new_lhs2 = make_ssa_name (float_type_node);
+ gassign *stmt2 = gimple_build_assign (new_lhs2, rhs2);
+ gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt1);
+ gsi_insert_after (&gsi2, stmt2, GSI_SAME_STMT);
+
+ tree new_lhs = make_ssa_name (float_type_node);
+ gimple *stmt = gimple_build_assign (new_lhs, code, new_lhs1,
new_lhs2);
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt2);
+ gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
+
+ replace_uses_by (lhs, new_lhs);
+ }
+ }
+ }
}
/* Optimize PHI, which defines VAR, on the basis that INFO describes all
@@ -869,6 +1045,69 @@ backprop::optimize_phi (gphi *phi, tree var, const
usage_info *info)
}
}
}
+ if (info->flags.ignore_convert && fast_math_flags_set_p (&global_options))
+ {
+ basic_block bb = gimple_bb (phi);
+ use_operand_p use;
+ ssa_op_iter oi;
+ bool replaced = false;
+ tree lhs = gimple_phi_result (phi);
+
+ bool update_phi = false;
+ if (TREE_TYPE (lhs) == double_type_node)
+ update_phi = true;
+ else if (TREE_TYPE (lhs) == float_type_node)
+ {
+ FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
+ {
+ tree t = USE_FROM_PTR (use);
+
+ if (TREE_TYPE (t) != float_type_node)
+ update_phi = true;
+ }
+ }
+
+ if (update_phi)
+ {
+ tree new_lhs = make_ssa_name (float_type_node);
+
+ gphi *new_phi = create_phi_node (new_lhs, bb);
+ gimple_phi_set_result (new_phi, new_lhs);
+
+ FOR_EACH_PHI_ARG (use, phi, oi, SSA_OP_USE)
+ {
+ /* Propagating along abnormal edges is delicate, punt for now. */
+ const int index = PHI_ARG_INDEX_FROM_USE (use);
+ if (EDGE_PRED (bb, index)->flags & EDGE_ABNORMAL)
+ continue;
+
+ location_t loc = gimple_location (phi);
+ tree t = USE_FROM_PTR (use);
+ if (TREE_CODE (t) == REAL_CST)
+ {
+ t = fold_convert_loc (loc, float_type_node, t);
+ replace_exp (use, t);
+ add_phi_arg (new_phi, t, gimple_phi_arg_edge (phi, index),
+ loc);
+ }
+ else
+ add_phi_arg (new_phi, t, gimple_phi_arg_edge (phi, index), loc);
+ }
+
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ gimple *using_stmt;
+ FOR_EACH_IMM_USE_STMT (using_stmt, iter, lhs)
+ {
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ {
+ SET_USE (use_p, new_lhs);
+ }
+ update_stmt (using_stmt);
+ }
+ replace_uses_by (lhs, new_lhs);
+ }
+ }
}
void
--
2.25.1