On 03/05/18 10:20, Richard Biener wrote:
> On Thu, May 3, 2018 at 10:39 AM, Kyrill Tkachov
> <kyrylo.tkac...@foss.arm.com> wrote:
>>
>> On 02/05/18 17:45, Wilco Dijkstra wrote:
>>>
>>> Richard Biener wrote:
>>>
>>>> why use BUILT_IN_ISUNORDERED but not a GIMPLE_COND with
>>>> UNORDERED_EXPR?  Note again that might trap/throw with -fsignalling-nans
>>>> so better avoid this transform for flag_signalling_nans as well...
>>>
>>> Both currently trap on signalling NaNs due to the implementation of the
>>> C99 builtins being buggy (PR66462).
>>>
>>> However since the inputs are results of FP operations, they are guaranteed
>>> to be quiet NaNs in this case, so no extra test for signalling NaNs is
>>> required.
>>
>> I'm happy to remove the check on flag_signaling_nans from the patch.
>
> The issue is not actual runtime behavior but GIMPLE IL sanity.  We force
> possibly trapping compares out of conditions:
>
> double x;
> int main()
> {
>   try {
>     if (__builtin_isunordered (x, x))
>       __builtin_abort ();
>   } catch (...) {
>   }
> }
>
> with -fsignaling-nans -fnon-call-exceptions you get
>
>   _3 = x.1_1 unord x.2_2;
>   if (_3 != 0) goto <D.2364>; else goto <D.2365>;
>
> while without either it is
>
>   if (x.1_1 unord x.2_2) goto <D.2364>; else goto <D.2365>;
>
> I'm not sure it actually matters here - at least on GIMPLE there's no
> later sanity-checking on this.  So maybe you can remove the condition
> (but please quickly try a testcase with -fnon-call-exceptions and
> -fsignaling-nans and an EH tree - I suppose you hit the
> stmt_can_throw_internal case once you have some EH handlers here).
>
> +  rr = create_tmp_var (inner_type);
> +  rr = make_ssa_name (rr);
> +  ri = create_tmp_var (inner_type);
> +  ri = make_ssa_name (ri);
>
> just use
>
>   rr = make_ssa_name (inner_type);
>   ri = make_ssa_name (inner_type);
>
> Otherwise OK.
>
As discussed in PR 85627 and on IRC, I've committed this with the above
change as r259889.

Thanks for the guidance!
Kyrill
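Since the interesting part is buried in the tree-complex.c hunks below, here
is a rough source-level equivalent of what the cplxlower pass now generates
for a double complex multiply at -O2.  This is a hand-written sketch for
illustration only, not compiler output (the function name "mul" is made up;
__muldc3 is libgcc's C99 Annex G helper for double):

  extern _Complex double __muldc3 (double, double, double, double);

  _Complex double
  mul (_Complex double a, _Complex double b)
  {
    double ar = __real__ a, ai = __imag__ a;
    double br = __real__ b, bi = __imag__ b;

    /* Fast path: the naive four-multiply expansion.  */
    double rr = ar * br - ai * bi;
    double ri = ar * bi + br * ai;

    /* Slow path, taken only if the fast path produced a NaN in either
       component.  Per Richard's review the patch emits this check as an
       UNORDERED_EXPR GIMPLE_COND rather than a builtin call.  */
    if (__builtin_isunordered (rr, ri))
      return __muldc3 (ar, ai, br, bi);

    _Complex double res;
    __real__ res = rr;
    __imag__ res = ri;
    return res;
  }

The branch to __muldc3 is given profile_probability::very_unlikely (), so in
the common case only the inline multiply executes.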
diff --git a/gcc/testsuite/gcc.dg/complex-6.c b/gcc/testsuite/gcc.dg/complex-6.c
new file mode 100644
index 0000000000000000000000000000000000000000..e70322bf6f378d1d6947de4c10f671bc0a7ded49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/complex-6.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/70291.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower" } */
+
+__complex float
+foo (__complex float a, __complex float b)
+{
+  return a * b;
+}
+
+/* { dg-final { scan-tree-dump-times "unord" 1 "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "__mulsc3" 1 "cplxlower1" } } */
diff --git a/gcc/testsuite/gcc.dg/complex-7.c b/gcc/testsuite/gcc.dg/complex-7.c
new file mode 100644
index 0000000000000000000000000000000000000000..78f1a290e34af0c092a00639d979bc32b332b1da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/complex-7.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/70291.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cplxlower" } */
+
+__complex double
+foo (__complex double a, __complex double b)
+{
+  return a * b;
+}
+
+/* { dg-final { scan-tree-dump-times "unord" 1 "cplxlower1" } } */
+/* { dg-final { scan-tree-dump-times "__muldc3" 1 "cplxlower1" } } */
diff --git a/gcc/tree-complex.c b/gcc/tree-complex.c
index 622b8696399b9e9d8bddcc6340d2f8d8ca852637..87e27aacb517c52924edafb8fd0916a08b1589fd 100644
--- a/gcc/tree-complex.c
+++ b/gcc/tree-complex.c
@@ -978,22 +978,22 @@ expand_complex_addition (gimple_stmt_iterator *gsi, tree inner_type,
 }
 
 /* Expand a complex multiplication or division to a libcall to the c99
-   compliant routines.  */
+   compliant routines.  TYPE is the complex type of the operation.
+   If INPLACE_P replace the statement at GSI with
+   the libcall and return NULL_TREE.  Else insert the call, assign its
+   result to an output variable and return that variable.  If INPLACE_P
+   is true then the statement being replaced should be an assignment
+   statement.  */
 
-static void
-expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai,
-                        tree br, tree bi, enum tree_code code)
+static tree
+expand_complex_libcall (gimple_stmt_iterator *gsi, tree type, tree ar, tree ai,
+                        tree br, tree bi, enum tree_code code, bool inplace_p)
 {
   machine_mode mode;
   enum built_in_function bcode;
-  tree fn, type, lhs;
-  gimple *old_stmt;
+  tree fn, lhs;
   gcall *stmt;
 
-  old_stmt = gsi_stmt (*gsi);
-  lhs = gimple_assign_lhs (old_stmt);
-  type = TREE_TYPE (lhs);
-
   mode = TYPE_MODE (type);
   gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
 
@@ -1008,21 +1008,65 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai,
   fn = builtin_decl_explicit (bcode);
   stmt = gimple_build_call (fn, 4, ar, ai, br, bi);
-  gimple_call_set_lhs (stmt, lhs);
-  update_stmt (stmt);
-  gsi_replace (gsi, stmt, false);
 
-  if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt))
-    gimple_purge_dead_eh_edges (gsi_bb (*gsi));
-
-  if (gimple_in_ssa_p (cfun))
+  if (inplace_p)
     {
+      gimple *old_stmt = gsi_stmt (*gsi);
+      lhs = gimple_assign_lhs (old_stmt);
+      gimple_call_set_lhs (stmt, lhs);
+      update_stmt (stmt);
+      gsi_replace (gsi, stmt, false);
+
+      if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt))
+        gimple_purge_dead_eh_edges (gsi_bb (*gsi));
+
       type = TREE_TYPE (type);
       update_complex_components (gsi, stmt,
-                                 build1 (REALPART_EXPR, type, lhs),
-                                 build1 (IMAGPART_EXPR, type, lhs));
+                                 build1 (REALPART_EXPR, type, lhs),
+                                 build1 (IMAGPART_EXPR, type, lhs));
       SSA_NAME_DEF_STMT (lhs) = stmt;
+      return NULL_TREE;
     }
+
+  lhs = create_tmp_var (type);
+  gimple_call_set_lhs (stmt, lhs);
+
+  lhs = make_ssa_name (lhs, stmt);
+  gimple_call_set_lhs (stmt, lhs);
+
+  update_stmt (stmt);
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  return lhs;
+}
+
+/* Perform a complex multiplication on two complex constants A, B represented
+   by AR, AI, BR, BI of type TYPE.
+   The operation we want is: a * b = (ar*br - ai*bi) + i(ar*bi + br*ai).
+   Insert the GIMPLE statements into GSI.  Store the real and imaginary
+   components of the result into RR and RI.  */
+
+static void
+expand_complex_multiplication_components (gimple_stmt_iterator *gsi,
+                                          tree type, tree ar, tree ai,
+                                          tree br, tree bi,
+                                          tree *rr, tree *ri)
+{
+  tree t1, t2, t3, t4;
+
+  t1 = gimplify_build2 (gsi, MULT_EXPR, type, ar, br);
+  t2 = gimplify_build2 (gsi, MULT_EXPR, type, ai, bi);
+  t3 = gimplify_build2 (gsi, MULT_EXPR, type, ar, bi);
+
+  /* Avoid expanding redundant multiplication for the common
+     case of squaring a complex number.  */
+  if (ar == br && ai == bi)
+    t4 = t3;
+  else
+    t4 = gimplify_build2 (gsi, MULT_EXPR, type, ai, br);
+
+  *rr = gimplify_build2 (gsi, MINUS_EXPR, type, t1, t2);
+  *ri = gimplify_build2 (gsi, PLUS_EXPR, type, t3, t4);
 }
 
 /* Expand complex multiplication to scalars:
@@ -1030,11 +1074,12 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai,
 */
 
 static void
-expand_complex_multiplication (gimple_stmt_iterator *gsi, tree inner_type,
+expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type,
                                tree ar, tree ai, tree br, tree bi,
                                complex_lattice_t al, complex_lattice_t bl)
 {
   tree rr, ri;
+  tree inner_type = TREE_TYPE (type);
 
   if (al < bl)
     {
@@ -1080,27 +1125,77 @@ expand_complex_multiplication (gimple_stmt_iterator *gsi, tree inner_type,
     case PAIR (VARYING, VARYING):
       if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type))
         {
-          expand_complex_libcall (gsi, ar, ai, br, bi, MULT_EXPR);
-          return;
-        }
-      else
-        {
-          tree t1, t2, t3, t4;
-
-          t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ar, br);
-          t2 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ai, bi);
-          t3 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ar, bi);
-
-          /* Avoid expanding redundant multiplication for the common
-             case of squaring a complex number.  */
-          if (ar == br && ai == bi)
-            t4 = t3;
-          else
-            t4 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ai, br);
+          /* If optimizing for size or not at all just do a libcall.
+             Same if there are exception-handling edges or signaling NaNs.  */
+          if (optimize == 0 || optimize_bb_for_size_p (gsi_bb (*gsi))
+              || stmt_can_throw_internal (gsi_stmt (*gsi))
+              || flag_signaling_nans)
+            {
+              expand_complex_libcall (gsi, type, ar, ai, br, bi,
+                                      MULT_EXPR, true);
+              return;
+            }
 
-          rr = gimplify_build2 (gsi, MINUS_EXPR, inner_type, t1, t2);
-          ri = gimplify_build2 (gsi, PLUS_EXPR, inner_type, t3, t4);
+          /* Else, expand x = a * b into
+             x = (ar*br - ai*bi) + i(ar*bi + br*ai);
+             if (isunordered (__real__ x, __imag__ x))
+               x = __muldc3 (a, b);  */
+
+          tree tmpr, tmpi;
+          expand_complex_multiplication_components (gsi, inner_type, ar, ai,
+                                                    br, bi, &tmpr, &tmpi);
+
+          gimple *check
+            = gimple_build_cond (UNORDERED_EXPR, tmpr, tmpi,
+                                 NULL_TREE, NULL_TREE);
+
+          basic_block orig_bb = gsi_bb (*gsi);
+          /* We want to keep track of the original complex multiplication
+             statement as we're going to modify it later in
+             update_complex_assignment.  Make sure that insert_cond_bb leaves
+             that statement in the join block.  */
+          gsi_prev (gsi);
+          basic_block cond_bb
+            = insert_cond_bb (gsi_bb (*gsi), gsi_stmt (*gsi), check,
+                              profile_probability::very_unlikely ());
+
+          gimple_stmt_iterator cond_bb_gsi = gsi_last_bb (cond_bb);
+          gsi_insert_after (&cond_bb_gsi, gimple_build_nop (), GSI_NEW_STMT);
+
+          tree libcall_res
+            = expand_complex_libcall (&cond_bb_gsi, type, ar, ai, br,
                                      bi, MULT_EXPR, false);
+          tree cond_real = gimplify_build1 (&cond_bb_gsi, REALPART_EXPR,
+                                            inner_type, libcall_res);
+          tree cond_imag = gimplify_build1 (&cond_bb_gsi, IMAGPART_EXPR,
+                                            inner_type, libcall_res);
+
+          basic_block join_bb = single_succ_edge (cond_bb)->dest;
+          *gsi = gsi_start_nondebug_after_labels_bb (join_bb);
+
+          /* We have a conditional block with some assignments in cond_bb.
+             Wire up the PHIs to wrap up.  */
+          rr = make_ssa_name (inner_type);
+          ri = make_ssa_name (inner_type);
+          edge cond_to_join = single_succ_edge (cond_bb);
+          edge orig_to_join = find_edge (orig_bb, join_bb);
+
+          gphi *real_phi = create_phi_node (rr, gsi_bb (*gsi));
+          add_phi_arg (real_phi, cond_real, cond_to_join,
+                       UNKNOWN_LOCATION);
+          add_phi_arg (real_phi, tmpr, orig_to_join, UNKNOWN_LOCATION);
+
+          gphi *imag_phi = create_phi_node (ri, gsi_bb (*gsi));
+          add_phi_arg (imag_phi, cond_imag, cond_to_join,
+                       UNKNOWN_LOCATION);
+          add_phi_arg (imag_phi, tmpi, orig_to_join, UNKNOWN_LOCATION);
         }
+      else
+        /* If we are not worrying about NaNs expand to
+           (ar*br - ai*bi) + i(ar*bi + br*ai) directly.  */
+        expand_complex_multiplication_components (gsi, inner_type, ar, ai,
+                                                  br, bi, &rr, &ri);
       break;
 
     default:
@@ -1308,13 +1403,14 @@ expand_complex_div_wide (gimple_stmt_iterator *gsi, tree inner_type,
 /* Expand complex division to scalars.  */
 
 static void
-expand_complex_division (gimple_stmt_iterator *gsi, tree inner_type,
+expand_complex_division (gimple_stmt_iterator *gsi, tree type,
                          tree ar, tree ai, tree br, tree bi,
                          enum tree_code code,
                          complex_lattice_t al, complex_lattice_t bl)
 {
   tree rr, ri;
+  tree inner_type = TREE_TYPE (type);
 
   switch (PAIR (al, bl))
     {
     case PAIR (ONLY_REAL, ONLY_REAL):
@@ -1362,7 +1458,7 @@ expand_complex_division (gimple_stmt_iterator *gsi, tree inner_type,
     case 2:
       if (SCALAR_FLOAT_TYPE_P (inner_type))
         {
-          expand_complex_libcall (gsi, ar, ai, br, bi, code);
+          expand_complex_libcall (gsi, type, ar, ai, br, bi, code, true);
           break;
         }
       /* FALLTHRU */
@@ -1630,7 +1726,7 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi)
       break;
 
     case MULT_EXPR:
-      expand_complex_multiplication (gsi, inner_type, ar, ai, br, bi, al, bl);
+      expand_complex_multiplication (gsi, type, ar, ai, br, bi, al, bl);
       break;
 
     case TRUNC_DIV_EXPR:
@@ -1638,7 +1734,7 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi)
     case FLOOR_DIV_EXPR:
     case ROUND_DIV_EXPR:
     case RDIV_EXPR:
-      expand_complex_division (gsi, inner_type, ar, ai, br, bi, code, al, bl);
+      expand_complex_division (gsi, type, ar, ai, br, bi, code, al, bl);
       break;
 
     case NEGATE_EXPR:
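
For completeness, the new expansion is easy to eyeball on the tests above,
e.g.:

  gcc -O2 -fdump-tree-cplxlower -S complex-7.c

The cplxlower1 dump should then contain exactly one "unord" comparison and
one call to __muldc3, which is what the two scan-tree-dump-times directives
check.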