Hi! As the testcases shows, the -Wconversion warning behaves quite differently when -fsanitize=undefined vs. when not sanitizing, but in the end it is not something specific to sanitizing, if a user uses return static_cast<uc>(static_cast<uc>((d++, a) << 1U) | b) | c; instead of return static_cast<uc>(static_cast<uc>(a << 1U) | b) | c; and thus there is some COMPOUND_EXPR involved, cp_build_binary_op behaves significantly different, e.g. shorten_binary_op will have different result (uc for the case without COMPOUND_EXPR, int with it), but it isn't limited to that.
The following patch attempts to handle those the same, basically ignoring everything but the ultimately last operand of COMPOUND_EXPR(s) and treating the other COMPOUND_EXPR(s) operand(s) just as side-effects that need to be evaluated first. Of course, if CODE has evaluation ordering (&&, ||, and <<, >> in C++17), we need to be more careful, but as in all of those op0 is sequenced before op1, we can just handle op0 that way and not do it for op1. The patch can still change evaluation order, but I believe only when it is undefined, e.g. (foo (), bar ()) + (baz (), qux ()) used to be evaluated as foo, bar, baz, qux and now would be foo, baz, bar, qux, so if you think we should defer it for GCC11, fine with me. As for compile time/memory complexity, as the patch pushes the side-effects into a single expression that is then added, after it has been performed the outermost COMPOUND_EXPR should contain the final result in op1 and all the other COMPOUND_EXPR if any in op0 and so another cp_build_binary_op should see just one COMPOUND_EXPR to handle. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk, or defer for GCC11? Or do we instead want to remember the pre-side-effects for each operand separately, perform what cp_build_binary_op does and readd them right before constructing the final result to each? 2020-03-05 Jakub Jelinek <ja...@redhat.com> PR c++/91993 * typeck.c (cp_build_binary_op): If orig_op0 is COMPOUND_EXPR, push the first operand into instrument_expr and use the second operand as orig_op0. Similarly for orig_op1 if evaluation order is undefined. Make sure to add instrument_expr into a new COMPOUND_EXPR before returning. Use add_stmt_to_compound even if instrument_expr is NULL. Formatting fix. * g++.dg/warn/Wconversion-pr91993.C: New test. * g++.dg/ubsan/pr91993.C: New test. --- gcc/cp/typeck.c.jj 2020-03-05 07:57:41.759445443 +0100 +++ gcc/cp/typeck.c 2020-03-05 08:25:23.841850883 +0100 @@ -4478,6 +4478,25 @@ cp_build_binary_op (const op_location_t /* Tree holding instrumentation expression. */ tree instrument_expr = NULL_TREE; + while (TREE_CODE (orig_op0) == COMPOUND_EXPR) + { + instrument_expr = add_stmt_to_compound (instrument_expr, + TREE_OPERAND (orig_op0, 0)); + orig_op0 = TREE_OPERAND (orig_op0, 1); + } + if (code != TRUTH_ANDIF_EXPR + && code != TRUTH_ORIF_EXPR + && ((code != LSHIFT_EXPR && code != RSHIFT_EXPR) + || flag_strong_eval_order != 2)) + { + while (TREE_CODE (orig_op1) == COMPOUND_EXPR) + { + instrument_expr = add_stmt_to_compound (instrument_expr, + TREE_OPERAND (orig_op1, 0)); + orig_op1 = TREE_OPERAND (orig_op1, 1); + } + } + /* Apply default conversions. */ op0 = resolve_nondeduced_context (orig_op0, complain); op1 = resolve_nondeduced_context (orig_op1, complain); @@ -4566,8 +4585,8 @@ cp_build_binary_op (const op_location_t && !TYPE_PTR_OR_PTRMEM_P (type1))) && (complain & tf_warning)) { - location_t loc = - expansion_point_location_if_in_system_header (input_location); + location_t loc + = expansion_point_location_if_in_system_header (input_location); warning_at (loc, OPT_Wpointer_arith, "NULL used in arithmetic"); } @@ -4618,10 +4637,13 @@ cp_build_binary_op (const op_location_t && same_type_ignoring_top_level_qualifiers_p (TREE_TYPE (type0), TREE_TYPE (type1))) { + tree instrument_expr1 = NULL_TREE; result = pointer_diff (location, op0, op1, common_pointer_type (type0, type1), complain, - &instrument_expr); - if (instrument_expr != NULL) + &instrument_expr1); + instrument_expr = add_stmt_to_compound (instrument_expr, + instrument_expr1); + if (instrument_expr != NULL_TREE) result = build2 (COMPOUND_EXPR, TREE_TYPE (result), instrument_expr, result); @@ -4650,10 +4672,12 @@ cp_build_binary_op (const op_location_t result_type = TREE_TYPE (ptr_operand); break; } - return cp_pointer_int_sum (location, code, - ptr_operand, - int_operand, - complain); + result = cp_pointer_int_sum (location, code, ptr_operand, + int_operand, complain); + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); + return result; } common = 1; break; @@ -4776,15 +4800,22 @@ cp_build_binary_op (const op_location_t if (code == TRUTH_ANDIF_EXPR) { tree z = build_zero_cst (TREE_TYPE (op1)); - return build_conditional_expr (location, op0, op1, z, complain); + result = build_conditional_expr (location, op0, op1, z, + complain); } else if (code == TRUTH_ORIF_EXPR) { tree m1 = build_all_ones_cst (TREE_TYPE (op1)); - return build_conditional_expr (location, op0, m1, op1, complain); + result = build_conditional_expr (location, op0, m1, op1, + complain); } else gcc_unreachable (); + + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); + return result; } if (gnu_vector_type_p (type0) && (!VECTOR_TYPE_P (type1) || gnu_vector_type_p (type1))) @@ -4809,7 +4840,11 @@ cp_build_binary_op (const op_location_t else gcc_unreachable (); - return cp_build_binary_op (location, code, op0, op1, complain); + result = cp_build_binary_op (location, code, op0, op1, complain); + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); + return result; } result_type = boolean_type_node; @@ -5075,7 +5110,13 @@ cp_build_binary_op (const op_location_t result_type = TREE_TYPE (op0); } else if (TYPE_PTRMEMFUNC_P (type1) && null_ptr_cst_p (orig_op0)) - return cp_build_binary_op (location, code, op1, op0, complain); + { + result = cp_build_binary_op (location, code, op1, op0, complain); + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); + return result; + } else if (TYPE_PTRMEMFUNC_P (type0) && TYPE_PTRMEMFUNC_P (type1)) { tree type; @@ -5181,9 +5222,14 @@ cp_build_binary_op (const op_location_t e = cp_build_binary_op (location, TRUTH_ANDIF_EXPR, e2, e1, complain); if (code == EQ_EXPR) - return e; - return cp_build_binary_op (location, - EQ_EXPR, e, integer_zero_node, complain); + result = e; + else + result = cp_build_binary_op (location, EQ_EXPR, e, + integer_zero_node, complain); + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); + return result; } else { @@ -5286,7 +5332,11 @@ cp_build_binary_op (const op_location_t } result_type = build_opaque_vector_type (intt, TYPE_VECTOR_SUBPARTS (type0)); - return build_vec_cmp (resultcode, result_type, op0, op1); + result = build_vec_cmp (resultcode, result_type, op0, op1); + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); + return result; } build_type = boolean_type_node; if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE @@ -5342,7 +5392,10 @@ cp_build_binary_op (const op_location_t op1 = save_expr (op1); tree tt = builtin_decl_explicit (BUILT_IN_ASAN_POINTER_COMPARE); - instrument_expr = build_call_expr_loc (location, tt, 2, op0, op1); + instrument_expr + = add_stmt_to_compound (instrument_expr, + build_call_expr_loc (location, tt, 2, + op0, op1)); } break; @@ -5488,11 +5541,13 @@ cp_build_binary_op (const op_location_t need to build the tree in pieces. This built tree will never get out of the front-end as we replace it when instantiating the template. */ - tree tmp = build2 (resultcode, - build_type ? build_type : result_type, - NULL_TREE, op1); - TREE_OPERAND (tmp, 0) = op0; - return tmp; + result = build2 (resultcode, build_type ? build_type : result_type, + NULL_TREE, op1); + TREE_OPERAND (result, 0) = op0; + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); + return result; } /* Remember the original type; RESULT_TYPE might be changed later on @@ -5579,6 +5634,9 @@ cp_build_binary_op (const op_location_t } } result = build2 (COMPLEX_EXPR, result_type, real, imag); + if (instrument_expr != NULL_TREE) + result = build2 (COMPOUND_EXPR, TREE_TYPE (result), + instrument_expr, result); return result; } @@ -5691,7 +5749,7 @@ cp_build_binary_op (const op_location_t /* In C++17, in both op0 << op1 and op0 >> op1 op0 is sequenced before op1, so if op1 has side-effects, use SAVE_EXPR around op0. */ op0 = cp_save_expr (op0); - instrument_expr = op0; + instrument_expr = add_stmt_to_compound (instrument_expr, op0); } if (sanitize_flags_p ((SANITIZE_SHIFT @@ -5722,18 +5780,15 @@ cp_build_binary_op (const op_location_t } else if (doing_shift && sanitize_flags_p (SANITIZE_SHIFT)) instrument_expr1 = ubsan_instrument_shift (location, code, op0, op1); - if (instrument_expr != NULL) - instrument_expr = add_stmt_to_compound (instrument_expr, - instrument_expr1); - else - instrument_expr = instrument_expr1; + instrument_expr = add_stmt_to_compound (instrument_expr, + instrument_expr1); } result = build2_loc (location, resultcode, build_type, op0, op1); if (final_type != 0) result = cp_convert (final_type, result, complain); - if (instrument_expr != NULL) + if (instrument_expr != NULL_TREE) result = build2 (COMPOUND_EXPR, TREE_TYPE (result), instrument_expr, result); --- gcc/testsuite/g++.dg/warn/Wconversion-pr91993.C.jj 2020-03-05 08:25:45.416531409 +0100 +++ gcc/testsuite/g++.dg/warn/Wconversion-pr91993.C 2020-03-05 08:24:53.734296723 +0100 @@ -0,0 +1,17 @@ +// PR c++/91993 +// { dg-do compile } +// { dg-options "-Wconversion" } + +typedef unsigned char uc; + +int +foo (const uc &a, const uc &b, const uc &c) +{ + return static_cast<uc>(static_cast<uc>(a << 1U) | b) | c; // { dg-bogus "conversion from 'int' to 'unsigned char' may change value" } +} + +int +bar (const uc &a, const uc &b, const uc &c, int &d) +{ + return static_cast<uc>(static_cast<uc>((d++, a) << 1U) | b) | c; // { dg-bogus "conversion from 'int' to 'unsigned char' may change value" } +} --- gcc/testsuite/g++.dg/ubsan/pr91993.C.jj 2020-03-05 08:26:02.473278832 +0100 +++ gcc/testsuite/g++.dg/ubsan/pr91993.C 2020-03-05 08:26:11.344147473 +0100 @@ -0,0 +1,17 @@ +// PR c++/91993 +// { dg-do compile } +// { dg-options "-Wconversion -fsanitize=undefined" } + +typedef unsigned char uc; + +int +foo (const uc &a, const uc &b, const uc &c) +{ + return static_cast<uc>(static_cast<uc>(a << 1U) | b) | c; // { dg-bogus "conversion from 'int' to 'unsigned char' may change value" } +} + +int +bar (const uc &a, const uc &b, const uc &c, int &d) +{ + return static_cast<uc>(static_cast<uc>((d++, a) << 1U) | b) | c; // { dg-bogus "conversion from 'int' to 'unsigned char' may change value" } +} Jakub