On Tue, 2015-12-08 at 16:38 +0100, Bernd Schmidt wrote: > On 12/08/2015 04:43 PM, David Malcolm wrote: > > This fixes various uninitialized src_range of c_expr, this time > > in the various builtins that are parsed via c_parser_get_builtin_args. > > > > Bootstrapped®rtested on x86_64-pc-linux-gnu. > > > > OK for trunk? > > I think both of these patches are OK. Some questions though. > > > " a constant"); > > constant_expression_warning (c); > > expr = integer_zerop (c) ? *e3_p : *e2_p; > > + set_c_expr_source_range (&expr, loc, close_paren_loc); > > If that had an uninitialized range, it implies that the *eN_p > expressions also have uninitialized parts. Correct? If so, I think we > should fix that.
Hmmm... re-reading the patch, I think my description was sloppy; sorry. I believe uninitialized data affected RID_BUILTIN_COMPLEX and RID_BUILTIN_SHUFFLE, whereas RID_CHOOSE_EXPR and RID_BUILTIN_CALL_WITH_STATIC_CHAIN copied src_range data from subexpressions. The fix for the uninitialized data made me think "what should the source range of RID_BUILTIN_COMPLEX be?" Hence I noticed that each of the places where c_parser_get_builtin_args was used didn't have an explicit choice about src_range, and that it ought to use the builtin args in its range, and so I updated these. Within RID_CHOOSE_EXPR and RID_BUILTIN_CALL_WITH_STATIC_CHAIN, *eN_P come from c_parser_get_builtin_args, and reviewing that function, it purely gets them by copying them from the return value of c_parser_expr_no_commas, so there's no uninitialized data introduced there. [There could be an argument that for RID_CHOOSE_EXPR we should use the range of the chosen expression, which was the status quo for that case; the patch makes it use the range of the full __builtin_choose_expr () for consistency with other the builtins]. (I believe the proposed ChangeLog does accurately reflect the change; just the title may need tweaking). > Also, what happened to the idea of a constructor for c_expr_t? I actually implemented something like this when implementing these two patches. Work-in-progress patch attached, which introduces an INVALID_LOCATION value for source_location/location_t, and uses it to "poison" the initial value of c_expr's src_range, with lots of assertions to verify that it's been overwritten by time we use it. It doesn't fully work yet, but much of gcc.dg survives with this (and it's what I used to detect the alignof issue in patch 2). Does this look like something I should pursue?
>From cf8aa843a7bb5f173eb8107b1c721e964b9e0677 Mon Sep 17 00:00:00 2001 From: David Malcolm <dmalc...@redhat.com> Date: Mon, 7 Dec 2015 13:47:03 -0500 Subject: [PATCH] WIP: add INVALID_LOCATION and use it to poison src_range --- gcc/c/c-parser.c | 8 +++++++- gcc/c/c-tree.h | 26 ++++++++++++++++++++++++++ gcc/c/c-typeck.c | 2 ++ gcc/tree.c | 6 ++++++ libcpp/include/line-map.h | 10 +++++++++- libcpp/line-map.c | 7 +++++++ 6 files changed, 57 insertions(+), 2 deletions(-) diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 4611e5b..cded9fe 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -63,6 +63,8 @@ void set_c_expr_source_range (c_expr *expr, location_t start, location_t finish) { + gcc_assert (start != INVALID_LOCATION); + gcc_assert (finish != INVALID_LOCATION); expr->src_range.m_start = start; expr->src_range.m_finish = finish; if (expr->value) @@ -73,6 +75,8 @@ void set_c_expr_source_range (c_expr *expr, source_range src_range) { + gcc_assert (src_range.m_start != INVALID_LOCATION); + gcc_assert (src_range.m_finish != INVALID_LOCATION); expr->src_range = src_range; if (expr->value) set_source_range (expr->value, src_range); @@ -6145,6 +6149,8 @@ c_parser_expr_no_commas (c_parser *parser, struct c_expr *after, c_parser_consume_token (parser); exp_location = c_parser_peek_token (parser)->location; rhs = c_parser_expr_no_commas (parser, NULL); + gcc_assert (rhs.src_range.m_start != INVALID_LOCATION); + gcc_assert (rhs.src_range.m_finish != INVALID_LOCATION); rhs = convert_lvalue_to_rvalue (exp_location, rhs, true, true); ret.value = build_modify_expr (op_location, lhs.value, lhs.original_type, @@ -6917,7 +6923,6 @@ c_parser_alignof_expression (c_parser *parser) } else { - struct c_expr ret; expr = c_parser_unary_expression (parser); end_loc = expr.src_range.m_finish; alignof_expr: @@ -6927,6 +6932,7 @@ c_parser_alignof_expression (c_parser *parser) pedwarn (start_loc, OPT_Wpedantic, "ISO C does not allow %<%E (expression)%>", alignof_spelling); + struct c_expr ret; ret.value = c_alignof_expr (start_loc, expr.value); ret.original_code = ERROR_MARK; ret.original_type = NULL; diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index 00e72b1..30625ee 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -118,6 +118,32 @@ along with GCC; see the file COPYING3. If not see for code generation alongside a tree representing its value. */ struct c_expr { + c_expr () + { + /* In a checkeed build, poison the initial value of src_range so + that attempts to use uninitialized data fail with an assertion, + without needing to run under Valgrind. */ +#if CHECKING_P + src_range.m_start = INVALID_LOCATION; + src_range.m_finish = INVALID_LOCATION; +#endif + } + + c_expr (const c_expr &other) + { + value = other.value; + original_code = other.original_code; + original_type = other.original_type; +#if CHECKING_P + if (value != error_mark_node) + { + gcc_assert (other.src_range.m_start != INVALID_LOCATION); + gcc_assert (other.src_range.m_finish != INVALID_LOCATION); + } +#endif + src_range = other.src_range; + } + /* The value of the expression. */ tree value; /* Record the original unary/binary operator of an expression, which may diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index b691072..97064ae 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -6821,6 +6821,8 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype, expr.value = inside_init; expr.original_code = (strict_string ? STRING_CST : ERROR_MARK); expr.original_type = NULL; + expr.src_range.m_start = init_loc; + expr.src_range.m_finish = init_loc; maybe_warn_string_init (init_loc, type, expr); if (TYPE_DOMAIN (type) && !TYPE_MAX_VALUE (TYPE_DOMAIN (type))) diff --git a/gcc/tree.c b/gcc/tree.c index c8b3ab8..db429c7 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -13938,6 +13938,9 @@ set_block (location_t loc, tree block) location_t set_source_range (tree expr, location_t start, location_t finish) { + gcc_assert (start != INVALID_LOCATION); + gcc_assert (finish != INVALID_LOCATION); + source_range src_range; src_range.m_start = start; src_range.m_finish = finish; @@ -13950,6 +13953,9 @@ set_source_range (tree expr, source_range src_range) if (!EXPR_P (expr)) return UNKNOWN_LOCATION; + gcc_assert (src_range.m_start != INVALID_LOCATION); + gcc_assert (src_range.m_finish != INVALID_LOCATION); + location_t pure_loc = get_pure_location (EXPR_LOCATION (expr)); location_t adhoc = COMBINE_LOCATION_DATA (line_table, pure_loc, diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h index 73c583e..e3cc0d4 100644 --- a/libcpp/include/line-map.h +++ b/libcpp/include/line-map.h @@ -160,7 +160,7 @@ typedef unsigned int linenum_type; -----------+-------------------------------+------------------------------- 0x80000000 | Start of ad-hoc values; the lower 31 bits are used as an index ... | into the line_table->location_adhoc_data_map.data array. - 0xffffffff | UINT_MAX | + 0xffffffff | UINT_MAX == INVALID_LOCATION | -----------+-------------------------------+------------------------------- Examples of location encoding. @@ -260,6 +260,10 @@ typedef unsigned int linenum_type; worked example in libcpp/location-example.txt. */ typedef unsigned int source_location; +/* A bogus value, used for detecting uninitialized data in checked + builds. */ +#define INVALID_LOCATION ((source_location) UINT_MAX) + /* A range of source locations. Ranges are closed: @@ -1004,6 +1008,10 @@ COMBINE_LOCATION_DATA (struct line_maps *set, source_range src_range, void *block) { + linemap_assert (loc != INVALID_LOCATION); + linemap_assert (src_range.m_start != INVALID_LOCATION); + linemap_assert (src_range.m_finish != INVALID_LOCATION); + return get_combined_adhoc_loc (set, loc, src_range, block); } diff --git a/libcpp/line-map.c b/libcpp/line-map.c index 333e835..76c1ec8 100644 --- a/libcpp/line-map.c +++ b/libcpp/line-map.c @@ -84,6 +84,10 @@ location_adhoc_data_eq (const void *l1, const void *l2) (const struct location_adhoc_data *) l1; const struct location_adhoc_data *lb2 = (const struct location_adhoc_data *) l2; + linemap_assert (lb1->src_range.m_start != INVALID_LOCATION); + linemap_assert (lb1->src_range.m_finish != INVALID_LOCATION); + linemap_assert (lb2->src_range.m_start != INVALID_LOCATION); + linemap_assert (lb2->src_range.m_finish != INVALID_LOCATION); return (lb1->locus == lb2->locus && lb1->src_range.m_start == lb2->src_range.m_start && lb1->src_range.m_finish == lb2->src_range.m_finish @@ -166,6 +170,9 @@ get_combined_adhoc_loc (struct line_maps *set, struct location_adhoc_data lb; struct location_adhoc_data **slot; + linemap_assert (src_range.m_start != INVALID_LOCATION); + linemap_assert (src_range.m_finish != INVALID_LOCATION); + if (IS_ADHOC_LOC (locus)) locus = set->location_adhoc_data_map.data[locus & MAX_SOURCE_LOCATION].locus; -- 1.8.5.3