On Tue, 9 Sep 2014, Jakub Jelinek wrote: > Hi! > > On Fri, Jun 27, 2014 at 09:13:07AM +0200, Jakub Jelinek wrote: > > The patch adds two new (trivial handlers) to libubsan, as it is maintained > > in llvm's compiler-rt, will talk to them if they are interested in those > > and what exact wording and form (AFAIK clang also added the gcc > > {,returns_}nonnull attributes). If they wouldn't be interested, guess > > we could add them in a separate, gcc owned, source file in ubsan (like we > > own Makefile*). > > Now that the compiler-rt bits landed up upstream, here is an updated > version of the patch. > > First here is mostly ubsan infrastructure change so that ubsan_create_data > can handle more cases, together with an improvement not to emit UBSAN_BOUNDS > when it already during gimplification provably can't overflow. > What the ubsan_create_data changes allow is more than one locus at the > beginning and arbitrary data, not just mismatch pair, after all the > typedescriptors. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Thanks, Richard. > 2014-09-09 Jakub Jelinek <ja...@redhat.com> > > * ubsan.h (struct ubsan_mismatch_data): Removed. > (ubsan_create_data): Remove MISMATCH argument, add LOCCNT argument. > * ubsan.c (ubsan_source_location): For unknown locations, > pass { NULL, 0, 0 } instead of { "<unknown>", x, y }. > (ubsan_create_data): Remove MISMATCH argument, add LOCCNT argument. > Allow more than one location and arbitrary extra arguments passed > in ... instead of through MISMATCH pointer. > (ubsan_instrument_unreachable, ubsan_expand_bounds_ifn, > ubsan_expand_null_ifn, ubsan_build_overflow_builtin, > instrument_bool_enum_load, ubsan_instrument_float_cast): Adjust > callers. > c-family/ > * c-ubsan.c (ubsan_instrument_division, ubsan_instrument_shift, > ubsan_instrument_vla, ubsan_instrument_return): Adjust > ubsan_create_data callers. > (ubsan_instrument_bounds): Don't emit UBSAN_BOUNDS at all if > index is constant or BIT_AND_EXPR with constant mask and is > small enough for the bound. > * c-gimplify.c (ubsan_walk_array_refs_r): For ADDR_EXPR of > ARRAY_REF, make sure the inner ARRAY_REF is not walked again. > > --- gcc/ubsan.h.jj 2014-09-08 22:12:27.591741242 +0200 > +++ gcc/ubsan.h 2014-09-09 09:35:04.461393547 +0200 > @@ -38,17 +38,10 @@ enum ubsan_print_style { > UBSAN_PRINT_ARRAY > }; > > -/* An extra data used by ubsan pointer checking. */ > -struct ubsan_mismatch_data { > - tree align; > - tree ckind; > -}; > - > extern bool ubsan_expand_bounds_ifn (gimple_stmt_iterator *); > extern bool ubsan_expand_null_ifn (gimple_stmt_iterator *); > extern tree ubsan_instrument_unreachable (location_t); > -extern tree ubsan_create_data (const char *, const location_t *, > - const struct ubsan_mismatch_data *, ...); > +extern tree ubsan_create_data (const char *, int, const location_t *, ...); > extern tree ubsan_type_descriptor (tree, enum ubsan_print_style = > UBSAN_PRINT_NORMAL); > extern tree ubsan_encode_value (tree, bool = false); > extern bool is_ubsan_builtin_p (tree); > --- gcc/ubsan.c.jj 2014-09-08 22:12:27.553741427 +0200 > +++ gcc/ubsan.c 2014-09-09 09:40:23.908791347 +0200 > @@ -242,17 +242,24 @@ ubsan_source_location (location_t loc) > tree type = ubsan_source_location_type (); > > xloc = expand_location (loc); > + tree str; > if (xloc.file == NULL) > - xloc.file = "<unknown>"; > - > - /* Fill in the values from LOC. */ > - size_t len = strlen (xloc.file); > - tree str = build_string (len + 1, xloc.file); > - TREE_TYPE (str) = build_array_type (char_type_node, > - build_index_type (size_int (len))); > - TREE_READONLY (str) = 1; > - TREE_STATIC (str) = 1; > - str = build_fold_addr_expr (str); > + { > + str = build_int_cst (ptr_type_node, 0); > + xloc.line = 0; > + xloc.column = 0; > + } > + else > + { > + /* Fill in the values from LOC. */ > + size_t len = strlen (xloc.file); > + str = build_string (len + 1, xloc.file); > + TREE_TYPE (str) = build_array_type (char_type_node, > + build_index_type (size_int (len))); > + TREE_READONLY (str) = 1; > + TREE_STATIC (str) = 1; > + str = build_fold_addr_expr (str); > + } > tree ctor = build_constructor_va (type, 3, NULL_TREE, str, NULL_TREE, > build_int_cst (unsigned_type_node, > xloc.line), NULL_TREE, > @@ -451,20 +458,20 @@ ubsan_type_descriptor (tree type, enum u > } > > /* Create a structure for the ubsan library. NAME is a name of the new > - structure. The arguments in ... are of __ubsan_type_descriptor type > - and there are at most two of them. MISMATCH are data used by ubsan > - pointer checking. */ > + structure. LOCCNT is number of locations, PLOC points to array of > + locations. The arguments in ... are of __ubsan_type_descriptor type > + and there are at most two of them, followed by NULL_TREE, followed > + by optional extra arguments and another NULL_TREE. */ > > tree > -ubsan_create_data (const char *name, const location_t *ploc, > - const struct ubsan_mismatch_data *mismatch, ...) > +ubsan_create_data (const char *name, int loccnt, const location_t *ploc, ...) > { > va_list args; > tree ret, t; > - tree fields[5]; > + tree fields[6]; > vec<tree, va_gc> *saved_args = NULL; > size_t i = 0; > - location_t loc = UNKNOWN_LOCATION; > + int j; > > /* Firstly, create a pointer to type descriptor type. */ > tree td_type = ubsan_type_descriptor_type (); > @@ -473,20 +480,22 @@ ubsan_create_data (const char *name, con > > /* Create the structure type. */ > ret = make_node (RECORD_TYPE); > - if (ploc != NULL) > + for (j = 0; j < loccnt; j++) > { > - loc = LOCATION_LOCUS (*ploc); > + gcc_checking_assert (i < 2); > fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, > ubsan_source_location_type ()); > DECL_CONTEXT (fields[i]) = ret; > + if (i) > + DECL_CHAIN (fields[i - 1]) = fields[i]; > i++; > } > > - va_start (args, mismatch); > + va_start (args, ploc); > for (t = va_arg (args, tree); t != NULL_TREE; > i++, t = va_arg (args, tree)) > { > - gcc_checking_assert (i < 3); > + gcc_checking_assert (i < 4); > /* Save the tree arguments for later use. */ > vec_safe_push (saved_args, t); > fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, > @@ -495,23 +504,20 @@ ubsan_create_data (const char *name, con > if (i) > DECL_CHAIN (fields[i - 1]) = fields[i]; > } > - va_end (args); > > - if (mismatch != NULL) > + for (t = va_arg (args, tree); t != NULL_TREE; > + i++, t = va_arg (args, tree)) > { > - /* We have to add two more decls. */ > - fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, > - pointer_sized_int_node); > - DECL_CONTEXT (fields[i]) = ret; > - DECL_CHAIN (fields[i - 1]) = fields[i]; > - i++; > - > + gcc_checking_assert (i < 6); > + /* Save the tree arguments for later use. */ > + vec_safe_push (saved_args, t); > fields[i] = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, > - unsigned_char_type_node); > + TREE_TYPE (t)); > DECL_CONTEXT (fields[i]) = ret; > - DECL_CHAIN (fields[i - 1]) = fields[i]; > - i++; > + if (i) > + DECL_CHAIN (fields[i - 1]) = fields[i]; > } > + va_end (args); > > TYPE_FIELDS (ret) = fields[0]; > TYPE_NAME (ret) = get_identifier (name); > @@ -534,8 +540,11 @@ ubsan_create_data (const char *name, con > tree ctor = build_constructor (ret, v); > > /* If desirable, set the __ubsan_source_location element. */ > - if (ploc != NULL) > - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, ubsan_source_location (loc)); > + for (j = 0; j < loccnt; j++) > + { > + location_t loc = LOCATION_LOCUS (ploc[j]); > + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, ubsan_source_location (loc)); > + } > > size_t nelts = vec_safe_length (saved_args); > for (i = 0; i < nelts; i++) > @@ -544,13 +553,6 @@ ubsan_create_data (const char *name, con > CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t); > } > > - if (mismatch != NULL) > - { > - /* Append the pointer data. */ > - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, mismatch->align); > - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, mismatch->ckind); > - } > - > TREE_CONSTANT (ctor) = 1; > TREE_STATIC (ctor) = 1; > DECL_INITIAL (var) = ctor; > @@ -569,7 +571,7 @@ ubsan_instrument_unreachable (location_t > return build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), > 0); > > initialize_sanitizer_builtins (); > - tree data = ubsan_create_data ("__ubsan_unreachable_data", &loc, NULL, > + tree data = ubsan_create_data ("__ubsan_unreachable_data", 1, &loc, > NULL_TREE, > NULL_TREE); > tree t = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE); > return build_call_expr_loc (loc, t, 1, build_fold_addr_expr_loc (loc, > data)); > @@ -622,10 +624,10 @@ ubsan_expand_bounds_ifn (gimple_stmt_ite > else > { > tree data > - = ubsan_create_data ("__ubsan_out_of_bounds_data", &loc, NULL, > + = ubsan_create_data ("__ubsan_out_of_bounds_data", 1, &loc, > ubsan_type_descriptor (type, UBSAN_PRINT_ARRAY), > ubsan_type_descriptor (orig_index_type), > - NULL_TREE); > + NULL_TREE, NULL_TREE); > data = build_fold_addr_expr_loc (loc, data); > enum built_in_function bcode > = flag_sanitize_recover > @@ -735,12 +737,13 @@ ubsan_expand_null_ifn (gimple_stmt_itera > ? BUILT_IN_UBSAN_HANDLE_TYPE_MISMATCH > : BUILT_IN_UBSAN_HANDLE_TYPE_MISMATCH_ABORT; > tree fn = builtin_decl_implicit (bcode); > - const struct ubsan_mismatch_data m > - = { align, fold_convert (unsigned_char_type_node, ckind) }; > tree data > - = ubsan_create_data ("__ubsan_null_data", &loc, &m, > + = ubsan_create_data ("__ubsan_null_data", 1, &loc, > ubsan_type_descriptor (TREE_TYPE (ckind), > UBSAN_PRINT_POINTER), > + NULL_TREE, > + align, > + fold_convert (unsigned_char_type_node, ckind), > NULL_TREE); > data = build_fold_addr_expr_loc (loc, data); > g = gimple_build_call (fn, 2, data, > @@ -875,8 +878,9 @@ ubsan_build_overflow_builtin (tree_code > if (flag_sanitize_undefined_trap_on_error) > return build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), > 0); > > - tree data = ubsan_create_data ("__ubsan_overflow_data", &loc, NULL, > - ubsan_type_descriptor (lhstype), NULL_TREE); > + tree data = ubsan_create_data ("__ubsan_overflow_data", 1, &loc, > + ubsan_type_descriptor (lhstype), NULL_TREE, > + NULL_TREE); > enum built_in_function fn_code; > > switch (code) > @@ -1069,8 +1073,9 @@ instrument_bool_enum_load (gimple_stmt_i > g = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); > else > { > - tree data = ubsan_create_data ("__ubsan_invalid_value_data", &loc, > NULL, > - ubsan_type_descriptor (type), NULL_TREE); > + tree data = ubsan_create_data ("__ubsan_invalid_value_data", 1, &loc, > + ubsan_type_descriptor (type), NULL_TREE, > + NULL_TREE); > data = build_fold_addr_expr_loc (loc, data); > enum built_in_function bcode > = flag_sanitize_recover > @@ -1189,9 +1194,10 @@ ubsan_instrument_float_cast (location_t > else > { > /* Create the __ubsan_handle_float_cast_overflow fn call. */ > - tree data = ubsan_create_data ("__ubsan_float_cast_overflow_data", > NULL, > + tree data = ubsan_create_data ("__ubsan_float_cast_overflow_data", 0, > NULL, ubsan_type_descriptor (expr_type), > - ubsan_type_descriptor (type), NULL_TREE); > + ubsan_type_descriptor (type), NULL_TREE, > + NULL_TREE); > enum built_in_function bcode > = flag_sanitize_recover > ? BUILT_IN_UBSAN_HANDLE_FLOAT_CAST_OVERFLOW > --- gcc/c-family/c-ubsan.c.jj 2014-09-08 22:12:27.728740567 +0200 > +++ gcc/c-family/c-ubsan.c 2014-09-09 11:10:51.467565697 +0200 > @@ -99,8 +99,9 @@ ubsan_instrument_division (location_t lo > tt = build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), 0); > else > { > - tree data = ubsan_create_data ("__ubsan_overflow_data", &loc, NULL, > - ubsan_type_descriptor (type), NULL_TREE); > + tree data = ubsan_create_data ("__ubsan_overflow_data", 1, &loc, > + ubsan_type_descriptor (type), NULL_TREE, > + NULL_TREE); > data = build_fold_addr_expr_loc (loc, data); > enum built_in_function bcode > = flag_sanitize_recover > @@ -191,9 +192,10 @@ ubsan_instrument_shift (location_t loc, > tt = build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), 0); > else > { > - tree data = ubsan_create_data ("__ubsan_shift_data", &loc, NULL, > + tree data = ubsan_create_data ("__ubsan_shift_data", 1, &loc, > ubsan_type_descriptor (type0), > - ubsan_type_descriptor (type1), NULL_TREE); > + ubsan_type_descriptor (type1), NULL_TREE, > + NULL_TREE); > data = build_fold_addr_expr_loc (loc, data); > > enum built_in_function bcode > @@ -222,8 +224,9 @@ ubsan_instrument_vla (location_t loc, tr > tt = build_call_expr_loc (loc, builtin_decl_explicit (BUILT_IN_TRAP), 0); > else > { > - tree data = ubsan_create_data ("__ubsan_vla_data", &loc, NULL, > - ubsan_type_descriptor (type), NULL_TREE); > + tree data = ubsan_create_data ("__ubsan_vla_data", 1, &loc, > + ubsan_type_descriptor (type), NULL_TREE, > + NULL_TREE); > data = build_fold_addr_expr_loc (loc, data); > enum built_in_function bcode > = flag_sanitize_recover > @@ -248,8 +251,8 @@ ubsan_instrument_return (location_t loc) > builtins. Reinitialize them if needed. */ > initialize_sanitizer_builtins (); > > - tree data = ubsan_create_data ("__ubsan_missing_return_data", &loc, > - NULL, NULL_TREE); > + tree data = ubsan_create_data ("__ubsan_missing_return_data", 1, &loc, > + NULL_TREE, NULL_TREE); > tree t = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_MISSING_RETURN); > return build_call_expr_loc (loc, t, 1, build_fold_addr_expr_loc (loc, > data)); > } > @@ -305,6 +308,19 @@ ubsan_instrument_bounds (location_t loc, > return NULL_TREE; > } > > + /* Don't emit instrumentation in the most common cases. */ > + tree idx = NULL_TREE; > + if (TREE_CODE (*index) == INTEGER_CST) > + idx = *index; > + else if (TREE_CODE (*index) == BIT_AND_EXPR > + && TREE_CODE (TREE_OPERAND (*index, 1)) == INTEGER_CST) > + idx = TREE_OPERAND (*index, 1); > + if (idx > + && TREE_CODE (bound) == INTEGER_CST > + && tree_int_cst_sgn (idx) >= 0 > + && tree_int_cst_le (idx, bound)) > + return NULL_TREE; > + > *index = save_expr (*index); > /* Create a "(T *) 0" tree node to describe the array type. */ > tree zero_with_type = build_int_cst (build_pointer_type (type), 0); > --- gcc/c-family/c-gimplify.c.jj 2014-08-12 15:42:58.000000000 +0200 > +++ gcc/c-family/c-gimplify.c 2014-09-09 11:10:21.847711230 +0200 > @@ -95,7 +95,20 @@ ubsan_walk_array_refs_r (tree *tp, int * > } > else if (TREE_CODE (*tp) == ADDR_EXPR > && TREE_CODE (TREE_OPERAND (*tp, 0)) == ARRAY_REF) > - ubsan_maybe_instrument_array_ref (&TREE_OPERAND (*tp, 0), true); > + { > + ubsan_maybe_instrument_array_ref (&TREE_OPERAND (*tp, 0), true); > + /* Make sure ubsan_maybe_instrument_array_ref is not called again > + on the ARRAY_REF, the above call might not instrument anything > + as the index might be constant or masked, so ensure it is not > + walked again and walk its subtrees manually. */ > + tree aref = TREE_OPERAND (*tp, 0); > + pset->add (aref); > + *walk_subtrees = 0; > + walk_tree (&TREE_OPERAND (aref, 0), ubsan_walk_array_refs_r, pset, > pset); > + walk_tree (&TREE_OPERAND (aref, 1), ubsan_walk_array_refs_r, pset, > pset); > + walk_tree (&TREE_OPERAND (aref, 2), ubsan_walk_array_refs_r, pset, > pset); > + walk_tree (&TREE_OPERAND (aref, 3), ubsan_walk_array_refs_r, pset, > pset); > + } > else if (TREE_CODE (*tp) == ARRAY_REF) > ubsan_maybe_instrument_array_ref (tp, false); > return NULL_TREE; > > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer