Hi! This patch implements one of the few remaining missing ubsan sanitizations, in particular instrumentation which complains about loads of (non-bitfield) bool or enum (the latter in C++ only) value that is outside of the range allowed (0/1 for bool, min/max for minimum precision integer type holding all values as the standard says).
Is this still acceptable for trunk (it is missing part of a new feature in 4.9, doesn't affect code unless one of -fsanitize={undefined,bool,enum} is used)? 2013-12-14 Jakub Jelinek <ja...@redhat.com> * ubsan.c: Include tree-ssanames.h, asan.h and gimplify-me.h. (ubsan_type_descriptor): Handle BOOLEAN_TYPE and ENUMERAL_TYPE like INTEGER_TYPE. (instrument_bool_enum_load): New function. (ubsan_pass): Call it. (gate_ubsan): Also enable for SANITIZE_BOOL or SANITIZE_ENUM. * asan.c (create_cond_insert_point): No longer static. * asan.h (create_cond_insert_point): Declare. * sanitizer.def (BUILT_IN_UBSAN_HANDLE_LOAD_INVALID_VALUE): New built-in. * opts.c (common_handle_option): Handle -fsanitize=bool and -fsanitize=enum. * builtins.c (fold_builtin_memory_op): When sanitizing bool and enum loads, don't use enum or bool types for memcpy folding. * flag-types.h (SANITIZE_BOOL, SANITIZE_ENUM): New. (SANITIZE_UNDEFINED): Or these in. * c-c++-common/ubsan/load-bool-enum.c: New test. --- gcc/ubsan.c.jj 2013-12-10 08:52:13.000000000 +0100 +++ gcc/ubsan.c 2013-12-14 19:39:29.099146079 +0100 @@ -43,6 +43,9 @@ along with GCC; see the file COPYING3. #include "c-family/c-common.h" #include "rtl.h" #include "expr.h" +#include "tree-ssanames.h" +#include "asan.h" +#include "gimplify-me.h" /* Map from a tree to a VAR_DECL tree. */ @@ -344,6 +347,8 @@ ubsan_type_descriptor (tree type, bool w switch (TREE_CODE (type)) { + case BOOLEAN_TYPE: + case ENUMERAL_TYPE: case INTEGER_TYPE: tkind = 0x0000; break; @@ -733,6 +738,106 @@ instrument_si_overflow (gimple_stmt_iter } } +/* Instrument loads from (non-bitfield) bool and C++ enum values + to check if the memory value is outside of the range of the valid + type values. */ + +static void +instrument_bool_enum_load (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + tree rhs = gimple_assign_rhs1 (stmt); + tree type = TREE_TYPE (rhs); + tree minv = NULL_TREE, maxv = NULL_TREE; + + if (TREE_CODE (type) == BOOLEAN_TYPE && (flag_sanitize & SANITIZE_BOOL)) + { + minv = boolean_false_node; + maxv = boolean_true_node; + } + else if (TREE_CODE (type) == ENUMERAL_TYPE + && (flag_sanitize & SANITIZE_ENUM) + && TREE_TYPE (type) != NULL_TREE + && TREE_CODE (TREE_TYPE (type)) == INTEGER_TYPE + && (TYPE_PRECISION (TREE_TYPE (type)) + < GET_MODE_PRECISION (TYPE_MODE (type)))) + { + minv = TYPE_MIN_VALUE (TREE_TYPE (type)); + maxv = TYPE_MAX_VALUE (TREE_TYPE (type)); + } + else + return; + + int modebitsize = GET_MODE_BITSIZE (TYPE_MODE (type)); + HOST_WIDE_INT bitsize, bitpos; + tree offset; + enum machine_mode mode; + int volatilep = 0, unsignedp = 0; + tree base = get_inner_reference (rhs, &bitsize, &bitpos, &offset, &mode, + &unsignedp, &volatilep, false); + tree utype = build_nonstandard_integer_type (modebitsize, 1); + + if ((TREE_CODE (base) == VAR_DECL && DECL_HARD_REGISTER (base)) + || (bitpos % modebitsize) != 0 + || bitsize != modebitsize + || GET_MODE_BITSIZE (TYPE_MODE (utype)) != modebitsize + || TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) + return; + + location_t loc = gimple_location (stmt); + tree ptype = build_pointer_type (TREE_TYPE (rhs)); + tree atype = reference_alias_ptr_type (rhs); + gimple g = gimple_build_assign (make_ssa_name (ptype, NULL), + build_fold_addr_expr (rhs)); + gimple_set_location (g, loc); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + tree mem = build2 (MEM_REF, utype, gimple_assign_lhs (g), + build_int_cst (atype, 0)); + tree urhs = make_ssa_name (utype, NULL); + g = gimple_build_assign (urhs, mem); + gimple_set_location (g, loc); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + minv = fold_convert (utype, minv); + maxv = fold_convert (utype, maxv); + if (!integer_zerop (minv)) + { + g = gimple_build_assign_with_ops (MINUS_EXPR, + make_ssa_name (utype, NULL), + urhs, minv); + gimple_set_location (g, loc); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + } + + gimple_stmt_iterator gsi2 = *gsi; + basic_block then_bb, fallthru_bb; + *gsi = create_cond_insert_point (gsi, /*before_p=*/true, + /*then_more_likely_p=*/false, + /*create_then_fallthru_edge=*/true, + &then_bb, &fallthru_bb); + g = gimple_build_cond (GT_EXPR, gimple_assign_lhs (g), + int_const_binop (MINUS_EXPR, maxv, minv), + NULL_TREE, NULL_TREE); + gimple_set_location (g, loc); + gsi_insert_after (gsi, g, GSI_NEW_STMT); + + gimple_assign_set_rhs_with_ops (&gsi2, NOP_EXPR, urhs, NULL_TREE); + update_stmt (stmt); + + tree data = ubsan_create_data ("__ubsan_invalid_value_data", + loc, NULL, + ubsan_type_descriptor (type, false), + NULL_TREE); + data = build_fold_addr_expr_loc (loc, data); + tree fn = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_LOAD_INVALID_VALUE); + + gsi2 = gsi_after_labels (then_bb); + tree val = force_gimple_operand_gsi (&gsi2, ubsan_encode_value (urhs), + true, NULL_TREE, true, GSI_SAME_STMT); + g = gimple_build_call (fn, 2, data, val); + gimple_set_location (g, loc); + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); +} + /* Gate and execute functions for ubsan pass. */ static unsigned int @@ -764,6 +869,10 @@ ubsan_pass (void) instrument_null (gsi, false); } + if (flag_sanitize & (SANITIZE_BOOL | SANITIZE_ENUM) + && gimple_assign_load_p (stmt)) + instrument_bool_enum_load (&gsi); + gsi_next (&gsi); } } @@ -773,7 +882,8 @@ ubsan_pass (void) static bool gate_ubsan (void) { - return flag_sanitize & (SANITIZE_NULL | SANITIZE_SI_OVERFLOW); + return flag_sanitize & (SANITIZE_NULL | SANITIZE_SI_OVERFLOW + | SANITIZE_BOOL | SANITIZE_ENUM); } namespace { --- gcc/asan.c.jj 2013-12-10 08:52:09.000000000 +0100 +++ gcc/asan.c 2013-12-14 15:20:11.908858259 +0100 @@ -1337,7 +1337,7 @@ report_error_func (bool is_store, int si same as what ITER was pointing to prior to calling this function, if BEFORE_P is true; otherwise, it is its following statement. */ -static gimple_stmt_iterator +gimple_stmt_iterator create_cond_insert_point (gimple_stmt_iterator *iter, bool before_p, bool then_more_likely_p, --- gcc/sanitizer.def.jj 2013-12-05 09:39:36.000000000 +0100 +++ gcc/sanitizer.def 2013-12-14 17:14:23.621861421 +0100 @@ -331,3 +331,7 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HAN "__ubsan_handle_negate_overflow", BT_FN_VOID_PTR_PTR, ATTR_COLD_NOTHROW_LEAF_LIST) +DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HANDLE_LOAD_INVALID_VALUE, + "__ubsan_handle_load_invalid_value", + BT_FN_VOID_PTR_PTR, + ATTR_COLD_NOTHROW_LEAF_LIST) --- gcc/asan.h.jj 2013-11-28 08:29:48.000000000 +0100 +++ gcc/asan.h 2013-12-14 15:22:31.738103074 +0100 @@ -29,6 +29,9 @@ extern bool asan_protect_global (tree); extern void initialize_sanitizer_builtins (void); extern tree asan_dynamic_init_call (bool); +extern gimple_stmt_iterator create_cond_insert_point + (gimple_stmt_iterator *, bool, bool, bool, basic_block *, basic_block *); + /* Alias set for accessing the shadow memory. */ extern alias_set_type asan_shadow_set; --- gcc/opts.c.jj 2013-12-05 09:39:37.000000000 +0100 +++ gcc/opts.c 2013-12-14 19:38:43.048383201 +0100 @@ -1462,6 +1462,8 @@ common_handle_option (struct gcc_options { "null", SANITIZE_NULL, sizeof "null" - 1 }, { "signed-integer-overflow", SANITIZE_SI_OVERFLOW, sizeof "signed-integer-overflow" -1 }, + { "bool", SANITIZE_BOOL, sizeof "bool" - 1 }, + { "enum", SANITIZE_ENUM, sizeof "enum" - 1 }, { NULL, 0, 0 } }; const char *comma; --- gcc/builtins.c.jj 2013-12-02 14:33:32.000000000 +0100 +++ gcc/builtins.c 2013-12-14 17:10:00.442238386 +0100 @@ -8912,6 +8912,29 @@ fold_builtin_memory_op (location_t loc, off0 = build_int_cst (build_pointer_type_for_mode (char_type_node, ptr_mode, true), 0); + /* For -fsanitize={bool,enum} make sure the load isn't performed in + the bool or enum type. */ + if (((flag_sanitize & SANITIZE_BOOL) + && TREE_CODE (desttype) == BOOLEAN_TYPE) + || ((flag_sanitize & SANITIZE_ENUM) + && TREE_CODE (desttype) == ENUMERAL_TYPE)) + { + tree destitype + = lang_hooks.types.type_for_mode (TYPE_MODE (desttype), + TYPE_UNSIGNED (desttype)); + desttype = build_aligned_type (destitype, TYPE_ALIGN (desttype)); + } + if (((flag_sanitize & SANITIZE_BOOL) + && TREE_CODE (srctype) == BOOLEAN_TYPE) + || ((flag_sanitize & SANITIZE_ENUM) + && TREE_CODE (srctype) == ENUMERAL_TYPE)) + { + tree srcitype + = lang_hooks.types.type_for_mode (TYPE_MODE (srctype), + TYPE_UNSIGNED (srctype)); + srctype = build_aligned_type (srcitype, TYPE_ALIGN (srctype)); + } + destvar = dest; STRIP_NOPS (destvar); if (TREE_CODE (destvar) == ADDR_EXPR --- gcc/flag-types.h.jj 2013-12-05 09:39:38.000000000 +0100 +++ gcc/flag-types.h 2013-12-14 14:00:29.434032718 +0100 @@ -216,9 +216,11 @@ enum sanitize_code { SANITIZE_NULL = 1 << 7, SANITIZE_RETURN = 1 << 8, SANITIZE_SI_OVERFLOW = 1 << 9, + SANITIZE_BOOL = 1 << 10, + SANITIZE_ENUM = 1 << 11, SANITIZE_UNDEFINED = SANITIZE_SHIFT | SANITIZE_DIVIDE | SANITIZE_UNREACHABLE | SANITIZE_VLA | SANITIZE_NULL | SANITIZE_RETURN - | SANITIZE_SI_OVERFLOW + | SANITIZE_SI_OVERFLOW | SANITIZE_BOOL | SANITIZE_ENUM }; /* flag_vtable_verify initialization levels. */ --- gcc/testsuite/c-c++-common/ubsan/load-bool-enum.c.jj 2013-12-14 19:25:57.271405245 +0100 +++ gcc/testsuite/c-c++-common/ubsan/load-bool-enum.c 2013-12-14 19:41:13.409599574 +0100 @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-options "-fsanitize=bool,enum" } */ + +#ifndef __cplusplus +#define bool _Bool +#endif +enum A { B = -3, C = 2 } a; +bool b; + +__attribute__((noinline, noclone)) enum A +foo (bool *p) +{ + *p = b; /* { dg-output "load-bool-enum.c:13:\[^\n\r]*runtime error: load of value 4, which is not a valid value for type '(_B|b)ool'(\n|\r\n|\r)" } */ + return a; /* { dg-output "\[^\n\r]*load-bool-enum.c:14:\[^\n\r]*runtime error: load of value 9, which is not a valid value for type 'A'(\n|\r\n|\r)" { target c++ } } */ +} + +int +main () +{ + char c = 4; + int d = 9; + if (sizeof (int) != sizeof (a) || sizeof (b) != 1) + return 0; + __builtin_memcpy (&a, &d, sizeof (int)); + __builtin_memcpy (&b, &c, 1); + bool e; + foo (&e); + return 0; +} Jakub