On Thu, May 22, 2014 at 04:06:21PM +0400, Konstantin Serebryany wrote:
> Not really recently... (Feb 2013)
> http://llvm.org/viewvc/llvm-project?rev=175507&view=rev
Ah, wasn't aware of that.  Here is a (so far not bootstrapped/regtested)
patch for the GCC side.  Notes:

1) The cases where we, e.g. for various stringops, perform a first and last
address check and use separate __asan_report_*1 calls for reporting should
probably be converted to use this __asan_report_*_n too.

2) It still doesn't deal properly with unaligned power of two accesses,
but neither does llvm (at least not 3.4).  I'm not talking about the
undefined behavior cases where the compiler isn't told the access is
misaligned, but e.g. about accesses to struct S { int x; }
__attribute__((packed)) and similar (or aligned(1)).  Supposedly we could
force __asan_report_*_n for that case too, because the normal wider check
assumes the access is aligned.

3) There is still a failure for -m32:
FAIL: g++.dg/asan/asan_test.C -O2 AddressSanitizer_UAF_long_double Ident(p)[12] = 0 output pattern test
Output should match: WRITE of size 1[06]
FAIL: g++.dg/asan/asan_test.C -O2 AddressSanitizer_UAF_long_double Ident(p)[0] = Ident(p)[12] output pattern test
Output should match: READ of size 1[06]
That sounds like something to fix upstream: the pattern should also allow
size 12, which is the size of long double on ia32 (it is 16 bytes on
x86_64), thus 1[026].  Kostya, can you please change it?  I'll then apply
it to the testsuite patch too.  As mentioned earlier, ubsan has a similar
problem in that it doesn't recognize float bitsize 96.  Unlike this case,
where clang+llvm pass in 10 bytes (which is what the hardware actually
accesses when going through the i387 stack, though not when the value is
copied by other means), in the ubsan case clang also passes in bitsize 96,
which ubsan doesn't handle.

I'll bootstrap/regtest this later today.

2014-05-22  Jakub Jelinek  <ja...@redhat.com>

	* sanitizer.def (BUILT_IN_ASAN_REPORT_LOAD_N,
	BUILT_IN_ASAN_REPORT_STORE_N): New.
	* asan.c (struct asan_mem_ref): Change access_size type to
	HOST_WIDE_INT.
	(asan_mem_ref_init, asan_mem_ref_new, get_mem_refs_of_builtin_call,
	update_mem_ref_hash_table): Likewise.
	(asan_mem_ref_hasher::hash): Hash in a HWI.
	(report_error_func): Change size_in_bytes argument to HWI.
	Use *_N builtins if size_in_bytes is larger than 16 or not a power
	of two.
	(build_shadow_mem_access): New function.
	(build_check_stmt): Use it.  Change size_in_bytes argument to HWI.
	Handle size_in_bytes not a power of two or larger than 16.
	(instrument_derefs): Don't give up if size_in_bytes is not a power
	of two or is larger than 16.
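
To make the new path concrete, here are two made-up accesses (illustrative
only, not from the testsuite) whose sizes are not any of 1, 2, 4, 8 or 16;
instrument_derefs used to give up on these, and with the patch they should
be instrumented through the new *_n builtins:

typedef int v8si __attribute__ ((vector_size (32)));

void
foo (long double *p, v8si *q, v8si v)
{
  *p = 0.0L;	/* 12-byte store with -m32: not a power of two, so
		   report_error_func picks __asan_report_store_n.  */
  *q = v;	/* 32-byte store: larger than 16, also reported via *_n.  */
}
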
--- gcc/sanitizer.def.jj	2014-05-22 10:18:01.000000000 +0200
+++ gcc/sanitizer.def	2014-05-22 14:13:27.859513839 +0200
@@ -41,6 +41,9 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPO
 		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
 DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD16, "__asan_report_load16",
 		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
+DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD_N, "__asan_report_load_n",
+		      BT_FN_VOID_PTR_PTRMODE,
+		      ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
 DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE1, "__asan_report_store1",
 		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
 DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE2, "__asan_report_store2",
@@ -51,6 +54,9 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPO
 		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
 DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE16, "__asan_report_store16",
 		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
+DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE_N, "__asan_report_store_n",
+		      BT_FN_VOID_PTR_PTRMODE,
+		      ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
 DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REGISTER_GLOBALS,
		      "__asan_register_globals",
		      BT_FN_VOID_PTR_PTRMODE, ATTR_NOTHROW_LEAF_LIST)
--- gcc/asan.c.jj	2014-05-11 22:21:23.000000000 +0200
+++ gcc/asan.c	2014-05-22 15:28:30.125998730 +0200
@@ -251,8 +251,8 @@ struct asan_mem_ref
   /* The expression of the beginning of the memory region.  */
   tree start;
 
-  /* The size of the access (can be 1, 2, 4, 8, 16 for now).  */
-  char access_size;
+  /* The size of the access.  */
+  HOST_WIDE_INT access_size;
 };
 
 static alloc_pool asan_mem_ref_alloc_pool;
@@ -274,7 +274,7 @@ asan_mem_ref_get_alloc_pool ()
 /* Initializes an instance of asan_mem_ref.  */
 
 static void
-asan_mem_ref_init (asan_mem_ref *ref, tree start, char access_size)
+asan_mem_ref_init (asan_mem_ref *ref, tree start, HOST_WIDE_INT access_size)
 {
   ref->start = start;
   ref->access_size = access_size;
@@ -287,7 +287,7 @@ asan_mem_ref_init (asan_mem_ref *ref, tr
    access to the referenced memory.  */
 
 static asan_mem_ref*
-asan_mem_ref_new (tree start, char access_size)
+asan_mem_ref_new (tree start, HOST_WIDE_INT access_size)
 {
   asan_mem_ref *ref
     = (asan_mem_ref *) pool_alloc (asan_mem_ref_get_alloc_pool ());
@@ -334,7 +334,7 @@ inline hashval_t
 asan_mem_ref_hasher::hash (const asan_mem_ref *mem_ref)
 {
   hashval_t h = iterative_hash_expr (mem_ref->start, 0);
-  h = iterative_hash_hashval_t (h, mem_ref->access_size);
+  h = iterative_hash_host_wide_int (mem_ref->access_size, h);
   return h;
 }
 
@@ -392,7 +392,7 @@ free_mem_ref_resources ()
 /* Return true iff the memory reference REF has been instrumented.  */
 
 static bool
-has_mem_ref_been_instrumented (tree ref, char access_size)
+has_mem_ref_been_instrumented (tree ref, HOST_WIDE_INT access_size)
 {
   asan_mem_ref r;
   asan_mem_ref_init (&r, ref, access_size);
@@ -480,7 +480,7 @@ get_mem_refs_of_builtin_call (const gimp
   tree source0 = NULL_TREE, source1 = NULL_TREE,
     dest = NULL_TREE, len = NULL_TREE;
   bool is_store = true, got_reference_p = false;
-  char access_size = 1;
+  HOST_WIDE_INT access_size = 1;
 
   switch (DECL_FUNCTION_CODE (callee))
     {
@@ -842,7 +842,7 @@ has_stmt_been_instrumented_p (gimple stm
 /* Insert a memory reference into the hash table.  */
 
 static void
-update_mem_ref_hash_table (tree ref, char access_size)
+update_mem_ref_hash_table (tree ref, HOST_WIDE_INT access_size)
 {
   hash_table <asan_mem_ref_hasher> ht = get_mem_ref_hash_table ();
 
@@ -1315,20 +1315,22 @@ asan_protect_global (tree decl)
   return true;
 }
 
-/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}.
-   IS_STORE is either 1 (for a store) or 0 (for a load).
-   SIZE_IN_BYTES is one of 1, 2, 4, 8, 16.  */
+/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16,_n}.
+   IS_STORE is either 1 (for a store) or 0 (for a load).  */
 
 static tree
-report_error_func (bool is_store, int size_in_bytes)
+report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes)
 {
-  static enum built_in_function report[2][5]
+  static enum built_in_function report[2][6]
     = { { BUILT_IN_ASAN_REPORT_LOAD1, BUILT_IN_ASAN_REPORT_LOAD2,
 	  BUILT_IN_ASAN_REPORT_LOAD4, BUILT_IN_ASAN_REPORT_LOAD8,
-	  BUILT_IN_ASAN_REPORT_LOAD16 },
+	  BUILT_IN_ASAN_REPORT_LOAD16, BUILT_IN_ASAN_REPORT_LOAD_N },
 	{ BUILT_IN_ASAN_REPORT_STORE1, BUILT_IN_ASAN_REPORT_STORE2,
 	  BUILT_IN_ASAN_REPORT_STORE4, BUILT_IN_ASAN_REPORT_STORE8,
-	  BUILT_IN_ASAN_REPORT_STORE16 } };
+	  BUILT_IN_ASAN_REPORT_STORE16, BUILT_IN_ASAN_REPORT_STORE_N } };
+  if ((size_in_bytes & (size_in_bytes - 1)) != 0
+      || size_in_bytes > 16)
+    return builtin_decl_implicit (report[is_store][5]);
   return builtin_decl_implicit (report[is_store][exact_log2 (size_in_bytes)]);
 }
 
@@ -1450,6 +1452,47 @@ insert_if_then_before_iter (gimple cond,
   gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
 }
 
+/* Build
+   (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset ().  */
+
+static tree
+build_shadow_mem_access (gimple_stmt_iterator *gsi, location_t location,
+			 tree base_addr, tree shadow_ptr_type)
+{
+  tree t, uintptr_type = TREE_TYPE (base_addr);
+  tree shadow_type = TREE_TYPE (shadow_ptr_type);
+  gimple g;
+
+  t = build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT);
+  g = gimple_build_assign_with_ops (RSHIFT_EXPR,
+				    make_ssa_name (uintptr_type, NULL),
+				    base_addr, t);
+  gimple_set_location (g, location);
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+
+  t = build_int_cst (uintptr_type, targetm.asan_shadow_offset ());
+  g = gimple_build_assign_with_ops (PLUS_EXPR,
+				    make_ssa_name (uintptr_type, NULL),
+				    gimple_assign_lhs (g), t);
+  gimple_set_location (g, location);
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+
+  g = gimple_build_assign_with_ops (NOP_EXPR,
+				    make_ssa_name (shadow_ptr_type, NULL),
+				    gimple_assign_lhs (g), NULL_TREE);
+  gimple_set_location (g, location);
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+
+  t = build2 (MEM_REF, shadow_type, gimple_assign_lhs (g),
+	      build_int_cst (shadow_ptr_type, 0));
+  g = gimple_build_assign_with_ops (MEM_REF,
+				    make_ssa_name (shadow_type, NULL),
+				    t, NULL_TREE);
+  gimple_set_location (g, location);
+  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+  return gimple_assign_lhs (g);
+}
+
 /* Instrument the memory access instruction BASE.  Insert new
    statements before or after ITER.
 
@@ -1457,8 +1500,7 @@ insert_if_then_before_iter (gimple cond,
    SSA_NAME, or a non-SSA expression.  LOCATION is the source code
    location.  IS_STORE is TRUE for a store, FALSE for a load.
    BEFORE_P is TRUE for inserting the instrumentation code before
-   ITER, FALSE for inserting it after ITER.  SIZE_IN_BYTES is one of
-   1, 2, 4, 8, 16.
+   ITER, FALSE for inserting it after ITER.
 
    If BEFORE_P is TRUE, *ITER is arranged to still point to the
    statement it was pointing to prior to calling this function,
@@ -1466,7 +1508,7 @@ insert_if_then_before_iter (gimple cond,
 
 static void
 build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter,
-		  bool before_p, bool is_store, int size_in_bytes)
+		  bool before_p, bool is_store, HOST_WIDE_INT size_in_bytes)
 {
   gimple_stmt_iterator gsi;
   basic_block then_bb, else_bb;
@@ -1477,6 +1519,12 @@ build_check_stmt (location_t location, t
   tree uintptr_type
     = build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (base)), 1);
   tree base_ssa = base;
+  HOST_WIDE_INT real_size_in_bytes = size_in_bytes;
+  tree sz_arg = NULL_TREE;
+
+  if ((size_in_bytes & (size_in_bytes - 1)) != 0
+      || size_in_bytes > 16)
+    real_size_in_bytes = 1;
 
   /* Get an iterator on the point where we can add the condition
      statement for the instrumentation.  */
@@ -1509,51 +1557,24 @@ build_check_stmt (location_t location, t
 
   /* Build
     (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset ().  */
+  shadow = build_shadow_mem_access (&gsi, location, base_addr,
+				    shadow_ptr_type);
 
-  t = build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT);
-  g = gimple_build_assign_with_ops (RSHIFT_EXPR,
-				    make_ssa_name (uintptr_type, NULL),
-				    base_addr, t);
-  gimple_set_location (g, location);
-  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-
-  t = build_int_cst (uintptr_type, targetm.asan_shadow_offset ());
-  g = gimple_build_assign_with_ops (PLUS_EXPR,
-				    make_ssa_name (uintptr_type, NULL),
-				    gimple_assign_lhs (g), t);
-  gimple_set_location (g, location);
-  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-
-  g = gimple_build_assign_with_ops (NOP_EXPR,
-				    make_ssa_name (shadow_ptr_type, NULL),
-				    gimple_assign_lhs (g), NULL_TREE);
-  gimple_set_location (g, location);
-  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-
-  t = build2 (MEM_REF, shadow_type, gimple_assign_lhs (g),
-	      build_int_cst (shadow_ptr_type, 0));
-  g = gimple_build_assign_with_ops (MEM_REF,
-				    make_ssa_name (shadow_type, NULL),
-				    t, NULL_TREE);
-  gimple_set_location (g, location);
-  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
-  shadow = gimple_assign_lhs (g);
-
-  if (size_in_bytes < 8)
+  if (real_size_in_bytes < 8)
     {
       /* Slow path for 1, 2 and 4 byte accesses.
	 Test (shadow != 0)
-	      & ((base_addr & 7) + (size_in_bytes - 1)) >= shadow).  */
+	      & ((base_addr & 7) + (real_size_in_bytes - 1)) >= shadow).  */
       gimple_seq seq = NULL;
       gimple shadow_test = build_assign (NE_EXPR, shadow, 0);
       gimple_seq_add_stmt (&seq, shadow_test);
       gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, base_addr, 7));
       gimple_seq_add_stmt (&seq, build_type_cast (shadow_type,
						   gimple_seq_last (seq)));
-      if (size_in_bytes > 1)
+      if (real_size_in_bytes > 1)
	gimple_seq_add_stmt (&seq,
			     build_assign (PLUS_EXPR, gimple_seq_last (seq),
-					   size_in_bytes - 1));
+					   real_size_in_bytes - 1));
       gimple_seq_add_stmt (&seq, build_assign (GE_EXPR, gimple_seq_last (seq),
					       shadow));
       gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test,
@@ -1561,6 +1582,39 @@ build_check_stmt (location_t location, t
       t = gimple_assign_lhs (gimple_seq_last (seq));
       gimple_seq_set_location (seq, location);
       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
+      /* For weird access sizes, check first and last byte.  */
+      if (real_size_in_bytes != size_in_bytes)
+	{
+	  g = gimple_build_assign_with_ops (PLUS_EXPR,
+					    make_ssa_name (uintptr_type, NULL),
+					    base_addr,
+					    build_int_cst (uintptr_type,
							   size_in_bytes - 1));
+	  gimple_set_location (g, location);
+	  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+	  tree base_end_addr = gimple_assign_lhs (g);
+
+	  shadow = build_shadow_mem_access (&gsi, location, base_end_addr,
+					    shadow_ptr_type);
+	  seq = NULL;
+	  shadow_test = build_assign (NE_EXPR, shadow, 0);
+	  gimple_seq_add_stmt (&seq, shadow_test);
+	  gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR,
						   base_end_addr, 7));
+	  gimple_seq_add_stmt (&seq, build_type_cast (shadow_type,
						      gimple_seq_last (seq)));
+	  gimple_seq_add_stmt (&seq, build_assign (GE_EXPR,
						   gimple_seq_last (seq),
						   shadow));
+	  gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test,
						   gimple_seq_last (seq)));
+	  gimple_seq_add_stmt (&seq, build_assign (BIT_IOR_EXPR, t,
						   gimple_seq_last (seq)));
+	  t = gimple_assign_lhs (gimple_seq_last (seq));
+	  gimple_seq_set_location (seq, location);
+	  gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
+	  sz_arg = build_int_cst (pointer_sized_int_node, size_in_bytes);
+	}
     }
   else
     t = shadow;
@@ -1573,7 +1627,7 @@ build_check_stmt (location_t location, t
   /* Generate call to the run-time library (e.g. __asan_report_load8).  */
   gsi = gsi_start_bb (then_bb);
   g = gimple_build_call (report_error_func (is_store, size_in_bytes),
-			 1, base_addr);
+			 sz_arg ? 2 : 1, base_addr, sz_arg);
   gimple_set_location (g, location);
   gsi_insert_after (&gsi, g, GSI_NEW_STMT);
 
@@ -1611,8 +1665,7 @@ instrument_derefs (gimple_stmt_iterator
     }
 
   size_in_bytes = int_size_in_bytes (type);
-  if ((size_in_bytes & (size_in_bytes - 1)) != 0
-      || (unsigned HOST_WIDE_INT) size_in_bytes - 1 >= 16)
+  if (size_in_bytes <= 0)
     return;
 
   HOST_WIDE_INT bitsize, bitpos;
@@ -1621,7 +1674,8 @@ instrument_derefs (gimple_stmt_iterator
   int volatilep = 0, unsignedp = 0;
   tree inner = get_inner_reference (t, &bitsize, &bitpos, &offset,
				    &mode, &unsignedp, &volatilep, false);
-  if (bitpos % (size_in_bytes * BITS_PER_UNIT)
+  if (((size_in_bytes & (size_in_bytes - 1)) == 0
+       && (bitpos % (size_in_bytes * BITS_PER_UNIT)))
      || bitsize != size_in_bytes * BITS_PER_UNIT)
     {
       if (TREE_CODE (t) == COMPONENT_REF
@@ -1634,6 +1688,8 @@ instrument_derefs (gimple_stmt_iterator
	}
       return;
     }
+  if (bitpos % BITS_PER_UNIT)
+    return;
 
   if (TREE_CODE (inner) == VAR_DECL
       && offset == NULL_TREE
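
For reference, a rough C sketch of the checks build_check_stmt now emits
for such a weird-size n-byte access at addr (illustrative only: the helper
names are made up and the x86_64/Linux value of targetm.asan_shadow_offset ()
is hard-coded):

#define ASAN_SHADOW_SHIFT 3
#define ASAN_SHADOW_OFFSET 0x7fff8000UL	/* targetm.asan_shadow_offset () */

extern void __asan_report_load_n (unsigned long, unsigned long);

/* The real_size_in_bytes == 1 slow path for a single byte at ADDR:
   the shadow byte is nonzero and the offset of ADDR within its 8-byte
   granule is at or past the first poisoned byte.  */
static int
byte_is_poisoned (unsigned long addr)
{
  signed char shadow
    = *(signed char *) ((addr >> ASAN_SHADOW_SHIFT) + ASAN_SHADOW_OFFSET);
  return shadow != 0 && (signed char) (addr & 7) >= shadow;
}

static void
check_weird_size_load (unsigned long addr, unsigned long n)
{
  /* Check the first and the last byte of the access, then pass the
     exact size to the new *_n entry point (sz_arg in the patch).  */
  if (byte_is_poisoned (addr) || byte_is_poisoned (addr + n - 1))
    __asan_report_load_n (addr, n);
}

	Jakub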