On Thu, May 22, 2014 at 6:07 PM, Jakub Jelinek <ja...@redhat.com> wrote:
> On Thu, May 22, 2014 at 04:06:21PM +0400, Konstantin Serebryany wrote:
>> Not really recently... (Feb 2013)
>> http://llvm.org/viewvc/llvm-project?rev=175507&view=rev
>
> Ah, wasn't aware of that.
>
> Here is a (so far not bootstrapped/regtested) patch for the GCC side.
>
> Notes:
> 1) the cases where, e.g. for various stringops, we perform a first and
>    last address check and use separate __asan_report_*1 calls for
>    reporting should probably be converted to use this
>    __asan_report_*_n too
Note that in clang we completely removed the code that instruments
stringops (we had memset/memcpy/memmove). Instead we replace memset
with __asan_memset (ditto for memcpy/memmove) so that it does not get
inlined later. This simplifies the code and lets us properly analyze
all memset/etc calls, instead of checking just the first and last
bytes. (A rough sketch of what this rewrite amounts to follows the
ChangeLog below.)

> 2) it still doesn't deal with unaligned power-of-two accesses
>    properly, but neither does llvm (at least not 3.4). I am not
>    talking about undefined behavior cases where the compiler isn't
>    told the access is misaligned, but e.g. about accessing
>    struct S { int x; } __attribute__((packed)) and similar (or
>    aligned(1)). Supposedly we could force __asan_report_*_n for that
>    case too, because the normal wider check assumes the access is
>    aligned

Yep, we don't do it.

> 3) there is still a failure for -m32:
>    FAIL: g++.dg/asan/asan_test.C -O2 AddressSanitizer_UAF_long_double
>    Ident(p)[12] = 0 output pattern test
>    Output should match: WRITE of size 1[06]
>    FAIL: g++.dg/asan/asan_test.C -O2 AddressSanitizer_UAF_long_double
>    Ident(p)[0] = Ident(p)[12] output pattern test
>    Output should match: READ of size 1[06]
>    That sounds like something to fix upstream; it should also allow
>    size 12, which is the size of long double on ia32 (16 bytes on
>    x86_64), thus 1[026]. Kostya, can you please change it? I'll then
>    apply it to the testsuite patch too.

Like this?

--- lib/asan/tests/asan_test.cc	(revision 209430)
+++ lib/asan/tests/asan_test.cc	(working copy)
@@ -183,8 +183,8 @@
 TEST(AddressSanitizer, UAF_long_double) {
   if (sizeof(long double) == sizeof(double)) return;
   long double *p = Ident(new long double[10]);
-  EXPECT_DEATH(Ident(p)[12] = 0, "WRITE of size 1[06]");
-  EXPECT_DEATH(Ident(p)[0] = Ident(p)[12], "READ of size 1[06]");
+  EXPECT_DEATH(Ident(p)[12] = 0, "WRITE of size 1[026]");
+  EXPECT_DEATH(Ident(p)[0] = Ident(p)[12], "READ of size 1[026]");
   delete [] Ident(p);
 }

> As mentioned earlier, ubsan has a similar problem where it doesn't
> recognize float bitsize 96 (but unlike this case, where clang+llvm
> pass in 10 bytes, which is what the hw actually accesses when using
> the i387 stack but not when copying it by other means, in the ubsan
> case clang also passes in bitsize 96, which ubsan doesn't handle).

Yea, this long double business is rather confusing to me...

--kcc

> I'll bootstrap/regtest this later today.
>
> 2014-05-22  Jakub Jelinek  <ja...@redhat.com>
>
> 	* sanitizer.def (BUILT_IN_ASAN_REPORT_LOAD_N,
> 	BUILT_IN_ASAN_REPORT_STORE_N): New.
> 	* asan.c (struct asan_mem_ref): Change access_size type to
> 	HOST_WIDE_INT.
> 	(asan_mem_ref_init, asan_mem_ref_new, get_mem_refs_of_builtin_call,
> 	update_mem_ref_hash_table): Likewise.
> 	(asan_mem_ref_hasher::hash): Hash in a HWI.
> 	(report_error_func): Change size_in_bytes argument to HWI.
> 	Use *_N builtins if size_in_bytes is larger than 16 or not a
> 	power of two.
> 	(build_shadow_mem_access): New function.
> 	(build_check_stmt): Use it.  Change size_in_bytes argument to HWI.
> 	Handle size_in_bytes not a power of two or larger than 16.
> 	(instrument_derefs): Don't give up if size_in_bytes is not a
> 	power of two or is larger than 16.
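To make the stringop rewrite above concrete, here is a minimal C
sketch of what it amounts to.  __asan_memset is the entry point the
asan run-time library provides; the surrounding function and names
are illustrative only, not actual compiler output:

    #include <stddef.h>

    /* Provided by the asan run-time library: checks the whole
       [dst, dst + n) range against shadow memory, then performs
       the actual memset.  */
    void *__asan_memset (void *dst, int c, size_t n);

    void
    clear_buffer (char *buf, size_t len)
    {
      /* The user wrote memset (buf, 0, len); under
         -fsanitize=address the call is simply retargeted, so it
         cannot be expanded inline and every byte of the range gets
         checked, not just the first and the last one.  */
      __asan_memset (buf, 0, len);
    }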
>
> --- gcc/sanitizer.def.jj	2014-05-22 10:18:01.000000000 +0200
> +++ gcc/sanitizer.def	2014-05-22 14:13:27.859513839 +0200
> @@ -41,6 +41,9 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPO
>  		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD16, "__asan_report_load16",
>  		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
> +DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD_N, "__asan_report_load_n",
> +		      BT_FN_VOID_PTR_PTRMODE,
> +		      ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE1, "__asan_report_store1",
>  		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE2, "__asan_report_store2",
> @@ -51,6 +54,9 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPO
>  		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE16, "__asan_report_store16",
>  		      BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
> +DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE_N, "__asan_report_store_n",
> +		      BT_FN_VOID_PTR_PTRMODE,
> +		      ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REGISTER_GLOBALS, "__asan_register_globals",
>  		      BT_FN_VOID_PTR_PTRMODE, ATTR_NOTHROW_LEAF_LIST)
> --- gcc/asan.c.jj	2014-05-11 22:21:23.000000000 +0200
> +++ gcc/asan.c	2014-05-22 15:28:30.125998730 +0200
> @@ -251,8 +251,8 @@ struct asan_mem_ref
>    /* The expression of the beginning of the memory region.  */
>    tree start;
>  
> -  /* The size of the access (can be 1, 2, 4, 8, 16 for now).  */
> -  char access_size;
> +  /* The size of the access.  */
> +  HOST_WIDE_INT access_size;
>  };
>  
>  static alloc_pool asan_mem_ref_alloc_pool;
> @@ -274,7 +274,7 @@ asan_mem_ref_get_alloc_pool ()
>  /* Initializes an instance of asan_mem_ref.  */
>  
>  static void
> -asan_mem_ref_init (asan_mem_ref *ref, tree start, char access_size)
> +asan_mem_ref_init (asan_mem_ref *ref, tree start, HOST_WIDE_INT access_size)
>  {
>    ref->start = start;
>    ref->access_size = access_size;
> @@ -287,7 +287,7 @@ asan_mem_ref_init (asan_mem_ref *ref, tr
>     access to the referenced memory.  */
>  
>  static asan_mem_ref*
> -asan_mem_ref_new (tree start, char access_size)
> +asan_mem_ref_new (tree start, HOST_WIDE_INT access_size)
>  {
>    asan_mem_ref *ref =
>      (asan_mem_ref *) pool_alloc (asan_mem_ref_get_alloc_pool ());
> @@ -334,7 +334,7 @@ inline hashval_t
>  asan_mem_ref_hasher::hash (const asan_mem_ref *mem_ref)
>  {
>    hashval_t h = iterative_hash_expr (mem_ref->start, 0);
> -  h = iterative_hash_hashval_t (h, mem_ref->access_size);
> +  h = iterative_hash_host_wide_int (mem_ref->access_size, h);
>    return h;
>  }
>  
> @@ -392,7 +392,7 @@ free_mem_ref_resources ()
>  /* Return true iff the memory reference REF has been instrumented.  */
>  
>  static bool
> -has_mem_ref_been_instrumented (tree ref, char access_size)
> +has_mem_ref_been_instrumented (tree ref, HOST_WIDE_INT access_size)
>  {
>    asan_mem_ref r;
>    asan_mem_ref_init (&r, ref, access_size);
> @@ -480,7 +480,7 @@ get_mem_refs_of_builtin_call (const gimp
>    tree source0 = NULL_TREE, source1 = NULL_TREE,
>      dest = NULL_TREE, len = NULL_TREE;
>    bool is_store = true, got_reference_p = false;
> -  char access_size = 1;
> +  HOST_WIDE_INT access_size = 1;
>  
>    switch (DECL_FUNCTION_CODE (callee))
>      {
> @@ -842,7 +842,7 @@ has_stmt_been_instrumented_p (gimple stm
>  /* Insert a memory reference into the hash table.  */
>  
>  static void
> -update_mem_ref_hash_table (tree ref, char access_size)
> +update_mem_ref_hash_table (tree ref, HOST_WIDE_INT access_size)
>  {
>    hash_table <asan_mem_ref_hasher> ht = get_mem_ref_hash_table ();
>  
> @@ -1315,20 +1315,22 @@ asan_protect_global (tree decl)
>    return true;
>  }
>  
> -/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}.
> -   IS_STORE is either 1 (for a store) or 0 (for a load).
> -   SIZE_IN_BYTES is one of 1, 2, 4, 8, 16.  */
> +/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16,_n}.
> +   IS_STORE is either 1 (for a store) or 0 (for a load).  */
>  
>  static tree
> -report_error_func (bool is_store, int size_in_bytes)
> +report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes)
>  {
> -  static enum built_in_function report[2][5]
> +  static enum built_in_function report[2][6]
>      = { { BUILT_IN_ASAN_REPORT_LOAD1, BUILT_IN_ASAN_REPORT_LOAD2,
>  	  BUILT_IN_ASAN_REPORT_LOAD4, BUILT_IN_ASAN_REPORT_LOAD8,
> -	  BUILT_IN_ASAN_REPORT_LOAD16 },
> +	  BUILT_IN_ASAN_REPORT_LOAD16, BUILT_IN_ASAN_REPORT_LOAD_N },
>  	{ BUILT_IN_ASAN_REPORT_STORE1, BUILT_IN_ASAN_REPORT_STORE2,
>  	  BUILT_IN_ASAN_REPORT_STORE4, BUILT_IN_ASAN_REPORT_STORE8,
> -	  BUILT_IN_ASAN_REPORT_STORE16 } };
> +	  BUILT_IN_ASAN_REPORT_STORE16, BUILT_IN_ASAN_REPORT_STORE_N } };
> +  if ((size_in_bytes & (size_in_bytes - 1)) != 0
> +      || size_in_bytes > 16)
> +    return builtin_decl_implicit (report[is_store][5]);
>    return builtin_decl_implicit (report[is_store][exact_log2 (size_in_bytes)]);
>  }
>  
> @@ -1450,6 +1452,47 @@ insert_if_then_before_iter (gimple cond,
>    gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
>  }
>  
> +/* Build
> +   (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset ().  */
> +
> +static tree
> +build_shadow_mem_access (gimple_stmt_iterator *gsi, location_t location,
> +			 tree base_addr, tree shadow_ptr_type)
> +{
> +  tree t, uintptr_type = TREE_TYPE (base_addr);
> +  tree shadow_type = TREE_TYPE (shadow_ptr_type);
> +  gimple g;
> +
> +  t = build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT);
> +  g = gimple_build_assign_with_ops (RSHIFT_EXPR,
> +				    make_ssa_name (uintptr_type, NULL),
> +				    base_addr, t);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +
> +  t = build_int_cst (uintptr_type, targetm.asan_shadow_offset ());
> +  g = gimple_build_assign_with_ops (PLUS_EXPR,
> +				    make_ssa_name (uintptr_type, NULL),
> +				    gimple_assign_lhs (g), t);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +
> +  g = gimple_build_assign_with_ops (NOP_EXPR,
> +				    make_ssa_name (shadow_ptr_type, NULL),
> +				    gimple_assign_lhs (g), NULL_TREE);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +
> +  t = build2 (MEM_REF, shadow_type, gimple_assign_lhs (g),
> +	      build_int_cst (shadow_ptr_type, 0));
> +  g = gimple_build_assign_with_ops (MEM_REF,
> +				    make_ssa_name (shadow_type, NULL),
> +				    t, NULL_TREE);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +  return gimple_assign_lhs (g);
> +}
> +
>  /* Instrument the memory access instruction BASE.  Insert new
>     statements before or after ITER.
>  
> @@ -1457,8 +1500,7 @@ insert_if_then_before_iter (gimple cond,
>     SSA_NAME, or a non-SSA expression.  LOCATION is the source code
>     location.  IS_STORE is TRUE for a store, FALSE for a load.
>     BEFORE_P is TRUE for inserting the instrumentation code before
> -   ITER, FALSE for inserting it after ITER.  SIZE_IN_BYTES is one of
> -   1, 2, 4, 8, 16.
> +   ITER, FALSE for inserting it after ITER.
>  
>     If BEFORE_P is TRUE, *ITER is arranged to still point to the
>     statement it was pointing to prior to calling this function,
> @@ -1466,7 +1508,7 @@ insert_if_then_before_iter (gimple cond,
>  
>  static void
>  build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter,
> -		  bool before_p, bool is_store, int size_in_bytes)
> +		  bool before_p, bool is_store, HOST_WIDE_INT size_in_bytes)
>  {
>    gimple_stmt_iterator gsi;
>    basic_block then_bb, else_bb;
> @@ -1477,6 +1519,12 @@ build_check_stmt (location_t location, t
>    tree uintptr_type
>      = build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (base)), 1);
>    tree base_ssa = base;
> +  HOST_WIDE_INT real_size_in_bytes = size_in_bytes;
> +  tree sz_arg = NULL_TREE;
> +
> +  if ((size_in_bytes & (size_in_bytes - 1)) != 0
> +      || size_in_bytes > 16)
> +    real_size_in_bytes = 1;
>  
>    /* Get an iterator on the point where we can add the condition
>       statement for the instrumentation.  */
> @@ -1509,51 +1557,24 @@ build_check_stmt (location_t location, t
>  
>    /* Build
>       (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset ().  */
> +  shadow = build_shadow_mem_access (&gsi, location, base_addr,
> +				    shadow_ptr_type);
>  
> -  t = build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT);
> -  g = gimple_build_assign_with_ops (RSHIFT_EXPR,
> -				    make_ssa_name (uintptr_type, NULL),
> -				    base_addr, t);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -
> -  t = build_int_cst (uintptr_type, targetm.asan_shadow_offset ());
> -  g = gimple_build_assign_with_ops (PLUS_EXPR,
> -				    make_ssa_name (uintptr_type, NULL),
> -				    gimple_assign_lhs (g), t);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -
> -  g = gimple_build_assign_with_ops (NOP_EXPR,
> -				    make_ssa_name (shadow_ptr_type, NULL),
> -				    gimple_assign_lhs (g), NULL_TREE);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -
> -  t = build2 (MEM_REF, shadow_type, gimple_assign_lhs (g),
> -	      build_int_cst (shadow_ptr_type, 0));
> -  g = gimple_build_assign_with_ops (MEM_REF,
> -				    make_ssa_name (shadow_type, NULL),
> -				    t, NULL_TREE);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -  shadow = gimple_assign_lhs (g);
> -
> -  if (size_in_bytes < 8)
> +  if (real_size_in_bytes < 8)
>      {
>        /* Slow path for 1, 2 and 4 byte accesses.
>  	 Test (shadow != 0)
> -		& ((base_addr & 7) + (size_in_bytes - 1)) >= shadow).  */
> +		& ((base_addr & 7) + (real_size_in_bytes - 1)) >= shadow).  */
>        gimple_seq seq = NULL;
>        gimple shadow_test = build_assign (NE_EXPR, shadow, 0);
>        gimple_seq_add_stmt (&seq, shadow_test);
>        gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, base_addr, 7));
>        gimple_seq_add_stmt (&seq, build_type_cast (shadow_type,
>  						  gimple_seq_last (seq)));
> -      if (size_in_bytes > 1)
> +      if (real_size_in_bytes > 1)
>  	gimple_seq_add_stmt (&seq,
>  			     build_assign (PLUS_EXPR, gimple_seq_last (seq),
> -					   size_in_bytes - 1));
> +					   real_size_in_bytes - 1));
>        gimple_seq_add_stmt (&seq, build_assign (GE_EXPR, gimple_seq_last (seq),
>  					       shadow));
>        gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test,
> @@ -1561,6 +1582,39 @@ build_check_stmt (location_t location, t
>        t = gimple_assign_lhs (gimple_seq_last (seq));
>        gimple_seq_set_location (seq, location);
>        gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
> +      /* For weird access sizes, check first and last byte.  */
> +      if (real_size_in_bytes != size_in_bytes)
> +	{
> +	  g = gimple_build_assign_with_ops (PLUS_EXPR,
> +					    make_ssa_name (uintptr_type, NULL),
> +					    base_addr,
> +					    build_int_cst (uintptr_type,
> +							   size_in_bytes - 1));
> +	  gimple_set_location (g, location);
> +	  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +	  tree base_end_addr = gimple_assign_lhs (g);
> +
> +	  shadow = build_shadow_mem_access (&gsi, location, base_end_addr,
> +					    shadow_ptr_type);
> +	  seq = NULL;
> +	  shadow_test = build_assign (NE_EXPR, shadow, 0);
> +	  gimple_seq_add_stmt (&seq, shadow_test);
> +	  gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR,
> +						   base_end_addr, 7));
> +	  gimple_seq_add_stmt (&seq, build_type_cast (shadow_type,
> +						      gimple_seq_last (seq)));
> +	  gimple_seq_add_stmt (&seq, build_assign (GE_EXPR,
> +						   gimple_seq_last (seq),
> +						   shadow));
> +	  gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test,
> +						   gimple_seq_last (seq)));
> +	  gimple_seq_add_stmt (&seq, build_assign (BIT_IOR_EXPR, t,
> +						   gimple_seq_last (seq)));
> +	  t = gimple_assign_lhs (gimple_seq_last (seq));
> +	  gimple_seq_set_location (seq, location);
> +	  gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
> +	  sz_arg = build_int_cst (pointer_sized_int_node, size_in_bytes);
> +	}
>      }
>    else
>      t = shadow;
> @@ -1573,7 +1627,7 @@ build_check_stmt (location_t location, t
>    /* Generate call to the run-time library (e.g. __asan_report_load8).  */
>    gsi = gsi_start_bb (then_bb);
>    g = gimple_build_call (report_error_func (is_store, size_in_bytes),
> -			 1, base_addr);
> +			 sz_arg ? 2 : 1, base_addr, sz_arg);
>    gimple_set_location (g, location);
>    gsi_insert_after (&gsi, g, GSI_NEW_STMT);
>  
> @@ -1611,8 +1665,7 @@ instrument_derefs (gimple_stmt_iterator
>      }
>  
>    size_in_bytes = int_size_in_bytes (type);
> -  if ((size_in_bytes & (size_in_bytes - 1)) != 0
> -      || (unsigned HOST_WIDE_INT) size_in_bytes - 1 >= 16)
> +  if (size_in_bytes <= 0)
>      return;
>  
>    HOST_WIDE_INT bitsize, bitpos;
> @@ -1621,7 +1674,8 @@ instrument_derefs (gimple_stmt_iterator
>    int volatilep = 0, unsignedp = 0;
>    tree inner = get_inner_reference (t, &bitsize, &bitpos, &offset,
>  				    &mode, &unsignedp, &volatilep, false);
> -  if (bitpos % (size_in_bytes * BITS_PER_UNIT)
> +  if (((size_in_bytes & (size_in_bytes - 1)) == 0
> +       && (bitpos % (size_in_bytes * BITS_PER_UNIT)))
>        || bitsize != size_in_bytes * BITS_PER_UNIT)
>      {
>        if (TREE_CODE (t) == COMPONENT_REF
> @@ -1634,6 +1688,8 @@ instrument_derefs (gimple_stmt_iterator
>  	}
>        return;
>      }
> +  if (bitpos % BITS_PER_UNIT)
> +    return;
>  
>    if (TREE_CODE (inner) == VAR_DECL
>        && offset == NULL_TREE
>
>
> 	Jakub
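For reference, the checks the patch emits correspond roughly to the
following C.  ASAN_SHADOW_SHIFT is 3 as in asan.c; the shadow offset
shown is the x86-64 Linux value of targetm.asan_shadow_offset (); the
helper names (shadow_byte, bad_small_access, check_weird_load) are
illustrative only, a sketch rather than the generated GIMPLE:

    #include <stdint.h>

    #define ASAN_SHADOW_SHIFT  3
    #define ASAN_SHADOW_OFFSET 0x7fff8000UL

    /* What build_shadow_mem_access computes: the shadow byte
       covering ADDR.  */
    static inline int8_t
    shadow_byte (uintptr_t addr)
    {
      return *(int8_t *) ((addr >> ASAN_SHADOW_SHIFT)
			  + ASAN_SHADOW_OFFSET);
    }

    /* The slow-path test for 1, 2 and 4 byte accesses:
       (shadow != 0) & ((base_addr & 7) + (size - 1) >= shadow).  */
    static inline int
    bad_small_access (uintptr_t addr, int size)
    {
      int8_t s = shadow_byte (addr);
      return s != 0 && (int8_t) ((addr & 7) + size - 1) >= s;
    }

    /* From libasan; signature assumed to match
       BT_FN_VOID_PTR_PTRMODE.  */
    extern void __asan_report_load_n (void *addr, uintptr_t size);

    /* For a "weird" size (not a power of two, or larger than 16)
       the patch checks the shadow of the first and of the last byte
       of the access, and on failure reports with the full size.  */
    static inline void
    check_weird_load (uintptr_t addr, uintptr_t size)
    {
      if (bad_small_access (addr, 1)
	  || bad_small_access (addr + size - 1, 1))
	__asan_report_load_n ((void *) addr, size);
    }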