On Tue, Nov 19, 2013 at 10:07:00AM -0500, Jason Merrill wrote: > On 11/18/2013 11:39 AM, Marek Polacek wrote: > >+ init = fold_build2 (COMPOUND_EXPR, TREE_TYPE (init), > >+ ubsan_instrument_reference (input_location, init), > >+ init); > > This looks like it will evaluate init twice.
You're right. I wanted to use cp_save_expr and/or stabilize_expr, but that didn't work out. So I resorted to restrict the condition a bit and only pass INDIRECT_REFs to the ubsan routine (which, after all, has if (!INDIRECT_REF_P (init)) return init; And in that case, it seems we don't have to worry about multiple evaluation of the initializer. Nevertheless, I added a test that checks that we don't evaluate the initializer twice. BTW, we still detect e.g. int *p = 0; int &qr = ++*p; while clang doesn't detect it at all. So, ok for trunk? Ubsan testsuite passes. 2013-12-03 Marek Polacek <pola...@redhat.com> * c-family/c-ubsan.h (extern tree ubsan_instrument_division): * c-family/c-ubsan.c (ubsan_instrument_return): (ubsan_instrument_reference): * cp/decl.c (cp_finish_decl): * testsuite/g++.dg/ubsan/null-1.C (main): --- gcc/c-family/c-ubsan.h.mp2 2013-12-03 19:07:28.382466550 +0100 +++ gcc/c-family/c-ubsan.h 2013-12-03 19:09:02.245819384 +0100 @@ -25,5 +25,6 @@ extern tree ubsan_instrument_division (l extern tree ubsan_instrument_shift (location_t, enum tree_code, tree, tree); extern tree ubsan_instrument_vla (location_t, tree); extern tree ubsan_instrument_return (location_t); +extern tree ubsan_instrument_reference (location_t, tree); #endif /* GCC_C_UBSAN_H */ --- gcc/c-family/c-ubsan.c.mp2 2013-12-03 19:07:33.437485687 +0100 +++ gcc/c-family/c-ubsan.c 2013-12-03 19:08:15.649643663 +0100 @@ -190,3 +190,30 @@ ubsan_instrument_return (location_t loc) tree t = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_MISSING_RETURN); return build_call_expr_loc (loc, t, 1, build_fold_addr_expr_loc (loc, data)); } + +/* Instrument reference binding, that is, ensure that the reference + declaration doesn't bind the reference to a NULL pointer. */ + +tree +ubsan_instrument_reference (location_t loc, tree init) +{ + if (!INDIRECT_REF_P (init)) + /* This may happen, e.g. int &&r4 = p;, so don't put an assert here. */ + return init; + + init = TREE_OPERAND (init, 0); + tree eq_expr = fold_build2 (EQ_EXPR, boolean_type_node, init, + build_zero_cst (TREE_TYPE (init))); + const struct ubsan_mismatch_data m + = { build_zero_cst (pointer_sized_int_node), + build_int_cst (unsigned_char_type_node, UBSAN_REF_BINDING)}; + tree data = ubsan_create_data ("__ubsan_null_data", + loc, &m, + ubsan_type_descriptor (TREE_TYPE (init), + true), NULL_TREE); + data = build_fold_addr_expr_loc (loc, data); + tree fn = builtin_decl_implicit (BUILT_IN_UBSAN_HANDLE_TYPE_MISMATCH); + fn = build_call_expr_loc (loc, fn, 2, data, + build_zero_cst (pointer_sized_int_node)); + return fold_build3 (COND_EXPR, void_type_node, eq_expr, fn, void_zero_node); +} --- gcc/cp/decl.c.mp2 2013-12-03 19:07:38.678505371 +0100 +++ gcc/cp/decl.c 2013-12-03 20:40:34.649989640 +0100 @@ -6245,6 +6245,12 @@ cp_finish_decl (tree decl, tree init, bo if (decl_maybe_constant_var_p (decl)) TREE_CONSTANT (decl) = 1; } + if (flag_sanitize & SANITIZE_NULL + && TREE_CODE (type) == REFERENCE_TYPE + && INDIRECT_REF_P (init)) + init = fold_build2 (COMPOUND_EXPR, TREE_TYPE (init), + ubsan_instrument_reference (input_location, init), + init); } if (processing_template_decl) --- gcc/testsuite/g++.dg/ubsan/null-1.C.mp2 2013-11-22 16:16:05.073181508 +0100 +++ gcc/testsuite/g++.dg/ubsan/null-1.C 2013-12-03 19:23:48.351305205 +0100 @@ -0,0 +1,32 @@ +// { dg-do run } +// { dg-options "-fsanitize=null -Wall -Wno-unused-variable -std=c++11" } +// { dg-skip-if "" { *-*-* } { "-flto" } { "" } } + +typedef const long int L; + +int +main (void) +{ + int *p = 0; + L *l = 0; + + int &r = *p; + auto &r2 = *p; + L &lr = *l; + + /* Try an rvalue reference. */ + auto &&r3 = *p; + + /* Don't evaluate the reference initializer twice. */ + int i = 1; + int *q = &i; + int &qr = ++*q; + if (i != 2) + __builtin_abort (); + return 0; +} + +// { dg-output "reference binding to null pointer of type 'int'(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*reference binding to null pointer of type 'int'(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*reference binding to null pointer of type 'const L'(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*reference binding to null pointer of type 'int'(\n|\r\n|\r)" } Marek