The missing nul detection fails when the argument of the %s or similar sprintf directive is the address of a non-nul character constant such as in:
const char c = 'a'; int f (void) { return snprintf (0, 0, "%s", &c); } This is because the string_constant function only succeeds for arguments that refer to STRRING_CSTs, not to individual characters. For the same reason, calls to memchr() such as the one below aren't folded into constants: const char d = '\0'; void* g (void) { return memchr (&d, 0, 1); } To detect and diagnose the missing nul in the first example and to fold the second, the attached patch modifies string_constant to return a synthesized STRING_CST object for such references (while also indicating whether such an object is properly nul-terminated). Tested on x86_64-linux. Martin
PR tree-optimization/87756 - missing unterminated argument warning using address of a constant character gcc/ChangeLog: PR tree-optimization/87756 * expr.c (string_constant): Synthesize a string literal from the address of a constant character. * tree.c (build_string_literal): Add an argument. * tree.h (build_string_literal): Same. gcc/testsuite/ChangeLog: PR tree-optimization/87756 * gcc.dg/builtin-memchr-2.c: New test. * gcc.dg/builtin-memchr-3.c: Same. * gcc.dg/warn-sprintf-no-nul-2.c: Same. Index: gcc/expr.c =================================================================== --- gcc/expr.c (revision 265496) +++ gcc/expr.c (working copy) @@ -11484,18 +11484,40 @@ string_constant (tree arg, tree *ptr_offset, tree offset = off; } - if (!init || TREE_CODE (init) != STRING_CST) + if (!init) return NULL_TREE; + *ptr_offset = offset; + + tree eltype = TREE_TYPE (init); + tree initsize = TYPE_SIZE_UNIT (eltype); if (mem_size) - *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init)); + *mem_size = initsize; + if (decl) *decl = array; - gcc_checking_assert (tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (init))) - >= TREE_STRING_LENGTH (init)); + if (TREE_CODE (init) == INTEGER_CST) + { + /* For a reference to (address of) a single constant character, + store the native representation of the character in CHARBUF. */ + unsigned char charbuf[MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT]; + int len = native_encode_expr (init, charbuf, sizeof charbuf, 0); + if (len > 0) + { + /* Construct a string literal with elements of ELTYPE and + the representation above. Then strip + the ADDR_EXPR (ARRAY_REF (...)) around the STRING_CST. */ + init = build_string_literal (len, (char *)charbuf, eltype); + init = TREE_OPERAND (TREE_OPERAND (init, 0), 0); + } + } - *ptr_offset = offset; + if (TREE_CODE (init) != STRING_CST) + return NULL_TREE; + + gcc_checking_assert (tree_to_shwi (initsize) >= TREE_STRING_LENGTH (init)); + return init; } Index: gcc/tree.c =================================================================== --- gcc/tree.c (revision 265496) +++ gcc/tree.c (working copy) @@ -11141,25 +11141,28 @@ build_alloca_call_expr (tree size, unsigned int al } } -/* Create a new constant string literal and return a char* pointer to it. - The STRING_CST value is the LEN characters at STR. */ +/* Create a new constant string literal consisting of elements of type + ELTYPE and return a tree node representing char* pointer to it as + an ADDR_EXPR (ARRAY_REF (ELTYPE, ...)). The STRING_CST value is + the LEN bytes at STR (the representation of the string, which may + be wide). */ + tree -build_string_literal (int len, const char *str) +build_string_literal (int len, const char *str, + tree eltype /* = char_type_node */) { - tree t, elem, index, type; - - t = build_string (len, str); - elem = build_type_variant (char_type_node, 1, 0); - index = build_index_type (size_int (len - 1)); - type = build_array_type (elem, index); + tree t = build_string (len, str); + tree index = build_index_type (size_int (len - 1)); + eltype = build_type_variant (eltype, 1, 0); + tree type = build_array_type (eltype, index); TREE_TYPE (t) = type; TREE_CONSTANT (t) = 1; TREE_READONLY (t) = 1; TREE_STATIC (t) = 1; - type = build_pointer_type (elem); + type = build_pointer_type (eltype); t = build1 (ADDR_EXPR, type, - build4 (ARRAY_REF, elem, + build4 (ARRAY_REF, eltype, t, integer_zero_node, NULL_TREE, NULL_TREE)); return t; } Index: gcc/tree.h =================================================================== --- gcc/tree.h (revision 265496) +++ gcc/tree.h (working copy) @@ -4194,7 +4194,7 @@ extern tree build_call_expr_internal_loc_array (lo extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree, int, ...); extern tree build_alloca_call_expr (tree, unsigned int, HOST_WIDE_INT); -extern tree build_string_literal (int, const char *); +extern tree build_string_literal (int, const char *, tree = char_type_node); /* Construct various nodes representing data types. */ Index: gcc/testsuite/gcc.dg/warn-sprintf-no-nul-2.c =================================================================== --- gcc/testsuite/gcc.dg/warn-sprintf-no-nul-2.c (nonexistent) +++ gcc/testsuite/gcc.dg/warn-sprintf-no-nul-2.c (working copy) @@ -0,0 +1,131 @@ +/* PR tree-optimization/87756 - missing unterminated argument warning + using address of a constant character + { dg-do compile } + { dg-options "-O2 -Wall -ftrack-macro-expansion=0" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __WCHAR_TYPE__ wchar_t; + +int sprintf (char*, const char*, ...); + +extern char* dest (void); +extern void sink (int, ...); + +#define D dest () +#define T(expr) sink (0, (expr)) + + +const char cnul = '\0'; +const char cnonul = 'a'; +const char str3[] = "123"; + +const struct +{ + char a, b, s[3]; +} s1 = { '\0', 'b', "123" }, + s2[2] = { + { '\0', 'c', "12" }, + { 'd', '\0', "123" } + }; + +void test_sprintf_s (void) +{ + T (sprintf (D, "%s", &cnul)); + T (sprintf (D, "%s", &cnonul)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &cnonul)); + T (sprintf (D, "%.2s", &cnonul)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%s", &s1.a)); + T (sprintf (D, "%s", &s1.b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &s1.b)); + T (sprintf (D, "%.2s", &s1.b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%s", s1.s)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.3s", s1.s)); + T (sprintf (D, "%.4s", s1.s)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%.2s", s1.s + 1)); + T (sprintf (D, "%.3s", s1.s + 1)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%s", &s2[0].a)); + T (sprintf (D, "%s", &s2[0].b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &s2[0].b)); + T (sprintf (D, "%.2s", &s2[0].b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%s", s2[0].s)); + T (sprintf (D, "%.3s", s2[0].s)); + T (sprintf (D, "%.4s", s2[0].s)); + + T (sprintf (D, "%.2s", s2[0].s + 1)); + T (sprintf (D, "%.3s", s2[0].s + 1)); + + T (sprintf (D, "%s", &s2[1].a)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &s2[1].a)); + T (sprintf (D, "%.2s", &s2[1].a)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%s", &s2[1].b)); + T (sprintf (D, "%s", s2[1].s)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.3s", s2[1].s)); + T (sprintf (D, "%.4s", s2[1].s)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%.2s", s2[1].s + 1)); + T (sprintf (D, "%.3s", s2[1].s + 1)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%s", &str3[3])); + T (sprintf (D, "%s", &str3[4])); /* { dg-warning "\\\[-Warray-bounds" } */ +} + + +const char wnul = '\0'; +const char wnonul = 'a'; +const char wcs3[] = "123"; + +const struct +{ + char a, b, s[3]; +} w1 = { '\0', 'b', "123" }, + w2[2] = { + { '\0', 'c', "12" }, + { 'd', '\0', "123" } + }; + +void test_sprintf_ls (void) +{ + T (sprintf (D, "%s", &wnul)); + T (sprintf (D, "%s", &wnonul)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &wnonul)); + T (sprintf (D, "%.2s", &wnonul)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%s", &w1.a)); + T (sprintf (D, "%s", &w1.b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &w1.b)); + T (sprintf (D, "%.2s", &w1.b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%s", w1.s)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.3s", w1.s)); + T (sprintf (D, "%.4s", w1.s)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%.2s", w1.s + 1)); + T (sprintf (D, "%.3s", w1.s + 1)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%s", &w2[0].a)); + T (sprintf (D, "%s", &w2[0].b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &w2[0].b)); + T (sprintf (D, "%.2s", &w2[0].b)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%s", w2[0].s)); + T (sprintf (D, "%.3s", w2[0].s)); + T (sprintf (D, "%.4s", w2[0].s)); + + T (sprintf (D, "%.2s", w2[0].s + 1)); + T (sprintf (D, "%.3s", w2[0].s + 1)); + + T (sprintf (D, "%s", &w2[1].a)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.1s", &w2[1].a)); + T (sprintf (D, "%.2s", &w2[1].a)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%s", &w2[1].b)); + T (sprintf (D, "%s", w2[1].s)); /* { dg-warning "nul-terminated" } */ + T (sprintf (D, "%.3s", w2[1].s)); + T (sprintf (D, "%.4s", w2[1].s)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%.2s", w2[1].s + 1)); + T (sprintf (D, "%.3s", w2[1].s + 1)); /* { dg-warning "nul-terminated" } */ + + T (sprintf (D, "%s", &wcs3[3])); + T (sprintf (D, "%s", &wcs3[4])); /* { dg-warning "\\\[-Warray-bounds" } */ +} Index: gcc/testsuite/gcc.dg/builtin-memchr-2.c =================================================================== --- gcc/testsuite/gcc.dg/builtin-memchr-2.c (nonexistent) +++ gcc/testsuite/gcc.dg/builtin-memchr-2.c (working copy) @@ -0,0 +1,55 @@ +/* Verify that memchr calls with the address of a constant character + are folded as expected even at -O0. + { dg-do compile } + { dg-options "-O0 -Wall -fdump-tree-gimple" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __WCHAR_TYPE__ wchar_t; + +extern void* memchr (const void*, int, size_t); +extern int printf (const char*, ...); +extern void abort (void); + +#define A(expr) \ + ((expr) \ + ? (void)0 \ + : (printf ("assertion failed on line %i: %s\n", \ + __LINE__, #expr), \ + abort ())) + +const char nul = 0; +const char cha = 'a'; + +const struct +{ + char c; +} snul = { 0 }, + schb = { 'b' }, + sarr[] = { + { 0 }, + { 'c' } + }; + + +void test_memchr_cst_char (void) +{ + A (&nul == memchr (&nul, 0, 1)); + A (!memchr (&nul, 'a', 1)); + + A (&cha == memchr (&cha, 'a', 1)); + A (!memchr (&cha, 0, 1)); + + A (&snul.c == memchr (&snul.c, 0, 1)); + A (!memchr (&snul.c, 'a', 1)); + + A (&schb.c == memchr (&schb.c, 'b', 1)); + A (!memchr (&schb.c, 0, 1)); + + A (&sarr[0].c == memchr (&sarr[0].c, 0, 1)); + A (!memchr (&sarr[0].c, 'a', 1)); + + A (&sarr[1].c == memchr (&sarr[1].c, 'c', 1)); + A (!memchr (&sarr[1].c, 0, 1)); +} + +/* { dg-final { scan-tree-dump-not "abort" "optimized" } } */ Index: gcc/testsuite/gcc.dg/builtin-memchr-3.c =================================================================== --- gcc/testsuite/gcc.dg/builtin-memchr-3.c (nonexistent) +++ gcc/testsuite/gcc.dg/builtin-memchr-3.c (working copy) @@ -0,0 +1,72 @@ +/* Verify that memchr calls with a pointer to a constant character + are folded as expected. + { dg-do compile } + { dg-options "-O1 -Wall -fdump-tree-gimple" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __WCHAR_TYPE__ wchar_t; + +extern void* memchr (const void*, int, size_t); +extern int printf (const char*, ...); +extern void abort (void); + +#define A(expr) \ + ((expr) \ + ? (void)0 \ + : (printf ("assertion failed on line %i: %s\n", \ + __LINE__, #expr), \ + abort ())) + +const char nul = 0; +const char cha = 'a'; + +const char* const pnul = &nul; +const char* const pcha = &cha; + +const struct +{ + char c; +} snul = { 0 }, + schb = { 'b' }, + sarr[] = { + { 0 }, + { 'c' } + }; + +const char* const psarr0c = &sarr[0].c; +const char* const psarr1c = &sarr[1].c; + +void test_memchr_cst_char (void) +{ + A (&nul == memchr (&nul, 0, 1)); + A (!memchr (&nul, 'a', 1)); + + A (&cha == memchr (&cha, 'a', 1)); + A (!memchr (&cha, 0, 1)); + + A (&nul == memchr (pnul, 0, 1)); + A (!memchr (pnul, 'a', 1)); + + A (&cha == memchr (pcha, 'a', 1)); + A (!memchr (pcha, 0, 1)); + + A (&snul.c == memchr (&snul.c, 0, 1)); + A (!memchr (&snul.c, 'a', 1)); + + A (&schb.c == memchr (&schb.c, 'b', 1)); + A (!memchr (&schb.c, 0, 1)); + + A (&sarr[0].c == memchr (&sarr[0].c, 0, 1)); + A (!memchr (&sarr[0].c, 'a', 1)); + + A (&sarr[1].c == memchr (&sarr[1].c, 'c', 1)); + A (!memchr (&sarr[1].c, 0, 1)); + + A (&sarr[0].c == memchr (psarr0c, 0, 1)); + A (!memchr (psarr0c, 'a', 1)); + + A (&sarr[1].c == memchr (psarr1c, 'c', 1)); + A (!memchr (psarr1c, 0, 1)); +} + +/* { dg-final { scan-tree-dump-not "abort" "optimized" } } */