The missing nul detection fails when the argument of the %s or
similar sprintf directive is the address of a non-nul character
constant such as in:

  const char c = 'a';
  int f (void)
  {
    return snprintf (0, 0, "%s", &c);
  }

This is because the string_constant function only succeeds for
arguments that refer to STRRING_CSTs, not to individual characters.

For the same reason, calls to memchr() such as the one below aren't
folded into constants:

  const char d = '\0';
  void* g (void)
  {
    return memchr (&d, 0, 1);
  }

To detect and diagnose the missing nul in the first example and
to fold the second, the attached patch modifies string_constant
to return a synthesized STRING_CST object for such references
(while also indicating whether such an object is properly
nul-terminated).

Tested on x86_64-linux.

Martin
PR tree-optimization/87756 - missing unterminated argument warning using address of a constant character

gcc/ChangeLog:

	PR tree-optimization/87756
	* expr.c (string_constant): Synthesize a string literal from
	the address of a constant character.
	* tree.c (build_string_literal): Add an argument.
	* tree.h (build_string_literal): Same.

gcc/testsuite/ChangeLog:

	PR tree-optimization/87756
	* gcc.dg/builtin-memchr-2.c: New test.
	* gcc.dg/builtin-memchr-3.c: Same.
	* gcc.dg/warn-sprintf-no-nul-2.c: Same.

Index: gcc/expr.c
===================================================================
--- gcc/expr.c	(revision 265496)
+++ gcc/expr.c	(working copy)
@@ -11484,18 +11484,40 @@ string_constant (tree arg, tree *ptr_offset, tree
 	offset = off;
     }
 
-  if (!init || TREE_CODE (init) != STRING_CST)
+  if (!init)
     return NULL_TREE;
 
+  *ptr_offset = offset;
+
+  tree eltype = TREE_TYPE (init);
+  tree initsize = TYPE_SIZE_UNIT (eltype);
   if (mem_size)
-    *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init));
+    *mem_size = initsize;
+
   if (decl)
     *decl = array;
 
-  gcc_checking_assert (tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (init)))
-		       >= TREE_STRING_LENGTH (init));
+  if (TREE_CODE (init) == INTEGER_CST)
+    {
+      /* For a reference to (address of) a single constant character,
+	 store the native representation of the character in CHARBUF.   */
+      unsigned char charbuf[MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT];
+      int len = native_encode_expr (init, charbuf, sizeof charbuf, 0);
+      if (len > 0)
+	{
+	  /* Construct a string literal with elements of ELTYPE and
+	     the representation above.  Then strip
+	     the ADDR_EXPR (ARRAY_REF (...)) around the STRING_CST.  */
+	  init = build_string_literal (len, (char *)charbuf, eltype);
+	  init = TREE_OPERAND (TREE_OPERAND (init, 0), 0);
+	}
+    }
 
-  *ptr_offset = offset;
+  if (TREE_CODE (init) != STRING_CST)
+    return NULL_TREE;
+
+  gcc_checking_assert (tree_to_shwi (initsize) >= TREE_STRING_LENGTH (init));
+
   return init;
 }
 
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	(revision 265496)
+++ gcc/tree.c	(working copy)
@@ -11141,25 +11141,28 @@ build_alloca_call_expr (tree size, unsigned int al
     }
 }
 
-/* Create a new constant string literal and return a char* pointer to it.
-   The STRING_CST value is the LEN characters at STR.  */
+/* Create a new constant string literal consisting of elements of type
+   ELTYPE and return a tree node representing char* pointer to it as
+   an ADDR_EXPR (ARRAY_REF (ELTYPE, ...)).  The STRING_CST value is
+   the LEN bytes at STR (the representation of the string, which may
+   be wide).  */
+
 tree
-build_string_literal (int len, const char *str)
+build_string_literal (int len, const char *str,
+		      tree eltype /* = char_type_node */)
 {
-  tree t, elem, index, type;
-
-  t = build_string (len, str);
-  elem = build_type_variant (char_type_node, 1, 0);
-  index = build_index_type (size_int (len - 1));
-  type = build_array_type (elem, index);
+  tree t = build_string (len, str);
+  tree index = build_index_type (size_int (len - 1));
+  eltype = build_type_variant (eltype, 1, 0);
+  tree type = build_array_type (eltype, index);
   TREE_TYPE (t) = type;
   TREE_CONSTANT (t) = 1;
   TREE_READONLY (t) = 1;
   TREE_STATIC (t) = 1;
 
-  type = build_pointer_type (elem);
+  type = build_pointer_type (eltype);
   t = build1 (ADDR_EXPR, type,
-	      build4 (ARRAY_REF, elem,
+	      build4 (ARRAY_REF, eltype,
 		      t, integer_zero_node, NULL_TREE, NULL_TREE));
   return t;
 }
Index: gcc/tree.h
===================================================================
--- gcc/tree.h	(revision 265496)
+++ gcc/tree.h	(working copy)
@@ -4194,7 +4194,7 @@ extern tree build_call_expr_internal_loc_array (lo
 extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
 				       int, ...);
 extern tree build_alloca_call_expr (tree, unsigned int, HOST_WIDE_INT);
-extern tree build_string_literal (int, const char *);
+extern tree build_string_literal (int, const char *, tree = char_type_node);
 
 /* Construct various nodes representing data types.  */
 
Index: gcc/testsuite/gcc.dg/warn-sprintf-no-nul-2.c
===================================================================
--- gcc/testsuite/gcc.dg/warn-sprintf-no-nul-2.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/warn-sprintf-no-nul-2.c	(working copy)
@@ -0,0 +1,131 @@
+/* PR tree-optimization/87756 - missing unterminated argument warning
+   using address of a constant character
+   { dg-do compile }
+   { dg-options "-O2 -Wall -ftrack-macro-expansion=0" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __WCHAR_TYPE__ wchar_t;
+
+int sprintf (char*, const char*, ...);
+
+extern char* dest (void);
+extern void sink (int, ...);
+
+#define D dest ()
+#define T(expr)   sink (0, (expr))
+
+
+const char cnul = '\0';
+const char cnonul = 'a';
+const char str3[] = "123";
+
+const struct
+{
+  char a, b, s[3];
+} s1 = { '\0', 'b', "123" },
+  s2[2] = {
+  { '\0', 'c', "12" },
+  { 'd', '\0', "123" }
+  };
+
+void test_sprintf_s (void)
+{
+  T (sprintf (D, "%s", &cnul));
+  T (sprintf (D, "%s", &cnonul));       /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &cnonul));
+  T (sprintf (D, "%.2s", &cnonul));     /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%s", &s1.a));
+  T (sprintf (D, "%s", &s1.b));         /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &s1.b));
+  T (sprintf (D, "%.2s", &s1.b));       /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%s", s1.s));          /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.3s", s1.s));
+  T (sprintf (D, "%.4s", s1.s));        /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%.2s", s1.s + 1));
+  T (sprintf (D, "%.3s", s1.s + 1));    /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%s", &s2[0].a));
+  T (sprintf (D, "%s", &s2[0].b));      /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &s2[0].b));
+  T (sprintf (D, "%.2s", &s2[0].b));    /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%s", s2[0].s));
+  T (sprintf (D, "%.3s", s2[0].s));
+  T (sprintf (D, "%.4s", s2[0].s));
+
+  T (sprintf (D, "%.2s", s2[0].s + 1));
+  T (sprintf (D, "%.3s", s2[0].s + 1));
+
+  T (sprintf (D, "%s", &s2[1].a));      /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &s2[1].a));
+  T (sprintf (D, "%.2s", &s2[1].a));    /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%s", &s2[1].b));
+  T (sprintf (D, "%s", s2[1].s));       /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.3s", s2[1].s));
+  T (sprintf (D, "%.4s", s2[1].s));     /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%.2s", s2[1].s + 1));
+  T (sprintf (D, "%.3s", s2[1].s + 1)); /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%s", &str3[3]));
+  T (sprintf (D, "%s", &str3[4]));      /* { dg-warning "\\\[-Warray-bounds" } */
+}
+
+
+const char wnul = '\0';
+const char wnonul = 'a';
+const char wcs3[] = "123";
+
+const struct
+{
+  char a, b, s[3];
+} w1 = { '\0', 'b', "123" },
+  w2[2] = {
+  { '\0', 'c', "12" },
+  { 'd', '\0', "123" }
+  };
+
+void test_sprintf_ls (void)
+{
+  T (sprintf (D, "%s", &wnul));
+  T (sprintf (D, "%s", &wnonul));       /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &wnonul));
+  T (sprintf (D, "%.2s", &wnonul));     /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%s", &w1.a));
+  T (sprintf (D, "%s", &w1.b));         /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &w1.b));
+  T (sprintf (D, "%.2s", &w1.b));       /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%s", w1.s));          /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.3s", w1.s));
+  T (sprintf (D, "%.4s", w1.s));        /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%.2s", w1.s + 1));
+  T (sprintf (D, "%.3s", w1.s + 1));    /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%s", &w2[0].a));
+  T (sprintf (D, "%s", &w2[0].b));      /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &w2[0].b));
+  T (sprintf (D, "%.2s", &w2[0].b));    /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%s", w2[0].s));
+  T (sprintf (D, "%.3s", w2[0].s));
+  T (sprintf (D, "%.4s", w2[0].s));
+
+  T (sprintf (D, "%.2s", w2[0].s + 1));
+  T (sprintf (D, "%.3s", w2[0].s + 1));
+
+  T (sprintf (D, "%s", &w2[1].a));      /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.1s", &w2[1].a));
+  T (sprintf (D, "%.2s", &w2[1].a));    /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%s", &w2[1].b));
+  T (sprintf (D, "%s", w2[1].s));       /* { dg-warning "nul-terminated" } */
+  T (sprintf (D, "%.3s", w2[1].s));
+  T (sprintf (D, "%.4s", w2[1].s));     /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%.2s", w2[1].s + 1));
+  T (sprintf (D, "%.3s", w2[1].s + 1)); /* { dg-warning "nul-terminated" } */
+
+  T (sprintf (D, "%s", &wcs3[3]));
+  T (sprintf (D, "%s", &wcs3[4]));      /* { dg-warning "\\\[-Warray-bounds" } */
+}
Index: gcc/testsuite/gcc.dg/builtin-memchr-2.c
===================================================================
--- gcc/testsuite/gcc.dg/builtin-memchr-2.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/builtin-memchr-2.c	(working copy)
@@ -0,0 +1,55 @@
+/* Verify that memchr calls with the address of a constant character
+   are folded as expected even at -O0.
+  { dg-do compile }
+  { dg-options "-O0 -Wall -fdump-tree-gimple" } */
+
+typedef __SIZE_TYPE__  size_t;
+typedef __WCHAR_TYPE__ wchar_t;
+
+extern void* memchr (const void*, int, size_t);
+extern int printf (const char*, ...);
+extern void abort (void);
+
+#define A(expr)							\
+  ((expr)							\
+   ? (void)0							\
+   : (printf ("assertion failed on line %i: %s\n",		\
+			__LINE__, #expr),			\
+      abort ()))
+
+const char nul = 0;
+const char cha = 'a';
+
+const struct
+{
+  char c;
+} snul = { 0 },
+  schb = { 'b' },
+  sarr[] = {
+  { 0 },
+  { 'c' }
+  };
+
+
+void test_memchr_cst_char (void)
+{
+  A (&nul == memchr (&nul, 0, 1));
+  A (!memchr (&nul, 'a', 1));
+
+  A (&cha == memchr (&cha, 'a', 1));
+  A (!memchr (&cha, 0, 1));
+
+  A (&snul.c == memchr (&snul.c, 0, 1));
+  A (!memchr (&snul.c, 'a', 1));
+
+  A (&schb.c == memchr (&schb.c, 'b', 1));
+  A (!memchr (&schb.c, 0, 1));
+
+  A (&sarr[0].c == memchr (&sarr[0].c, 0, 1));
+  A (!memchr (&sarr[0].c, 'a', 1));
+
+  A (&sarr[1].c == memchr (&sarr[1].c, 'c', 1));
+  A (!memchr (&sarr[1].c, 0, 1));
+}
+
+/* { dg-final { scan-tree-dump-not "abort" "optimized" } } */
Index: gcc/testsuite/gcc.dg/builtin-memchr-3.c
===================================================================
--- gcc/testsuite/gcc.dg/builtin-memchr-3.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/builtin-memchr-3.c	(working copy)
@@ -0,0 +1,72 @@
+/* Verify that memchr calls with a pointer to a constant character
+   are folded as expected.
+   { dg-do compile }
+   { dg-options "-O1 -Wall -fdump-tree-gimple" } */
+
+typedef __SIZE_TYPE__  size_t;
+typedef __WCHAR_TYPE__ wchar_t;
+
+extern void* memchr (const void*, int, size_t);
+extern int printf (const char*, ...);
+extern void abort (void);
+
+#define A(expr)						\
+  ((expr)						\
+   ? (void)0						\
+   : (printf ("assertion failed on line %i: %s\n",	\
+	      __LINE__, #expr),				\
+      abort ()))
+
+const char nul = 0;
+const char cha = 'a';
+
+const char* const pnul = &nul;
+const char* const pcha = &cha;
+
+const struct
+{
+  char c;
+} snul = { 0 },
+  schb = { 'b' },
+  sarr[] = {
+  { 0 },
+  { 'c' }
+  };
+
+const char* const psarr0c = &sarr[0].c;
+const char* const psarr1c = &sarr[1].c;
+
+void test_memchr_cst_char (void)
+{
+  A (&nul == memchr (&nul, 0, 1));
+  A (!memchr (&nul, 'a', 1));
+
+  A (&cha == memchr (&cha, 'a', 1));
+  A (!memchr (&cha, 0, 1));
+
+  A (&nul == memchr (pnul, 0, 1));
+  A (!memchr (pnul, 'a', 1));
+
+  A (&cha == memchr (pcha, 'a', 1));
+  A (!memchr (pcha, 0, 1));
+
+  A (&snul.c == memchr (&snul.c, 0, 1));
+  A (!memchr (&snul.c, 'a', 1));
+
+  A (&schb.c == memchr (&schb.c, 'b', 1));
+  A (!memchr (&schb.c, 0, 1));
+
+  A (&sarr[0].c == memchr (&sarr[0].c, 0, 1));
+  A (!memchr (&sarr[0].c, 'a', 1));
+
+  A (&sarr[1].c == memchr (&sarr[1].c, 'c', 1));
+  A (!memchr (&sarr[1].c, 0, 1));
+
+  A (&sarr[0].c == memchr (psarr0c, 0, 1));
+  A (!memchr (psarr0c, 'a', 1));
+
+  A (&sarr[1].c == memchr (psarr1c, 'c', 1));
+  A (!memchr (psarr1c, 0, 1));
+}
+
+/* { dg-final { scan-tree-dump-not "abort" "optimized" } } */

Reply via email to