Hi,

this patch is a follow-up to my patch here:
https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01800.html

Since most callers of c_strlen and get_range_strlen expect
the length in bytes of a zero-terminated string, there is
a need for a new parameter, eltsize, which defaults to 1
but can be used in gimple-ssa-sprintf.c to specify the
expected character size.

Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?


Thanks
Bernd.
2018-08-14  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* builtins.c (c_strlen): Add new parameter eltsize.
	* builtins.h (c_strlen): Adjust prototype.
	* expr.c (string_constant): Add new parameter mem_size.
	* expr.h (string_constant): Adjust prototype.
	* gimple-fold.c (get_range_strlen): Add new parameter eltsize.
	* gimple-fold.h (get_range_strlen): Adjust prototype.
	* gimple-ssa-sprintf.c (get_string_length): Add new parameter eltsize.
	(format_string): Call get_string_length with eltsize.

2018-08-14  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* gcc.dg/strlenopt-49.c: Adjust test case.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: Likewise.

diff -pur gcc-9-20180812-1/gcc/builtins.c gcc-9-20180812-2/gcc/builtins.c
--- gcc-9-20180812-1/gcc/builtins.c	2018-08-13 20:57:43.318627928 +0200
+++ gcc-9-20180812-2/gcc/builtins.c	2018-08-14 06:22:14.547504925 +0200
@@ -568,13 +568,13 @@ string_length (const void *ptr, unsigned
    accesses.  Note that this implies the result is not going to be emitted
    into the instruction stream.
 
-   The value returned is of type `ssizetype'.
+   ELTSIZE is 1 for normal single byte character strings, and 2 or
+   4 for wide character strings.  ELTSIZE is by default 1.
 
-   Unfortunately, string_constant can't access the values of const char
-   arrays with initializers, so neither can we do so here.  */
+   The value returned is of type `ssizetype'.  */
 
 tree
-c_strlen (tree src, int only_value)
+c_strlen (tree src, int only_value, unsigned eltsize)
 {
   STRIP_NOPS (src);
   if (TREE_CODE (src) == COND_EXPR
@@ -582,27 +582,28 @@ c_strlen (tree src, int only_value)
     {
       tree len1, len2;
 
-      len1 = c_strlen (TREE_OPERAND (src, 1), only_value);
-      len2 = c_strlen (TREE_OPERAND (src, 2), only_value);
+      len1 = c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
+      len2 = c_strlen (TREE_OPERAND (src, 2), only_value, eltsize);
       if (tree_int_cst_equal (len1, len2))
 	return len1;
     }
 
   if (TREE_CODE (src) == COMPOUND_EXPR
       && (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
-    return c_strlen (TREE_OPERAND (src, 1), only_value);
+    return c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
 
   location_t loc = EXPR_LOC_OR_LOC (src, input_location);
 
   /* Offset from the beginning of the string in bytes.  */
   tree byteoff;
-  src = string_constant (src, &byteoff);
+  tree memsize;
+  src = string_constant (src, &byteoff, &memsize);
   if (src == 0)
     return NULL_TREE;
 
   /* Determine the size of the string element.  */
-  unsigned eltsize
-    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src))));
+  if (eltsize != tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src)))))
+    return NULL_TREE;
 
   /* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
      length of SRC.  Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
@@ -613,14 +614,10 @@ c_strlen (tree src, int only_value)
   HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
   strelts = strelts / eltsize - 1;
 
-  HOST_WIDE_INT maxelts = strelts;
-  tree type = TREE_TYPE (src);
-  if (tree size = TYPE_SIZE_UNIT (type))
-    if (tree_fits_shwi_p (size))
-      {
-	maxelts = tree_to_shwi (size);
-	maxelts = maxelts / eltsize - 1;
-      }
+  if (!tree_fits_uhwi_p (memsize))
+    return NULL_TREE;
+
+  HOST_WIDE_INT maxelts = tree_to_uhwi (memsize) / eltsize - 1;
 
   /* PTR can point to the byte representation of any string type, including
      char* and wchar_t*.  */
@@ -628,19 +625,23 @@ c_strlen (tree src, int only_value)
 
   if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
     {
+      /* For empty strings the result should be zero.  */
+      if (maxelts == 0)
+	return ssize_int (0);
+
+      /* The code below works only for single byte character types.  */
+      if (eltsize != 1)
+	return NULL_TREE;
+
       /* If the string has an internal NUL character followed by any
 	 non-NUL characters (e.g., "foo\0bar"), we can't compute
 	 the offset to the following NUL if we don't know where to
 	 start searching for it.  */
       unsigned len = string_length (ptr, eltsize, strelts);
-      if (len < strelts)
-	{
-	  /* Return when an embedded null character is found.  */
-	  return NULL_TREE;
-	}
 
-      if (!maxelts)
-	return ssize_int (0);
+      /* Return when an embedded null character is found or none at all.  */
+      if (len < strelts || len > maxelts)
+	return NULL_TREE;
 
       /* We don't know the starting offset, but we do know that the string
 	 has no internal zero bytes.  If the offset falls within the bounds
@@ -650,8 +651,8 @@ c_strlen (tree src, int only_value)
       tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) : byteoff;
       offsave = fold_convert (ssizetype, offsave);
       tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
-				      build_int_cst (ssizetype, len * eltsize));
-      tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize), offsave);
+				      build_int_cst (ssizetype, len));
+      tree lenexp = size_diffop_loc (loc, ssize_int (strelts), offsave);
       return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp,
 			      build_zero_cst (ssizetype));
     }
@@ -684,6 +685,11 @@ c_strlen (tree src, int only_value)
       return NULL_TREE;
     }
 
+  /* If eltoff is larger than strelts but less than maxelts the
+     string length is zero, since the excess memory will be zero.  */
+  if (eltoff > strelts)
+    return ssize_int (0);
+
   /* Use strlen to search for the first zero byte.  Since any strings
      constructed with build_string will have nulls appended, we win even
      if we get handed something like (char[4])"abcd".
@@ -691,7 +697,7 @@ c_strlen (tree src, int only_value)
      Since ELTOFF is our starting index into the string, no further
      calculation is needed.  */
   unsigned len = string_length (ptr + eltoff * eltsize, eltsize,
-				maxelts - eltoff);
+				strelts - eltoff);
 
   return ssize_int (len);
 }
diff -pur gcc-9-20180812-1/gcc/builtins.h gcc-9-20180812-2/gcc/builtins.h
--- gcc-9-20180812-1/gcc/builtins.h	2018-08-10 11:43:06.000000000 +0200
+++ gcc-9-20180812-2/gcc/builtins.h	2018-08-13 22:10:20.183271630 +0200
@@ -58,7 +58,7 @@ extern bool get_pointer_alignment_1 (tre
 				     unsigned HOST_WIDE_INT *);
 extern unsigned int get_pointer_alignment (tree);
 extern unsigned string_length (const void*, unsigned, unsigned);
-extern tree c_strlen (tree, int);
+extern tree c_strlen (tree, int, unsigned = 1);
 extern void expand_builtin_setjmp_setup (rtx, rtx);
 extern void expand_builtin_setjmp_receiver (rtx);
 extern void expand_builtin_update_setjmp_buf (rtx);
diff -pur gcc-9-20180812-1/gcc/expr.c gcc-9-20180812-2/gcc/expr.c
--- gcc-9-20180812-1/gcc/expr.c	2018-08-13 20:57:43.320627902 +0200
+++ gcc-9-20180812-2/gcc/expr.c	2018-08-13 23:10:45.636583305 +0200
@@ -11271,10 +11271,12 @@ is_aligning_offset (const_tree offset, c
 /* Return the tree node if an ARG corresponds to a string constant or zero
    if it doesn't.  If we return nonzero, set *PTR_OFFSET to the (possibly
    non-constant) offset in bytes within the string that ARG is accessing.
-   The type of the offset is sizetype.  */
+   The type of the offset is sizetype.  If MEM_SIZE is non-zero the storage
+   size of the memory is returned.  If MEM_SIZE is zero, the string is
+   only returned when it is properly zero terminated.  */
 
 tree
-string_constant (tree arg, tree *ptr_offset)
+string_constant (tree arg, tree *ptr_offset, tree *mem_size)
 {
   tree array;
   STRIP_NOPS (arg);
@@ -11328,7 +11330,7 @@ string_constant (tree arg, tree *ptr_off
 	return NULL_TREE;
 
       tree offset;
-      if (tree str = string_constant (arg0, &offset))
+      if (tree str = string_constant (arg0, &offset, mem_size))
 	{
 	  /* Avoid pointers to arrays (see bug 86622).  */
 	  if (POINTER_TYPE_P (TREE_TYPE (arg))
@@ -11368,6 +11370,8 @@ string_constant (tree arg, tree *ptr_off
   if (TREE_CODE (array) == STRING_CST)
     {
       *ptr_offset = fold_convert (sizetype, offset);
+      if (mem_size)
+	*mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
       return array;
     }
 
@@ -11416,8 +11420,10 @@ string_constant (tree arg, tree *ptr_off
      because callers expect to be able to access the string
      up to the limit imposed by TREE_STRING_LENGTH which
      always includes the terminating NUL char.  */
-  if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)),
-			TREE_STRING_LENGTH (init)) < 0)
+  if (mem_size)
+    *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init));
+  else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)),
+			     TREE_STRING_LENGTH (init)) < 0)
     return NULL_TREE;
 
   *ptr_offset = offset;
diff -pur gcc-9-20180812-1/gcc/expr.h gcc-9-20180812-2/gcc/expr.h
--- gcc-9-20180812-1/gcc/expr.h	2018-06-08 09:53:31.000000000 +0200
+++ gcc-9-20180812-2/gcc/expr.h	2018-08-13 22:47:27.338724367 +0200
@@ -288,7 +288,7 @@ expand_normal (tree exp)
 
 /* Return the tree node and offset if a given argument corresponds to
    a string constant.  */
-extern tree string_constant (tree, tree *);
+extern tree string_constant (tree, tree *, tree * = NULL);
 
 /* Two different ways of generating switch statements.  */
 extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, profile_probability);
diff -pur gcc-9-20180812-1/gcc/gimple-fold.c gcc-9-20180812-2/gcc/gimple-fold.c
--- gcc-9-20180812-1/gcc/gimple-fold.c	2018-08-10 11:43:06.000000000 +0200
+++ gcc-9-20180812-2/gcc/gimple-fold.c	2018-08-14 11:40:16.555494378 +0200
@@ -1275,11 +1275,13 @@ gimple_fold_builtin_memset (gimple_stmt_
    Set *FLEXP to true if the range of the string lengths has been
    obtained from the upper bound of an array at the end of a struct.
    Such an array may hold a string that's longer than its upper bound
-   due to it being used as a poor-man's flexible array member.  */
+   due to it being used as a poor-man's flexible array member.
+   ELTSIZE is 1 for normal single byte character strings, and 2 or
+   4 for wide character strings.  ELTSIZE is by default 1.  */
 
 static bool
 get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
-		  int fuzzy, bool *flexp)
+		  int fuzzy, bool *flexp, unsigned eltsize = 1)
 {
   tree var, val = NULL_TREE;
   gimple *def_stmt;
@@ -1300,8 +1302,8 @@ get_range_strlen (tree arg, tree length[
 	      tree aop0 = TREE_OPERAND (op, 0);
 	      if (TREE_CODE (aop0) == INDIRECT_REF
 		  && TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME)
-		return get_range_strlen (TREE_OPERAND (aop0, 0),
-					 length, visited, type, fuzzy, flexp);
+		return get_range_strlen (TREE_OPERAND (aop0, 0), length,
+					 visited, type, fuzzy, flexp, eltsize);
 	    }
 	  else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy)
 	    {
@@ -1329,13 +1331,13 @@ get_range_strlen (tree arg, tree length[
 	    return false;
 	}
       else
-	val = c_strlen (arg, 1);
+	val = c_strlen (arg, 1, eltsize);
 
       if (!val && fuzzy)
 	{
 	  if (TREE_CODE (arg) == ADDR_EXPR)
 	    return get_range_strlen (TREE_OPERAND (arg, 0), length,
-				     visited, type, fuzzy, flexp);
+				     visited, type, fuzzy, flexp, eltsize);
 
 	  if (TREE_CODE (arg) == ARRAY_REF)
 	    {
@@ -1477,7 +1479,8 @@ get_range_strlen (tree arg, tree length[
             || gimple_assign_unary_nop_p (def_stmt))
           {
             tree rhs = gimple_assign_rhs1 (def_stmt);
-	    return get_range_strlen (rhs, length, visited, type, fuzzy, flexp);
+	    return get_range_strlen (rhs, length, visited, type, fuzzy, flexp,
+				     eltsize);
           }
 	else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR)
 	  {
@@ -1486,7 +1489,7 @@ get_range_strlen (tree arg, tree length[
 
 	    for (unsigned int i = 0; i < 2; i++)
 	      if (!get_range_strlen (ops[i], length, visited, type, fuzzy,
-				     flexp))
+				     flexp, eltsize))
 		{
 		  if (fuzzy == 2)
 		    *maxlen = build_all_ones_cst (size_type_node);
@@ -1513,7 +1516,8 @@ get_range_strlen (tree arg, tree length[
             if (arg == gimple_phi_result (def_stmt))
               continue;
 
-	    if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp))
+	    if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp,
+				   eltsize))
 	      {
 		if (fuzzy == 2)
 		  *maxlen = build_all_ones_cst (size_type_node);
@@ -1545,10 +1549,13 @@ get_range_strlen (tree arg, tree length[
    and false if PHIs and COND_EXPRs are to be handled optimistically,
    if we can determine string length minimum and maximum; it will use
    the minimum from the ones where it can be determined.
-   STRICT false should be only used for warning code.  */
+   STRICT false should be only used for warning code.
+
+   ELTSIZE is 1 for normal single byte character strings, and 2 or
+   4 for wide character strings.  ELTSIZE is by default 1.  */
 
 bool
-get_range_strlen (tree arg, tree minmaxlen[2], bool strict)
+get_range_strlen (tree arg, tree minmaxlen[2], unsigned eltsize, bool strict)
 {
   bitmap visited = NULL;
 
@@ -1557,7 +1564,7 @@ get_range_strlen (tree arg, tree minmaxl
 
   bool flexarray = false;
   if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2,
-			 &flexarray))
+			 &flexarray, eltsize))
     {
       minmaxlen[0] = NULL_TREE;
       minmaxlen[1] = NULL_TREE;
@@ -3500,7 +3507,7 @@ gimple_fold_builtin_strlen (gimple_stmt_
   wide_int maxlen;
 
   tree lenrange[2];
-  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, true)
+  if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, 1, true)
       && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
       && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
     {
diff -pur gcc-9-20180812-1/gcc/gimple-fold.h gcc-9-20180812-2/gcc/gimple-fold.h
--- gcc-9-20180812-1/gcc/gimple-fold.h	2018-07-09 22:33:48.000000000 +0200
+++ gcc-9-20180812-2/gcc/gimple-fold.h	2018-08-14 08:19:10.004030028 +0200
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3.
 extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL);
 extern tree canonicalize_constructor_val (tree, tree);
 extern tree get_symbol_constant_value (tree);
-extern bool get_range_strlen (tree, tree[2], bool = false);
+extern bool get_range_strlen (tree, tree[2], unsigned = 1, bool = false);
 extern tree get_maxval_strlen (tree, int);
 extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree);
 extern bool fold_stmt (gimple_stmt_iterator *);
diff -pur gcc-9-20180812-1/gcc/gimple-ssa-sprintf.c gcc-9-20180812-2/gcc/gimple-ssa-sprintf.c
--- gcc-9-20180812-1/gcc/gimple-ssa-sprintf.c	2018-08-05 00:14:41.000000000 +0200
+++ gcc-9-20180812-2/gcc/gimple-ssa-sprintf.c	2018-08-14 08:20:05.155249669 +0200
@@ -2124,12 +2124,12 @@ format_floating (const directive &dir, t
    Used by the format_string function below.  */
 
 static fmtresult
-get_string_length (tree str)
+get_string_length (tree str, unsigned eltsize)
 {
   if (!str)
     return fmtresult ();
 
-  if (tree slen = c_strlen (str, 1))
+  if (tree slen = c_strlen (str, 1, eltsize))
     {
       /* Simply return the length of the string.  */
       fmtresult res (tree_to_shwi (slen));
@@ -2142,7 +2142,7 @@ get_string_length (tree str)
      aren't known to point any such arrays result in LENRANGE[1] set
      to SIZE_MAX.  */
   tree lenrange[2];
-  bool flexarray = get_range_strlen (str, lenrange);
+  bool flexarray = get_range_strlen (str, lenrange, eltsize);
 
   if (lenrange [0] || lenrange [1])
     {
@@ -2194,7 +2194,7 @@ get_string_length (tree str)
       return res;
     }
 
-  return get_string_length (NULL_TREE);
+  return fmtresult ();
 }
 
 /* Return the minimum and maximum number of characters formatted
@@ -2273,7 +2273,7 @@ format_string (const directive &dir, tre
   fmtresult res;
 
   /* Compute the range the argument's length can be in.  */
-  fmtresult slen = get_string_length (arg);
+  fmtresult slen = get_string_length (arg, dir.modifier == FMT_LEN_l ? 4 : 1);
   if (slen.range.min == slen.range.max
       && slen.range.min < HOST_WIDE_INT_MAX)
     {
diff -pur gcc-9-20180812-1/gcc/testsuite/gcc.dg/strlenopt-49.c gcc-9-20180812-2/gcc/testsuite/gcc.dg/strlenopt-49.c
--- gcc-9-20180812-1/gcc/testsuite/gcc.dg/strlenopt-49.c	2018-08-13 20:57:43.322627876 +0200
+++ gcc-9-20180812-2/gcc/testsuite/gcc.dg/strlenopt-49.c	2018-08-14 00:35:35.920448233 +0200
@@ -45,9 +45,9 @@ int cmp88 (void)
   return cmp88;
 }
 
-/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" { xfail *-*-* } } }
-   { dg-final { scan-tree-dump-times "len0 = 0;" 1 "gimple" { xfail *-*-* } } }
-   { dg-final { scan-tree-dump-times "len = 18;" 1 "gimple" { xfail *-*-* } } }
-   { dg-final { scan-tree-dump-times "lenx = 8;" 1 "gimple" { xfail *-*-* } } }
-   { dg-final { scan-tree-dump-times "leny = 0;" 1 "gimple" { xfail *-*-* } } }
+/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
+   { dg-final { scan-tree-dump-times "len0 = 0;" 1 "gimple" } }
+   { dg-final { scan-tree-dump-times "len = 18;" 1 "gimple" } }
+   { dg-final { scan-tree-dump-times "lenx = 8;" 1 "gimple" } }
+   { dg-final { scan-tree-dump-times "leny = 0;" 1 "gimple" } }
    { dg-final { scan-tree-dump-times "cmp88 = 0;" 1 "gimple" { xfail *-*-* } } } */
diff -pur gcc-9-20180812-1/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c gcc-9-20180812-2/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c
--- gcc-9-20180812-1/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c	2017-05-01 20:45:46.000000000 +0200
+++ gcc-9-20180812-2/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c	2018-08-14 11:55:35.257585343 +0200
@@ -93,7 +93,9 @@ void test_characters ()
   T ("%x",    1234);  /* { dg-warning ".%x. directive writing 3 bytes" } */
   T ("%#X",   1235);  /* { dg-warning ".%#X. directive writing 5 bytes" } */
 
-  T ("%S",    L"1");  /* { dg-warning ".%S. directive writing 1 byte" } */
+  T ("%S",    L"1");  /* { dg-warning ".%S. directive writing 1 byte" "" { xfail *-*-* } } */
+  /* { dg-warning "writing a terminating nul past the end of the destination" "" { target *-*-* } .-1 } */
+
   T ("%-s",    "1");  /* { dg-warning ".%-s. directive writing 1 byte" } */
 
   /* Verify that characters in the source character set appear in

Reply via email to