Hi,
this patch is a follow-up to my patch here:
https://gcc.gnu.org/ml/gcc-patches/2018-07/msg01800.html
Since most callers of c_strlen and get_range_strlen expect
the length in bytes of a zero-terminated string, a new
parameter eltsize is needed. It defaults to 1, but
gimple-ssa-sprintf.c can use it to specify the expected
character size.
Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk?
Thanks
Bernd.
2018-08-14 Bernd Edlinger <bernd.edlin...@hotmail.de>
* builtins.c (c_strlen): Add new parameter eltsize.
* builtins.h (c_strlen): Adjust prototype.
* expr.c (string_constant): Add new parameter mem_size.
* expr.h (string_constant): Adjust prototype.
* gimple-fold.c (get_range_strlen): Add new parameter eltsize.
* gimple-fold.h (get_range_strlen): Adjust prototype.
* gimple-ssa-sprintf.c (get_string_length): Add new parameter eltsize.
(format_string): Call get_string_length with eltsize.
2018-08-14 Bernd Edlinger <bernd.edlin...@hotmail.de>
* gcc.dg/strlenopt-49.c: Adjust test case.
* gcc.dg/tree-ssa/builtin-sprintf-warn-18.c: Likewise.
diff -pur gcc-9-20180812-1/gcc/builtins.c gcc-9-20180812-2/gcc/builtins.c
--- gcc-9-20180812-1/gcc/builtins.c 2018-08-13 20:57:43.318627928 +0200
+++ gcc-9-20180812-2/gcc/builtins.c 2018-08-14 06:22:14.547504925 +0200
@@ -568,13 +568,13 @@ string_length (const void *ptr, unsigned
accesses. Note that this implies the result is not going to be emitted
into the instruction stream.
- The value returned is of type `ssizetype'.
+ ELTSIZE is 1 for normal single byte character strings, and 2 or
+ 4 for wide character strings. ELTSIZE is by default 1.
- Unfortunately, string_constant can't access the values of const char
- arrays with initializers, so neither can we do so here. */
+ The value returned is of type `ssizetype'. */
tree
-c_strlen (tree src, int only_value)
+c_strlen (tree src, int only_value, unsigned eltsize)
{
STRIP_NOPS (src);
if (TREE_CODE (src) == COND_EXPR
@@ -582,27 +582,28 @@ c_strlen (tree src, int only_value)
{
tree len1, len2;
- len1 = c_strlen (TREE_OPERAND (src, 1), only_value);
- len2 = c_strlen (TREE_OPERAND (src, 2), only_value);
+ len1 = c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
+ len2 = c_strlen (TREE_OPERAND (src, 2), only_value, eltsize);
if (tree_int_cst_equal (len1, len2))
return len1;
}
if (TREE_CODE (src) == COMPOUND_EXPR
&& (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
- return c_strlen (TREE_OPERAND (src, 1), only_value);
+ return c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
location_t loc = EXPR_LOC_OR_LOC (src, input_location);
/* Offset from the beginning of the string in bytes. */
tree byteoff;
- src = string_constant (src, &byteoff);
+ tree memsize;
+ src = string_constant (src, &byteoff, &memsize);
if (src == 0)
return NULL_TREE;
/* Determine the size of the string element. */
- unsigned eltsize
- = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src))));
+ if (eltsize != tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src)))))
+ return NULL_TREE;
/* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
length of SRC. Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
@@ -613,14 +614,10 @@ c_strlen (tree src, int only_value)
HOST_WIDE_INT strelts = TREE_STRING_LENGTH (src);
strelts = strelts / eltsize - 1;
- HOST_WIDE_INT maxelts = strelts;
- tree type = TREE_TYPE (src);
- if (tree size = TYPE_SIZE_UNIT (type))
- if (tree_fits_shwi_p (size))
- {
- maxelts = tree_to_shwi (size);
- maxelts = maxelts / eltsize - 1;
- }
+ if (!tree_fits_uhwi_p (memsize))
+ return NULL_TREE;
+
+ HOST_WIDE_INT maxelts = tree_to_uhwi (memsize) / eltsize - 1;
/* PTR can point to the byte representation of any string type, including
char* and wchar_t*. */
@@ -628,19 +625,23 @@ c_strlen (tree src, int only_value)
if (byteoff && TREE_CODE (byteoff) != INTEGER_CST)
{
+ /* For empty strings the result should be zero. */
+ if (maxelts == 0)
+ return ssize_int (0);
+
+ /* The code below works only for single byte character types. */
+ if (eltsize != 1)
+ return NULL_TREE;
+
/* If the string has an internal NUL character followed by any
non-NUL characters (e.g., "foo\0bar"), we can't compute
the offset to the following NUL if we don't know where to
start searching for it. */
unsigned len = string_length (ptr, eltsize, strelts);
- if (len < strelts)
- {
- /* Return when an embedded null character is found. */
- return NULL_TREE;
- }
- if (!maxelts)
- return ssize_int (0);
+ /* Return when an embedded null character is found or none at all. */
+ if (len < strelts || len > maxelts)
+ return NULL_TREE;
/* We don't know the starting offset, but we do know that the string
has no internal zero bytes. If the offset falls within the bounds
@@ -650,8 +651,8 @@ c_strlen (tree src, int only_value)
tree offsave = TREE_SIDE_EFFECTS (byteoff) ? save_expr (byteoff) : byteoff;
offsave = fold_convert (ssizetype, offsave);
tree condexp = fold_build2_loc (loc, LE_EXPR, boolean_type_node, offsave,
- build_int_cst (ssizetype, len * eltsize));
- tree lenexp = size_diffop_loc (loc, ssize_int (strelts * eltsize), offsave);
+ build_int_cst (ssizetype, len));
+ tree lenexp = size_diffop_loc (loc, ssize_int (strelts), offsave);
return fold_build3_loc (loc, COND_EXPR, ssizetype, condexp, lenexp,
build_zero_cst (ssizetype));
}
@@ -684,6 +685,11 @@ c_strlen (tree src, int only_value)
return NULL_TREE;
}
+ /* If eltoff is larger than strelts but less than maxelts the
+ string length is zero, since the excess memory will be zero. */
+ if (eltoff > strelts)
+ return ssize_int (0);
+
/* Use strlen to search for the first zero byte. Since any strings
constructed with build_string will have nulls appended, we win even
if we get handed something like (char[4])"abcd".
@@ -691,7 +697,7 @@ c_strlen (tree src, int only_value)
Since ELTOFF is our starting index into the string, no further
calculation is needed. */
unsigned len = string_length (ptr + eltoff * eltsize, eltsize,
- maxelts - eltoff);
+ strelts - eltoff);
return ssize_int (len);
}
diff -pur gcc-9-20180812-1/gcc/builtins.h gcc-9-20180812-2/gcc/builtins.h
--- gcc-9-20180812-1/gcc/builtins.h 2018-08-10 11:43:06.000000000 +0200
+++ gcc-9-20180812-2/gcc/builtins.h 2018-08-13 22:10:20.183271630 +0200
@@ -58,7 +58,7 @@ extern bool get_pointer_alignment_1 (tre
unsigned HOST_WIDE_INT *);
extern unsigned int get_pointer_alignment (tree);
extern unsigned string_length (const void*, unsigned, unsigned);
-extern tree c_strlen (tree, int);
+extern tree c_strlen (tree, int, unsigned = 1);
extern void expand_builtin_setjmp_setup (rtx, rtx);
extern void expand_builtin_setjmp_receiver (rtx);
extern void expand_builtin_update_setjmp_buf (rtx);
diff -pur gcc-9-20180812-1/gcc/expr.c gcc-9-20180812-2/gcc/expr.c
--- gcc-9-20180812-1/gcc/expr.c 2018-08-13 20:57:43.320627902 +0200
+++ gcc-9-20180812-2/gcc/expr.c 2018-08-13 23:10:45.636583305 +0200
@@ -11271,10 +11271,12 @@ is_aligning_offset (const_tree offset, c
/* Return the tree node if an ARG corresponds to a string constant or zero
if it doesn't. If we return nonzero, set *PTR_OFFSET to the (possibly
non-constant) offset in bytes within the string that ARG is accessing.
- The type of the offset is sizetype. */
+ The type of the offset is sizetype. If MEM_SIZE is non-zero the storage
+ size of the memory is returned. If MEM_SIZE is zero, the string is
+ only returned when it is properly zero terminated. */
tree
-string_constant (tree arg, tree *ptr_offset)
+string_constant (tree arg, tree *ptr_offset, tree *mem_size)
{
tree array;
STRIP_NOPS (arg);
@@ -11328,7 +11330,7 @@ string_constant (tree arg, tree *ptr_off
return NULL_TREE;
tree offset;
- if (tree str = string_constant (arg0, &offset))
+ if (tree str = string_constant (arg0, &offset, mem_size))
{
/* Avoid pointers to arrays (see bug 86622). */
if (POINTER_TYPE_P (TREE_TYPE (arg))
@@ -11368,6 +11370,8 @@ string_constant (tree arg, tree *ptr_off
if (TREE_CODE (array) == STRING_CST)
{
*ptr_offset = fold_convert (sizetype, offset);
+ if (mem_size)
+ *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (array));
return array;
}
@@ -11416,8 +11420,10 @@ string_constant (tree arg, tree *ptr_off
because callers expect to be able to access the string
up to the limit imposed by TREE_STRING_LENGTH which
always includes the terminating NUL char. */
- if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)),
- TREE_STRING_LENGTH (init)) < 0)
+ if (mem_size)
+ *mem_size = TYPE_SIZE_UNIT (TREE_TYPE (init));
+ else if (compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (init)),
+ TREE_STRING_LENGTH (init)) < 0)
return NULL_TREE;
*ptr_offset = offset;
diff -pur gcc-9-20180812-1/gcc/expr.h gcc-9-20180812-2/gcc/expr.h
--- gcc-9-20180812-1/gcc/expr.h 2018-06-08 09:53:31.000000000 +0200
+++ gcc-9-20180812-2/gcc/expr.h 2018-08-13 22:47:27.338724367 +0200
@@ -288,7 +288,7 @@ expand_normal (tree exp)
/* Return the tree node and offset if a given argument corresponds to
a string constant. */
-extern tree string_constant (tree, tree *);
+extern tree string_constant (tree, tree *, tree * = NULL);
/* Two different ways of generating switch statements. */
extern int try_casesi (tree, tree, tree, tree, rtx, rtx, rtx, profile_probability);
diff -pur gcc-9-20180812-1/gcc/gimple-fold.c gcc-9-20180812-2/gcc/gimple-fold.c
--- gcc-9-20180812-1/gcc/gimple-fold.c 2018-08-10 11:43:06.000000000 +0200
+++ gcc-9-20180812-2/gcc/gimple-fold.c 2018-08-14 11:40:16.555494378 +0200
@@ -1275,11 +1275,13 @@ gimple_fold_builtin_memset (gimple_stmt_
Set *FLEXP to true if the range of the string lengths has been
obtained from the upper bound of an array at the end of a struct.
Such an array may hold a string that's longer than its upper bound
- due to it being used as a poor-man's flexible array member. */
+ due to it being used as a poor-man's flexible array member.
+ ELTSIZE is 1 for normal single byte character strings, and 2 or
+ 4 for wide character strings. ELTSIZE is by default 1. */
static bool
get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
- int fuzzy, bool *flexp)
+ int fuzzy, bool *flexp, unsigned eltsize = 1)
{
tree var, val = NULL_TREE;
gimple *def_stmt;
@@ -1300,8 +1302,8 @@ get_range_strlen (tree arg, tree length[
tree aop0 = TREE_OPERAND (op, 0);
if (TREE_CODE (aop0) == INDIRECT_REF
&& TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME)
- return get_range_strlen (TREE_OPERAND (aop0, 0),
- length, visited, type, fuzzy, flexp);
+ return get_range_strlen (TREE_OPERAND (aop0, 0), length,
+ visited, type, fuzzy, flexp, eltsize);
}
else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy)
{
@@ -1329,13 +1331,13 @@ get_range_strlen (tree arg, tree length[
return false;
}
else
- val = c_strlen (arg, 1);
+ val = c_strlen (arg, 1, eltsize);
if (!val && fuzzy)
{
if (TREE_CODE (arg) == ADDR_EXPR)
return get_range_strlen (TREE_OPERAND (arg, 0), length,
- visited, type, fuzzy, flexp);
+ visited, type, fuzzy, flexp, eltsize);
if (TREE_CODE (arg) == ARRAY_REF)
{
@@ -1477,7 +1479,8 @@ get_range_strlen (tree arg, tree length[
|| gimple_assign_unary_nop_p (def_stmt))
{
tree rhs = gimple_assign_rhs1 (def_stmt);
- return get_range_strlen (rhs, length, visited, type, fuzzy, flexp);
+ return get_range_strlen (rhs, length, visited, type, fuzzy, flexp,
+ eltsize);
}
else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR)
{
@@ -1486,7 +1489,7 @@ get_range_strlen (tree arg, tree length[
for (unsigned int i = 0; i < 2; i++)
if (!get_range_strlen (ops[i], length, visited, type, fuzzy,
- flexp))
+ flexp, eltsize))
{
if (fuzzy == 2)
*maxlen = build_all_ones_cst (size_type_node);
@@ -1513,7 +1516,8 @@ get_range_strlen (tree arg, tree length[
if (arg == gimple_phi_result (def_stmt))
continue;
- if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp))
+ if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp,
+ eltsize))
{
if (fuzzy == 2)
*maxlen = build_all_ones_cst (size_type_node);
@@ -1545,10 +1549,13 @@ get_range_strlen (tree arg, tree length[
and false if PHIs and COND_EXPRs are to be handled optimistically,
if we can determine string length minimum and maximum; it will use
the minimum from the ones where it can be determined.
- STRICT false should be only used for warning code. */
+ STRICT false should be only used for warning code.
+
+ ELTSIZE is 1 for normal single byte character strings, and 2 or
+ 4 for wide character strings. ELTSIZE is by default 1. */
bool
-get_range_strlen (tree arg, tree minmaxlen[2], bool strict)
+get_range_strlen (tree arg, tree minmaxlen[2], unsigned eltsize, bool strict)
{
bitmap visited = NULL;
@@ -1557,7 +1564,7 @@ get_range_strlen (tree arg, tree minmaxl
bool flexarray = false;
if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2,
- &flexarray))
+ &flexarray, eltsize))
{
minmaxlen[0] = NULL_TREE;
minmaxlen[1] = NULL_TREE;
@@ -3500,7 +3507,7 @@ gimple_fold_builtin_strlen (gimple_stmt_
wide_int maxlen;
tree lenrange[2];
- if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, true)
+ if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, 1, true)
&& lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
&& lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
{
diff -pur gcc-9-20180812-1/gcc/gimple-fold.h gcc-9-20180812-2/gcc/gimple-fold.h
--- gcc-9-20180812-1/gcc/gimple-fold.h 2018-07-09 22:33:48.000000000 +0200
+++ gcc-9-20180812-2/gcc/gimple-fold.h 2018-08-14 08:19:10.004030028 +0200
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3.
extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL);
extern tree canonicalize_constructor_val (tree, tree);
extern tree get_symbol_constant_value (tree);
-extern bool get_range_strlen (tree, tree[2], bool = false);
+extern bool get_range_strlen (tree, tree[2], unsigned = 1, bool = false);
extern tree get_maxval_strlen (tree, int);
extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree);
extern bool fold_stmt (gimple_stmt_iterator *);
diff -pur gcc-9-20180812-1/gcc/gimple-ssa-sprintf.c gcc-9-20180812-2/gcc/gimple-ssa-sprintf.c
--- gcc-9-20180812-1/gcc/gimple-ssa-sprintf.c 2018-08-05 00:14:41.000000000 +0200
+++ gcc-9-20180812-2/gcc/gimple-ssa-sprintf.c 2018-08-14 08:20:05.155249669 +0200
@@ -2124,12 +2124,12 @@ format_floating (const directive &dir, t
Used by the format_string function below. */
static fmtresult
-get_string_length (tree str)
+get_string_length (tree str, unsigned eltsize)
{
if (!str)
return fmtresult ();
- if (tree slen = c_strlen (str, 1))
+ if (tree slen = c_strlen (str, 1, eltsize))
{
/* Simply return the length of the string. */
fmtresult res (tree_to_shwi (slen));
@@ -2142,7 +2142,7 @@ get_string_length (tree str)
aren't known to point any such arrays result in LENRANGE[1] set
to SIZE_MAX. */
tree lenrange[2];
- bool flexarray = get_range_strlen (str, lenrange);
+ bool flexarray = get_range_strlen (str, lenrange, eltsize);
if (lenrange [0] || lenrange [1])
{
@@ -2194,7 +2194,7 @@ get_string_length (tree str)
return res;
}
- return get_string_length (NULL_TREE);
+ return fmtresult ();
}
/* Return the minimum and maximum number of characters formatted
@@ -2273,7 +2273,7 @@ format_string (const directive &dir, tre
fmtresult res;
/* Compute the range the argument's length can be in. */
- fmtresult slen = get_string_length (arg);
+ fmtresult slen = get_string_length (arg, dir.modifier == FMT_LEN_l ? 4 : 1);
if (slen.range.min == slen.range.max
&& slen.range.min < HOST_WIDE_INT_MAX)
{
diff -pur gcc-9-20180812-1/gcc/testsuite/gcc.dg/strlenopt-49.c gcc-9-20180812-2/gcc/testsuite/gcc.dg/strlenopt-49.c
--- gcc-9-20180812-1/gcc/testsuite/gcc.dg/strlenopt-49.c 2018-08-13 20:57:43.322627876 +0200
+++ gcc-9-20180812-2/gcc/testsuite/gcc.dg/strlenopt-49.c 2018-08-14 00:35:35.920448233 +0200
@@ -45,9 +45,9 @@ int cmp88 (void)
return cmp88;
}
-/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" { xfail *-*-* } } }
- { dg-final { scan-tree-dump-times "len0 = 0;" 1 "gimple" { xfail *-*-* } } }
- { dg-final { scan-tree-dump-times "len = 18;" 1 "gimple" { xfail *-*-* } } }
- { dg-final { scan-tree-dump-times "lenx = 8;" 1 "gimple" { xfail *-*-* } } }
- { dg-final { scan-tree-dump-times "leny = 0;" 1 "gimple" { xfail *-*-* } } }
+/* { dg-final { scan-tree-dump-times "strlen" 0 "gimple" } }
+ { dg-final { scan-tree-dump-times "len0 = 0;" 1 "gimple" } }
+ { dg-final { scan-tree-dump-times "len = 18;" 1 "gimple" } }
+ { dg-final { scan-tree-dump-times "lenx = 8;" 1 "gimple" } }
+ { dg-final { scan-tree-dump-times "leny = 0;" 1 "gimple" } }
{ dg-final { scan-tree-dump-times "cmp88 = 0;" 1 "gimple" { xfail *-*-* } } } */
diff -pur gcc-9-20180812-1/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c gcc-9-20180812-2/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c
--- gcc-9-20180812-1/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c 2017-05-01 20:45:46.000000000 +0200
+++ gcc-9-20180812-2/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c 2018-08-14 11:55:35.257585343 +0200
@@ -93,7 +93,9 @@ void test_characters ()
T ("%x", 1234); /* { dg-warning ".%x. directive writing 3 bytes" } */
T ("%#X", 1235); /* { dg-warning ".%#X. directive writing 5 bytes" } */
- T ("%S", L"1"); /* { dg-warning ".%S. directive writing 1 byte" } */
+ T ("%S", L"1"); /* { dg-warning ".%S. directive writing 1 byte" "" { xfail *-*-* } } */
+ /* { dg-warning "writing a terminating nul past the end of the destination" "" { target *-*-* } .-1 } */
+
T ("%-s", "1"); /* { dg-warning ".%-s. directive writing 1 byte" } */
/* Verify that characters in the source character set appear in