It didn't seem like we were making progress in the debate about
warning for sprintf argument mismatches earlier today so I took
a couple of hours this afternoon to prototype one of the solutions
I was trying to describe. It mostly keeps the existing interface
and just extends c_strlen() and the other functions to pass in
an in-out argument describing the requested element size on input
and the constant string on output. The caller is responsible for
validating the string to make sure its type matches the expected
type.
String functions like strcpy interested in the size of their
argument in bytes succeed even for a wide string argument and
are candidates for folding (this matches the original behavior).
The patch doesn't add any warning for mismatched calls to those
(such as strcpy(d, (char*)L"123");) but enhancing it to do that
would be just as "simple" as adding the missing nul detection.
Calls to sprintf "%s" and "%ls" also succeed with mismatched
arguments but get a warning:
warning: ‘%s’ invalid directive argument type ‘int[4]’ [-Wformat-overflow=]
3 | __builtin_sprintf (d, "%s", (char*)L"123");
| ^~ ~~~~~~
warning: ‘%ls’ invalid directive argument type ‘char[4]’
[-Wformat-overflow=]
4 | __builtin_sprintf (d, "%ls", (__WCHAR_TYPE__*)"123");
| ^~~ ~~~~~
FWIW, I chose the approach of adding a c_strlen_data structure
over adding yet another argument to c_strlen() and friends to
keep the argument list from getting too long and confusing
(like get_range_strlen). This should also make it easy to
retrofit the missig nul detection patch on top of it.
I didn't take any time to add tests for the restored strcpy
(et al.) folding.
If this looks like the general direction we can agree on (perhaps
with some tweaks, including not folding etc.) I will add those
tests, plus more for the various argument mismatches.
Tested on x86_64-linux.
Martin
PR tree-optimization/87034 - missing -Wformat-overflow on a sprintf %s with a wide string
gcc/ChangeLog:
PR tree-optimization/87034
* builtins.c (c_strlen): Add an argument. Optionally return string
to caller.
* builtins.h (c_strlen): Add an argument.
* gimple-fold.c (get_range_strlen): Replace argument with
c_strlen_data *.
(get_maxval_strlen): Adjust call to get_range_strlen.
(gimple_fold_builtin_strlen): Same.
* gimple-fold.h (c_strlen_data): New struct.
(get_range_strlen): Add optional argument.
* gimple-ssa-sprintf.c (get_string_length): Change argument type.
(format_string): Same. Adjust.
(format_directive): Diagnose incompatible arguments.
gcc/testsuite/ChangeLog:
PR tree-optimization/87034
* gcc.dg/tree-ssa/builtin-sprintf-warn-20.c: Remove xfail.
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c (revision 263754)
+++ gcc/builtins.c (working copy)
@@ -568,15 +568,12 @@ string_length (const void *ptr, unsigned eltsize,
accesses. Note that this implies the result is not going to be emitted
into the instruction stream.
- ELTSIZE is 1 for normal single byte character strings, and 2 or
- 4 for wide characer strings. ELTSIZE is by default 1.
The value returned is of type `ssizetype'. */
tree
-c_strlen (tree src, int only_value, unsigned eltsize)
+c_strlen (tree src, int only_value, c_strlen_data *pdata /* = NULL */)
{
- gcc_assert (eltsize == 1 || eltsize == 2 || eltsize == 4);
STRIP_NOPS (src);
if (TREE_CODE (src) == COND_EXPR
&& (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
@@ -583,8 +580,8 @@ tree
{
tree len1, len2;
- len1 = c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
- len2 = c_strlen (TREE_OPERAND (src, 2), only_value, eltsize);
+ len1 = c_strlen (TREE_OPERAND (src, 1), only_value, pdata);
+ len2 = c_strlen (TREE_OPERAND (src, 2), only_value, pdata);
if (tree_int_cst_equal (len1, len2))
return len1;
}
@@ -591,7 +588,7 @@ tree
if (TREE_CODE (src) == COMPOUND_EXPR
&& (only_value || !TREE_SIDE_EFFECTS (TREE_OPERAND (src, 0))))
- return c_strlen (TREE_OPERAND (src, 1), only_value, eltsize);
+ return c_strlen (TREE_OPERAND (src, 1), only_value, pdata);
location_t loc = EXPR_LOC_OR_LOC (src, input_location);
@@ -602,10 +599,16 @@ tree
if (src == 0)
return NULL_TREE;
- /* Determine the size of the string element. */
- if (eltsize != tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (src)))))
- return NULL_TREE;
+ unsigned eltsize = 1;
+ if (pdata)
+ {
+ eltsize = pdata->eltsize;
+ tree srctype = TREE_TYPE (TREE_TYPE (src));
+ if (eltsize != tree_to_uhwi (TYPE_SIZE_UNIT (srctype)))
+ pdata->string = src;
+ }
+
/* Set MAXELTS to sizeof (SRC) / sizeof (*SRC) - 1, the maximum possible
length of SRC. Prefer TYPE_SIZE() to TREE_STRING_LENGTH() if possible
in case the latter is less than the size of the array, such as when
Index: gcc/builtins.h
===================================================================
--- gcc/builtins.h (revision 263754)
+++ gcc/builtins.h (working copy)
@@ -58,7 +58,14 @@ extern bool get_pointer_alignment_1 (tree, unsigne
unsigned HOST_WIDE_INT *);
extern unsigned int get_pointer_alignment (tree);
extern unsigned string_length (const void*, unsigned, unsigned);
-extern tree c_strlen (tree, int, unsigned = 1);
+
+struct c_strlen_data
+{
+ unsigned eltsize;
+ tree string;
+};
+
+extern tree c_strlen (tree, int, c_strlen_data * = NULL);
extern void expand_builtin_setjmp_setup (rtx, rtx);
extern void expand_builtin_setjmp_receiver (rtx);
extern void expand_builtin_update_setjmp_buf (rtx);
Index: gcc/gimple-fold.c
===================================================================
--- gcc/gimple-fold.c (revision 263754)
+++ gcc/gimple-fold.c (working copy)
@@ -1281,7 +1281,7 @@ gimple_fold_builtin_memset (gimple_stmt_iterator *
static bool
get_range_strlen (tree arg, tree length[2], bitmap *visited, int type,
- int fuzzy, bool *flexp, unsigned eltsize = 1)
+ int fuzzy, bool *flexp, c_strlen_data *pdata)
{
tree var, val = NULL_TREE;
gimple *def_stmt;
@@ -1303,7 +1303,7 @@ get_range_strlen (tree arg, tree length[2], bitmap
if (TREE_CODE (aop0) == INDIRECT_REF
&& TREE_CODE (TREE_OPERAND (aop0, 0)) == SSA_NAME)
return get_range_strlen (TREE_OPERAND (aop0, 0), length,
- visited, type, fuzzy, flexp, eltsize);
+ visited, type, fuzzy, flexp, pdata);
}
else if (TREE_CODE (TREE_OPERAND (op, 0)) == COMPONENT_REF && fuzzy)
{
@@ -1331,13 +1331,13 @@ get_range_strlen (tree arg, tree length[2], bitmap
return false;
}
else
- val = c_strlen (arg, 1, eltsize);
+ val = c_strlen (arg, 1, pdata);
if (!val && fuzzy)
{
if (TREE_CODE (arg) == ADDR_EXPR)
return get_range_strlen (TREE_OPERAND (arg, 0), length,
- visited, type, fuzzy, flexp, eltsize);
+ visited, type, fuzzy, flexp, pdata);
if (TREE_CODE (arg) == ARRAY_REF)
{
@@ -1480,7 +1480,7 @@ get_range_strlen (tree arg, tree length[2], bitmap
{
tree rhs = gimple_assign_rhs1 (def_stmt);
return get_range_strlen (rhs, length, visited, type, fuzzy, flexp,
- eltsize);
+ pdata);
}
else if (gimple_assign_rhs_code (def_stmt) == COND_EXPR)
{
@@ -1489,7 +1489,7 @@ get_range_strlen (tree arg, tree length[2], bitmap
for (unsigned int i = 0; i < 2; i++)
if (!get_range_strlen (ops[i], length, visited, type, fuzzy,
- flexp, eltsize))
+ flexp, pdata))
{
if (fuzzy == 2)
*maxlen = build_all_ones_cst (size_type_node);
@@ -1517,7 +1517,7 @@ get_range_strlen (tree arg, tree length[2], bitmap
continue;
if (!get_range_strlen (arg, length, visited, type, fuzzy, flexp,
- eltsize))
+ pdata))
{
if (fuzzy == 2)
*maxlen = build_all_ones_cst (size_type_node);
@@ -1555,7 +1555,8 @@ get_range_strlen (tree arg, tree length[2], bitmap
4 for wide characer strings. ELTSIZE is by default 1. */
bool
-get_range_strlen (tree arg, tree minmaxlen[2], unsigned eltsize, bool strict)
+get_range_strlen (tree arg, tree minmaxlen[2],
+ c_strlen_data *pdata /* = NULL */, bool strict /* = true */)
{
bitmap visited = NULL;
@@ -1564,7 +1565,7 @@ bool
bool flexarray = false;
if (!get_range_strlen (arg, minmaxlen, &visited, 1, strict ? 1 : 2,
- &flexarray, eltsize))
+ &flexarray, pdata))
{
minmaxlen[0] = NULL_TREE;
minmaxlen[1] = NULL_TREE;
@@ -1583,7 +1584,7 @@ get_maxval_strlen (tree arg, int type)
tree len[2] = { NULL_TREE, NULL_TREE };
bool dummy;
- if (!get_range_strlen (arg, len, &visited, type, 0, &dummy))
+ if (!get_range_strlen (arg, len, &visited, type, 0, &dummy, NULL))
len[1] = NULL_TREE;
if (visited)
BITMAP_FREE (visited);
@@ -3507,7 +3508,7 @@ gimple_fold_builtin_strlen (gimple_stmt_iterator *
wide_int maxlen;
tree lenrange[2];
- if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, 1, true)
+ if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange, NULL, true)
&& lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST
&& lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST)
{
Index: gcc/gimple-fold.h
===================================================================
--- gcc/gimple-fold.h (revision 263754)
+++ gcc/gimple-fold.h (working copy)
@@ -25,7 +25,10 @@ along with GCC; see the file COPYING3. If not see
extern tree create_tmp_reg_or_ssa_name (tree, gimple *stmt = NULL);
extern tree canonicalize_constructor_val (tree, tree);
extern tree get_symbol_constant_value (tree);
-extern bool get_range_strlen (tree, tree[2], unsigned = 1, bool = false);
+
+struct c_strlen_data;
+extern bool get_range_strlen (tree, tree[2], c_strlen_data * = NULL,
+ bool = false);
extern tree get_maxval_strlen (tree, int);
extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree);
extern bool fold_stmt (gimple_stmt_iterator *);
Index: gcc/gimple-ssa-sprintf.c
===================================================================
--- gcc/gimple-ssa-sprintf.c (revision 263754)
+++ gcc/gimple-ssa-sprintf.c (working copy)
@@ -514,7 +514,7 @@ struct fmtresult
/* Construct a FMTRESULT object with all counters initialized
to MIN. KNOWNRANGE is set when MIN is valid. */
fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
- : argmin (), argmax (),
+ : argmin (), argmax (), badarg (),
knownrange (min < HOST_WIDE_INT_MAX),
mayfail (), nullp ()
{
@@ -528,7 +528,7 @@ struct fmtresult
KNOWNRANGE is set when both MIN and MAX are valid. */
fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
- : argmin (), argmax (),
+ : argmin (), argmax (), badarg (),
knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
mayfail (), nullp ()
{
@@ -551,6 +551,10 @@ struct fmtresult
/* The range a directive's argument is in. */
tree argmin, argmax;
+ /* For a directive with a mismatched argument such as %s with
+ a wchar_t, the argument. */
+ tree badarg;
+
/* The minimum and maximum number of bytes that a directive
results in on output for an argument in the range above. */
result_range range;
@@ -1994,12 +1998,12 @@ format_floating (const directive &dir, tree arg, v
Used by the format_string function below. */
static fmtresult
-get_string_length (tree str, unsigned eltsize)
+get_string_length (tree str, c_strlen_data *pdata)
{
if (!str)
return fmtresult ();
- if (tree slen = c_strlen (str, 1, eltsize))
+ if (tree slen = c_strlen (str, 1, pdata))
{
/* Simply return the length of the string. */
fmtresult res (tree_to_shwi (slen));
@@ -2012,7 +2016,7 @@ static fmtresult
aren't known to point any such arrays result in LENRANGE[1] set
to SIZE_MAX. */
tree lenrange[2];
- bool flexarray = get_range_strlen (str, lenrange, eltsize);
+ bool flexarray = get_range_strlen (str, lenrange, pdata);
if (lenrange [0] || lenrange [1])
{
@@ -2153,9 +2157,21 @@ format_string (const directive &dir, tree arg, vr_
{
fmtresult res;
- /* Compute the range the argument's length can be in. */
- int count_by = dir.specifier == 'S' || dir.modifier == FMT_LEN_l ? 4 : 1;
- fmtresult slen = get_string_length (arg, count_by);
+ /* Compute the range the argument's length can be in. On input
+ to get_string_length set ELT to the size of the expected argument
+ type; on output, if successful, expect the function to set ELT to
+ the type of the actual argument. */
+ c_strlen_data data;
+ data.eltsize = dir.specifier == 'S' || dir.modifier == FMT_LEN_l ? 4 : 1;
+ data.string = NULL_TREE;
+ fmtresult slen = get_string_length (arg, &data);
+ if (data.string)
+ {
+ tree strtype = TREE_TYPE (data.string);
+ if (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (strtype))) != data.eltsize)
+ res.badarg = data.string;
+ }
+
if (slen.range.min == slen.range.max
&& slen.range.min < HOST_WIDE_INT_MAX)
{
@@ -2769,6 +2785,16 @@ format_directive (const sprintf_dom_walker::call_i
return false;
}
+ if (fmtres.badarg)
+ {
+ fmtwarn (dirloc, argloc, NULL, info.warnopt (),
+ "%<%.*s%> invalid directive argument type %qT",
+ dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg),
+ TREE_TYPE (fmtres.badarg));
+
+ res->warned = true;
+ }
+
/* Compute the number of available bytes in the destination. There
must always be at least one byte of space for the terminating
NUL that's appended after the format string has been processed. */
Index: gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-20.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-20.c (revision 263754)
+++ gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-20.c (working copy)
@@ -18,5 +18,5 @@ void test (struct S *p)
const char *q = sizeof (wchar_t) == 2
? (char*)L"\x4142\x4344" : (char*)L"\x41424344\x45464748";
- sprintf (p->a, "%s", q); /* { dg-warning "\\\[-Wformat-overflow" "pr87034" { xfail *-*-*} } */
+ sprintf (p->a, "%s", q); /* { dg-warning "\\\[-Wformat-overflow" } */
}