While working on the -Wformat-length pass I noticed that in some
diagnostics that make use of the %qc and %qs directives GCC prints
non-printable characters raw. For example, it might print a newline,
corrupting the diagnostic stream (bug 77521).
Some other diagnostics that try to avoid this problem by using
a directive such as %x when the character is not printable might
use the sign-extended value of the character, printing a very
large hexadecimal value (bug 77520).
The attached patch changes the pretty printer to detect non-printable
characters in %qc and %qs directives (but not %c or %s) and print
those in hexadecimal (via "\\x%02x").
Martin
PS I used hexadecimal based on what c-format.c does but now that
I checked more carefully how %qE formats string literals I see it
uses octal. I think hexadecimal is preferable because it avoids
ambiguity but I'm open to changing it to octal if there's a strong
preference for it. Incidentally, %qE too suffers from bug 77520
(see below). The patch doesn't try to fix that.
$ cat z.C && gcc z.C
constexpr int i = "ABC\x7f_\x80XYZ";
z.C:1:19: error: invalid conversion from ‘const char*’ to ‘int’
[-fpermissive]
constexpr int i = "ABC\x7f_\x80XYZ";
^~~~~~~~~~~~~~~~~
z.C:1:19: error: ‘(int)((const char*)"ABC\177_\37777777600XYZ")’ is not
a constant expression
PR c/77520 - wrong value for extended ASCII characters in -Wformat message
PR c/77521 - %qc format directive should quote non-printable characters
gcc/c-family/ChangeLog:
2016-09-08 Martin Sebor <mse...@redhat.com>
PR c/77520
PR c/77521
* c-format.c (argument_parser::find_format_char_info): Use %qc
format directive unconditionally.
gcc/ChangeLog:
2016-09-08 Martin Sebor <mse...@redhat.com>
PR c/77520
PR c/77521
* pretty-print.c (pp_quoted_string): New function.
(pp_format): Call it for %qc and %qs directives.
gcc/testsuite/ChangeLog:
2016-09-08 Martin Sebor <mse...@redhat.com>
PR c/77520
PR c/77521
* gcc.dg/pr77520.c: New test.
* gcc.dg/pr77521.c: New test.
diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index 09d514e..0c17340 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -2355,20 +2355,12 @@ argument_parser::find_format_char_info (char format_char)
++fci;
if (fci->format_chars == 0)
{
- if (ISGRAPH (format_char))
- format_warning_at_char (format_string_loc, format_string_cst,
- format_chars - orig_format_chars,
- OPT_Wformat_,
- "unknown conversion type character"
- " %qc in format",
- format_char);
- else
- format_warning_at_char (format_string_loc, format_string_cst,
- format_chars - orig_format_chars,
- OPT_Wformat_,
- "unknown conversion type character"
- " 0x%x in format",
- format_char);
+ format_warning_at_char (format_string_loc, format_string_cst,
+ format_chars - orig_format_chars,
+ OPT_Wformat_,
+ "unknown conversion type character"
+ " %qc in format",
+ format_char);
return NULL;
}
diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c
index 325263e..a39815e 100644
--- a/gcc/pretty-print.c
+++ b/gcc/pretty-print.c
@@ -30,6 +30,8 @@ along with GCC; see the file COPYING3. If not see
#include <iconv.h>
#endif
+static void pp_quoted_string (pretty_printer *, const char *, size_t = -1);
+
/* Overwrite the given location/range within this text_info's rich_location.
For use e.g. when implementing "+" in client format decoders. */
@@ -555,8 +557,20 @@ pp_format (pretty_printer *pp, text_info *text)
break;
case 'c':
- pp_character (pp, va_arg (*text->args_ptr, int));
- break;
+ {
+ /* When quoting, print alphanumeric, punctuation, and the space
+ character unchanged, and all others in hexadecimal with the
+ "\x" prefix. Otherwise print them all unchanged. */
+ int chr = va_arg (*text->args_ptr, int);
+ if (ISPRINT (chr) || !quote)
+ pp_character (pp, chr);
+ else
+ {
+ const char str [2] = { chr, '\0' };
+ pp_quoted_string (pp, str, 1);
+ }
+ break;
+ }
case 'd':
case 'i':
@@ -577,7 +591,10 @@ pp_format (pretty_printer *pp, text_info *text)
break;
case 's':
- pp_string (pp, va_arg (*text->args_ptr, const char *));
+ if (quote)
+ pp_quoted_string (pp, va_arg (*text->args_ptr, const char *));
+ else
+ pp_string (pp, va_arg (*text->args_ptr, const char *));
break;
case 'p':
@@ -939,6 +956,41 @@ pp_string (pretty_printer *pp, const char *str)
pp_maybe_wrap_text (pp, str, str + strlen (str));
}
+/* Append the leading N characters of STR to the output area of PP,
+   printing each non-printable character as a "\x"-prefixed two-digit
+   hexadecimal escape.  Setting N = -1 is as if N were strlen (STR).
+   The text may be line-wrapped if in appropriate mode.  */
+static void
+pp_quoted_string (pretty_printer *pp, const char *str, size_t n /* = -1 */)
+{
+  gcc_checking_assert (str);
+
+  const char *last = str;
+  const char *ps;
+
+  /* Compute the length if not specified.  */
+  if (n == (size_t) -1)
+    n = strlen (str);
+
+  for (ps = str; n; ++ps, --n)
+    {
+      if (ISPRINT (*ps))
+        continue;
+
+      /* Flush the pending printable run [LAST, PS); the end is exclusive.  */
+      if (last < ps)
+        pp_maybe_wrap_text (pp, last, ps);
+
+      /* Append the character's hex escape; BUF has ample room for it.  */
+      char buf [11];
+      int len = sprintf (buf, "\\x%02x", (unsigned char)*ps);
+      pp_maybe_wrap_text (pp, buf, buf + len);
+      last = ps + 1;
+    }
+
+  pp_maybe_wrap_text (pp, last, ps);
+}
+
/* Maybe print out a whitespace if needed. */
void
diff --git a/gcc/testsuite/gcc.dg/pr77520.c b/gcc/testsuite/gcc.dg/pr77520.c
new file mode 100644
index 0000000..b237639
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr77520.c
@@ -0,0 +1,10 @@
+/* PR c/77520 - wrong value for extended ASCII characters in -Wformat message
+   Verify that a character in the extended ASCII range is printed as a
+   two-digit "\x" hexadecimal escape rather than raw.  */
+/* { dg-do compile } */
+/* { dg-options "-Wformat" } */
+
+void f (void)
+{
+ __builtin_printf ("%\x80"); /* { dg-warning "unknown conversion type character .\\\\x80. in format" } */
+}
diff --git a/gcc/testsuite/gcc.dg/pr77521.c b/gcc/testsuite/gcc.dg/pr77521.c
new file mode 100644
index 0000000..a41d3e0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr77521.c
@@ -0,0 +1,8 @@
+/* PR c/77521 - %qc format directive should quote non-printable characters.
+   Verify that a newline in an assembly constraint is printed as the literal
+   text \x0a; the directive needs four backslashes to match one backslash.  */
+
+void f (int a, int b)
+{
+  asm ("combine %2, %0" : "=r" (a) : "0" (a), "\n" (b)); /* { dg-error "invalid punctuation .\\\\x0a. in constraint" } */
+}