PR 78512 - r242674 miscompiles Linux kernel observes that the Linux
kernel fails to boot as a result of enabling the -fprintf-return-value
optimization in GCC.  This is likely because the kernel has its own
sprintf with a large set of extensions to the %p directive that
conflict with the optimization.  Ordinarily, programs that define
their own versions of C library functions that differ from what C
specifies are expected to disable GCC's built-ins (e.g., by
-fno-builtin, or for freestanding environments like the Linux kernel,
by -ffreestanding).  But the Linux kernel doesn't do that and hence
the conflict.

After discussing a few possible options (handling the kernel extensions
in GCC, providing a new GCC option to disable the %p handling, and
disabling both the optimization and the warning for calls involving
the %p directive, the last was viewed as the best alternative).  The
attached patch removes the %p handling from GCC.

Martin

The kernel extensions are documented below:
  https://www.kernel.org/doc/Documentation/printk-formats.txt

and an implementation is here:
  https://github.com/torvalds/linux/blob/master/lib/vsprintf.c
PR tree-optimization/78512 - [7 Regression] r242674 miscompiles Linux kernel

gcc/ChangeLog:

	PR tree-optimization/78512
	* config/linux.h (TARGET_PRINTF_POINTER_FORMAT): Remove.
	* config/rs6000/linux.h: Same.
	* config/rs6000/linux64.h: Same.
	* config/sol2.h: Same.
	* config/sol2.c (solaris_printf_pointer_format): Remove.
	* doc/tm.texi.in (TARGET_PRINTF_POINTER_FORMAT): Remove.
	* doc/tm.texi: Regenerate.
	* gimple-ssa-sprintf.c (format_pointer): Rempove.
	(pass_sprintf_length::compute_format_length): Return bool.
	(pass_sprintf_length::handle_gimple_call): Adjust.
	* target.def (printf_pointer_format): Remove.
	* targhooks.c (default_printf_pointer_format): Remove.
	(linux_printf_pointer_format): Same.
	* targhooks.h (default_printf_pointer_format): Remove.
	(linux_printf_pointer_format, solaris_printf_pointer_format): Same.

gcc/testsuite/ChangeLog:

	PR tree-optimization/78512
	* gcc.dg/tree-ssa/builtin-sprintf-6.c: Add test cases.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Remove test cases.

diff --git a/gcc/config/linux.h b/gcc/config/linux.h
index 7211da2..9aeeb94 100644
--- a/gcc/config/linux.h
+++ b/gcc/config/linux.h
@@ -208,8 +208,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 # define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function
 
 #endif
-
-/* The format string to which "%p" corresponds (same in Glibc and
-   uClibc.  */
-#undef TARGET_PRINTF_POINTER_FORMAT
-#define TARGET_PRINTF_POINTER_FORMAT linux_printf_pointer_format
diff --git a/gcc/config/rs6000/linux.h b/gcc/config/rs6000/linux.h
index a28e17f..ac9296d 100644
--- a/gcc/config/rs6000/linux.h
+++ b/gcc/config/rs6000/linux.h
@@ -138,7 +138,3 @@
   || (TARGET_GLIBC_MAJOR == 2 && TARGET_GLIBC_MINOR >= 19)
 #define RS6000_GLIBC_ATOMIC_FENV 1
 #endif
-
-/* The format string to which "%p" corresponds.  */
-#undef TARGET_PRINTF_POINTER_FORMAT
-#define TARGET_PRINTF_POINTER_FORMAT linux_printf_pointer_format
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index 7de51ea..0101ec0 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -640,7 +640,3 @@ extern int dot_symbols;
    enabling the __float128 keyword.  */
 #undef	TARGET_FLOAT128_ENABLE_TYPE
 #define TARGET_FLOAT128_ENABLE_TYPE 1
-
-/* The format string to which "%p" corresponds.  */
-#undef TARGET_PRINTF_POINTER_FORMAT
-#define TARGET_PRINTF_POINTER_FORMAT linux_printf_pointer_format
diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c
index fcab9de..97f92e6 100644
--- a/gcc/config/sol2.c
+++ b/gcc/config/sol2.c
@@ -31,9 +31,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "output.h"
 
-#undef TARGET_PRINTF_POINTER_FORMAT
-#define TARGET_PRINTF_POINTER_FORMAT solaris_printf_pointer_format
-
 tree solaris_pending_aligns, solaris_pending_inits, solaris_pending_finis;
 
 /* Attach any pending attributes for DECL to the list in *ATTRIBUTES.
@@ -301,14 +298,3 @@ solaris_override_options (void)
   if (!HAVE_LD_EH_FRAME_CIEV3 && !global_options_set.x_dwarf_version)
     dwarf_version = 2;
 }
-
-/* Solaris libc formats pointers as if by "%zx" with the pound ('#')
-   format flag having the same meaning as in the integer directive.  */
-
-const char*
-solaris_printf_pointer_format (tree, const char **flags)
-{
-  *flags = "#";
-
-  return "%zx";
-}
diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h
index 6f02708..50f2b38 100644
--- a/gcc/config/sol2.h
+++ b/gcc/config/sol2.h
@@ -440,10 +440,6 @@ along with GCC; see the file COPYING3.  If not see
 #undef TARGET_LIBC_HAS_FUNCTION
 #define TARGET_LIBC_HAS_FUNCTION default_libc_has_function
 
-/* The format string to which "%p" corresponds.  */
-#undef TARGET_LIBC_PRINTF_POINTER_FORMAT
-#define TARGET_LIBC_PRINTF_POINTER_FORMAT solaris_libc_printf_pointer_format
-
 extern GTY(()) tree solaris_pending_aligns;
 extern GTY(()) tree solaris_pending_inits;
 extern GTY(()) tree solaris_pending_finis;
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index ebcadac..931ee9a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5418,10 +5418,6 @@ In either case, it remains possible to select code-generation for the alternate
 scheme, by means of compiler command line switches.
 @end defmac
 
-@deftypefn {Target Hook} {const char*} TARGET_PRINTF_POINTER_FORMAT (tree, const char **@var{flags})
-Determine the target @code{printf} implementation format string that the most closely corresponds to the @code{%p} format directive.  The object pointed to by the @var{flags} is set to a string consisting of recognized format flags such as the @code{'#'} character.
-@end deftypefn
-
 @node Addressing Modes
 @section Addressing Modes
 @cindex addressing modes
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 95f5fd9..ce03b90 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4104,8 +4104,6 @@ In either case, it remains possible to select code-generation for the alternate
 scheme, by means of compiler command line switches.
 @end defmac
 
-@hook TARGET_PRINTF_POINTER_FORMAT
-
 @node Addressing Modes
 @section Addressing Modes
 @cindex addressing modes
diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
index 43bc560..709d836 100644
--- a/gcc/gimple-ssa-sprintf.c
+++ b/gcc/gimple-ssa-sprintf.c
@@ -72,7 +72,6 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "realmpfr.h"
 #include "target.h"
-#include "targhooks.h"
 
 #include "cpplib.h"
 #include "input.h"
@@ -126,7 +125,7 @@ public:
   void handle_gimple_call (gimple_stmt_iterator*);
 
   struct call_info;
-  void compute_format_length (const call_info &, format_result *);
+  bool compute_format_length (const call_info &, format_result *);
 };
 
 bool
@@ -759,83 +758,12 @@ build_intmax_type_nodes (tree *pintmax, tree *puintmax)
     }
 }
 
-static fmtresult
-format_integer (const conversion_spec &, tree);
-
-/* Return a range representing the minimum and maximum number of bytes
-   that the conversion specification SPEC will write on output for the
-   pointer argument ARG when non-null.  ARG may be null (for vararg
-   functions).  */
-
-static fmtresult
-format_pointer (const conversion_spec &spec, tree arg)
-{
-  fmtresult res;
-
-  /* Determine the target's integer format corresponding to "%p".  */
-  const char *flags;
-  const char *pfmt = targetm.printf_pointer_format (arg, &flags);
-  if (!pfmt)
-    {
-      /* The format couldn't be determined.  */
-      res.range.min = res.range.max = HOST_WIDE_INT_M1U;
-      return res;
-    }
-
-  if (pfmt [0] == '%')
-    {
-      /* Format the pointer using the integer format string.  */
-      conversion_spec pspec = spec;
-
-      /* Clear flags that are not listed as recognized.  */
-      for (const char *pf = "+ #0"; *pf; ++pf)
-	{
-	  if (!strchr (flags, *pf))
-	    pspec.clear_flag (*pf);
-	}
-
-      /* Set flags that are specified in the format string.  */
-      bool flag_p = true;
-      do
-	{
-	  switch (*++pfmt)
-	    {
-	    case '+': case ' ': case '#': case '0':
-	      pspec.set_flag (*pfmt);
-	      break;
-	    default:
-	      flag_p = false;
-	    }
-	}
-      while (flag_p);
-
-      /* Set the appropriate length modifier taking care to clear
-       the one that may be set (Glibc's %p accepts but ignores all
-       the integer length modifiers).  */
-      switch (*pfmt)
-	{
-	case 'l': pspec.modifier = FMT_LEN_l; ++pfmt; break;
-	case 't': pspec.modifier = FMT_LEN_t; ++pfmt; break;
-	case 'z': pspec.modifier = FMT_LEN_z; ++pfmt; break;
-	default: pspec.modifier = FMT_LEN_none;
-	}
-
-      pspec.force_flags = 1;
-      pspec.specifier = *pfmt++;
-      gcc_assert (*pfmt == '\0');
-      return format_integer (pspec, arg);
-    }
-
-  /* The format is a plain string such as Glibc's "(nil)".  */
-  res.range.min = res.range.max = strlen (pfmt);
-  return res;
-}
-
 /* Set *PWIDTH and *PPREC according to the width and precision specified
    in SPEC.  Each is set to HOST_WIDE_INT_MIN when the corresponding
    field is specified but unknown, to zero for width and -1 for precision,
    respectively when it's not specified, or to a non-negative value
    corresponding to the known value.  */
+
 static void
 get_width_and_precision (const conversion_spec &spec,
 			 HOST_WIDE_INT *pwidth, HOST_WIDE_INT *pprec)
@@ -867,7 +795,6 @@ get_width_and_precision (const conversion_spec &spec,
   *pprec = prec;
 }
 
-
 /* Return a range representing the minimum and maximum number of bytes
    that the conversion specification SPEC will write on output for the
    integer argument ARG when non-null.  ARG may be null (for vararg
@@ -2257,9 +2184,12 @@ add_bytes (const pass_sprintf_length::call_info &info,
 
 /* Compute the length of the output resulting from the call to a formatted
    output function described by INFO and store the result of the call in
-   *RES.  Issue warnings for detected past the end writes.  */
+   *RES.  Issue warnings for detected past the end writes.  Return true
+   if the complete format string has been processed and *RES can be relied
+   on, false otherwise (e.g., when a unknown or unhandled directive was seen
+   that caused the processing to be terminated early). */
 
-void
+bool
 pass_sprintf_length::compute_format_length (const call_info &info,
 					    format_result *res)
 {
@@ -2299,7 +2229,7 @@ pass_sprintf_length::compute_format_length (const call_info &info,
       if (0 && *pf == 0)
 	{
 	  /* Incomplete directive.  */
-	  return;
+	  return false;
 	}
 
       conversion_spec spec = conversion_spec ();
@@ -2322,10 +2252,10 @@ pass_sprintf_length::compute_format_length (const call_info &info,
 	{
 	  /* Similarly to the block above, this could be either a POSIX
 	     positional argument or a width, depending on what follows.  */
-	  if (argno < gimple_call_num_args (info.callstmt))
-	    spec.star_width = gimple_call_arg (info.callstmt, argno++);
-	  else
-	    return;
+	  if (gimple_call_num_args (info.callstmt) <= argno)
+	    return false;
+
+	  spec.star_width = gimple_call_arg (info.callstmt, argno++);
 	  ++pf;
 	}
 
@@ -2344,7 +2274,7 @@ pass_sprintf_length::compute_format_length (const call_info &info,
 	  if (dollar == 0
 	      || dollar == info.argidx
 	      || dollar > gimple_call_num_args (info.callstmt))
-	    return;
+	    return false;
 
 	  --dollar;
 
@@ -2411,7 +2341,7 @@ pass_sprintf_length::compute_format_length (const call_info &info,
 		 estimate the upper bound on the size of the output
 		 based on the number of digits it probably isn't worth
 		 continuing.  */
-	      return;
+	      return false;
 	    }
 	}
 
@@ -2520,11 +2450,14 @@ pass_sprintf_length::compute_format_length (const call_info &info,
 	  break;
 
 	case 'p':
-	  spec.fmtfunc = format_pointer;
-	  break;
+	  /* The %p output is implementation-defined.  It's possible
+	     to determine this format but due to extensions (especially
+	     those of the Linux kernel -- see bug 78512) the first %p
+	     in the format string disables any further processing. */
+	  return false;
 
 	case 'n':
-	  return;
+	  break;
 
 	case 'c':
 	case 'S':
@@ -2533,7 +2466,8 @@ pass_sprintf_length::compute_format_length (const call_info &info,
 	  break;
 
 	default:
-	  return;
+	  /* Unknown conversion specification.  */
+	  return false;
 	}
 
       spec.specifier = *pf++;
@@ -2552,6 +2486,9 @@ pass_sprintf_length::compute_format_length (const call_info &info,
 
       ::format_directive (info, res, dir, dirlen, spec, arg);
     }
+
+  /* Complete format string was processed (with or without warnings).  */
+  return true;
 }
 
 /* Return the size of the object referenced by the expression DEST if
@@ -2893,13 +2830,15 @@ pass_sprintf_length::handle_gimple_call (gimple_stmt_iterator *gsi)
   /* The result is the number of bytes output by the formatted function,
      including the terminating NUL.  */
   format_result res = format_result ();
-  compute_format_length (info, &res);
+
+  bool success = compute_format_length (info, &res);
 
   /* When optimizing and the printf return value optimization is enabled,
      attempt to substitute the computed result for the return value of
      the call.  Avoid this optimization when -frounding-math is in effect
      and the format string contains a floating point directive.  */
-  if (optimize > 0
+  if (success
+      && optimize > 0
       && flag_printf_return_value
       && (!flag_rounding_math || !res.floating))
     try_substitute_return_value (gsi, info, res);
diff --git a/gcc/target.def b/gcc/target.def
index 85a0ac0..3fd6521 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3381,12 +3381,6 @@ greater than 128 and a multiple of 32.",
  machine_mode, (int n, bool extended),
  default_floatn_mode)
 
-DEFHOOK
-(printf_pointer_format,
- "Determine the target @code{printf} implementation format string that the most closely corresponds to the @code{%p} format directive.  The object pointed to by the @var{flags} is set to a string consisting of recognized format flags such as the @code{'#'} character.",
- const char*, (tree, const char **flags),
- default_printf_pointer_format)
-
 /* Compute cost of moving data from a register of class FROM to one of
    TO, using MODE.  */
 DEFHOOK
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index a80b301..5d3e91e 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1512,36 +1512,6 @@ no_c99_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED)
   return false;
 }
 
-/* Return the format string to which "%p" corresponds.  By default,
-   assume it corresponds to the C99 "%zx" format and set *FLAGS to
-   the empty string to indicate that format flags have no effect.
-   An example of an implementation that matches this description
-   is AIX.  */
-
-const char*
-default_printf_pointer_format (tree, const char **flags)
-{
-  *flags = "";
-
-  return "%zx";
-}
-
-/* For Glibc and uClibc targets also define the hook here because
-   otherwise it would have to be duplicated in each target's .c file
-   (such as in bfin/bfin.c and c6x/c6x.c, etc.)
-   Glibc and uClibc format pointers as if by "%zx" except for the null
-   pointer which outputs "(nil)".  It ignores the pound ('#') format
-   flag but interprets the space and plus flags the same as in the integer
-   directive.  */
-
-const char*
-linux_printf_pointer_format (tree arg, const char **flags)
-{
-  *flags = " +";
-
-  return arg && integer_zerop (arg) ? "(nil)" : "%#zx";
-}
-
 tree
 default_builtin_tm_load_store (tree ARG_UNUSED (type))
 {
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index e00da60..3a9271f 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -191,10 +191,6 @@ extern bool default_libc_has_function (enum function_class);
 extern bool no_c99_libc_has_function (enum function_class);
 extern bool gnu_libc_has_function (enum function_class);
 
-extern const char* default_printf_pointer_format (tree, const char **);
-extern const char* linux_printf_pointer_format (tree, const char **);
-extern const char* solaris_printf_pointer_format (tree, const char **);
-
 extern tree default_builtin_tm_load_store (tree);
 
 extern int default_memory_move_cost (machine_mode, reg_class_t, bool);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-6.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-6.c
index 375fc09..4c41234 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-6.c
@@ -1,9 +1,11 @@
 /* PR middle-end/78476 - snprintf(0, 0, ...) with known arguments not
    optimized away
+   PR middle-end/78512 - r242674 miscompiles Linux kernel
    A negative test complementing builtin-sprintf-5.c to verify that calls
    to the function that do not return a constant are not optimized away.
+   Test also verifies that unknown directives prevent the optimization.
    { dg-compile }
-   { dg-options "-O2 -fdump-tree-optimized" }
+   { dg-options "-O2 -Wformat -fdump-tree-optimized" }
    { dg-require-effective-target int32plus } */
 
 #define CONCAT(a, b) a ## b
@@ -48,10 +50,25 @@ void test_arg_int (int width, int prec, int i, int n)
 
   T ("%i", R (1, 10));
 
+  T ("%'i", 1234567);
+
   for (i = -n; i != n; ++i)
     T ("%*x", n, i);
 }
 
+/* Support for %p was removed in response to PR middle-end/78512 due
+   to the Linux kernel relying on GCC builtins while at the same time
+   providing a large number of extensions to the %p directive that
+   interfere with the optimization.  Verify that %p disables it.  */
+
+void test_arg_ptr (int width, int prec, int i)
+{
+  T ("%p", (void*)0);
+  T ("p=%p", (void*)0);
+  T ("%s=%p", "p=", (void*)0);
+  T ("%i%p", 123, (void*)0);
+}
+
 void test_arg_string (int width, int prec, const char *s)
 {
   T ("%-s", s);
@@ -69,5 +86,24 @@ void test_arg_string (int width, int prec, const char *s)
   T ("%*.*s", width, prec, "123");
 }
 
+void test_invalid_directive (void)
+{
+  T ("%");        /* { dg-warning "spurious trailing .%." } */
+  T ("abc%");     /* { dg-warning "spurious trailing .%." } */
+
+  T ("%2$i");     /* { dg-warning "operand number out of range" } */
+  T ("abc%2$i");  /* { dg-warning "operand number out of range" } */
+
+  T ("%=i", 0);   /* { dg-warning "unknown conversion type character .=." } */
+  /* { dg-warning "too many arguments" "" { target *-*-* } .-1 } */
+
+  T ("%*i", "", 0); /* { dg-warning "field width specifier .\\*. expects argument of type .int." } */
+  T ("%.*i", "", 0); /* { dg-warning "field precision specifier .\\.\\*. expects argument of type .int." } */
+  T ("%.*.i", 0);   /* { dg-warning "unknown conversion type character .\\.." } */
+  T ("%Q");       /* { dg-warning "unknown conversion type character .Q." } */
+  T ("abc%Q");    /* { dg-warning "unknown conversion type character .Q." } */
+}
 
-/* { dg-final { scan-tree-dump-times "snprintf" 27 "optimized"} } */
+/* Use 'grep "^ *T (" builtin-sprintf-6.c  | wc -l' to determine
+   the count for the directive below.
+   { dg-final { scan-tree-dump-times "snprintf" 42 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
index 7937149..4b0813e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
@@ -99,30 +99,6 @@ void test_sprintf_c_const (void)
   T (-1, "%*cX", INT_MAX,     '1'); /* { dg-warning "output of \[0-9\]+ bytes causes result to exceed .INT_MAX." } */
 }
 
-/* Exercise the "%p" directive with constant arguments.  */
-
-void test_sprintf_p_const (void)
-{
-  /* GLIBC and uClibc format null pointers as "(nil)".  Sane implementations
-     format null pointers as 0 or 0x0 and so the following will only be
-     diagnosed on the former targets.  */
-  T (5, "%p",     (void*)0);
-  /* { dg-warning "nul past the end" "(nil)" { target *-linux-gnu *-*-uclinux } .-1 } */
-
-  /* The exact output for %p is unspecified by C.  Two formats are known:
-     same as %tx (for example AIX) and same as %#tx (for example Solaris).  */
-  T (0, "%p",     (void*)0x1);    /* { dg-warning ".%p. directive writing . bytes? into a region of size 0" } */
-  T (1, "%p",     (void*)0x12);   /* { dg-warning ".%p. directive writing . bytes? into a region of size 1" } */
-  T (2, "%p",     (void*)0x123);  /* { dg-warning ".%p. directive writing . bytes? into a region of size 2" } */
-
-  /* GLIBC and uClibc treat the ' ' flag with the "%p" directive the same
-     as with signed integer conversions (i.e., it prepends a space).  Other
-     known implementations ignore it.  */
-  T (6, "% p",    (void*)0x234);  /* { dg-warning ". . flag used with .%p." } */
-  /* { dg-warning "nul past the end" "Glibc %p" { target *-linux-gnu } .-1 } */
-  /* { dg-warning "nul past the end" "Generic %p" { target *-*-uclinux } .-2 } */
-}
-
 /* Verify that no warning is issued for calls that write into a flexible
    array member whose size isn't known.  Also verify that calls that use
    a flexible array member as an argument to the "%s" directive do not

Reply via email to