Hello,

within a compile cluster, only the preprocessed output of GCC is transferred to 
remote nodes for compilation. 
When GCC produces advanced diagnostics (with -fdiagnostics-show-caret), e.g. 
prints out the affected source
line and fixit hints, it attempts to read the source file again, even when 
compiling a preprocessed file (-fpreprocessed). 
This leads to wrong diagnostics when building with a compile cluster, or, more 
generally, when changing or deleting the original source file.

This patch attempts to alter the behavior by implementing a 
location_get_source_line_preprocessed 
function that can be used in diagnostic-show-locus.c in case a preprocessed 
file is compiled.
There was some previous discussion on this behavior on PR preprocessor/79106.

This is my first patch to GCC, so in case something is wrong with the format, 
please let me know.

Best regards
Lucas

2019-12-16  Lucas Bader  <lucas.ba...@sap.com>

        PR preprocessor/79106
        * c-opts.c (c_common_handle_option): pass -fpreprocessed 
        option value to global diagnostic configuration
        
        * diagnostic-show-locus.c (layout::layout): read line from source or 
preprocessed
        file based on -fpreprocessed value
        (source_line::source_line): read line from source or preprocessed
        file based on -fpreprocessed value
        (layout::print_line): read line from source or preprocessed
        file based on -fpreprocessed value
        
        * diagnostic.h (diagnostic_context): new members for reading
        source lines from preprocessed files
        * diagnostic.c (diagnostic_initialize): initialize new members
        
        * input.c (location_get_source_line_preprocessed): new function
        to read source lines from preprocessed files
        (test_reading_source_line_preprocessed): new test case
        (input_c_tests): execute new test case
        
        * opts-global.c (read_cmdline_options): pass input filename to global
        diagnostic context

---

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index c913291c07c..8634de6eb3f 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -485,6 +485,7 @@ c_common_handle_option (size_t scode, const char *arg, 
HOST_WIDE_INT value,
 
     case OPT_fpreprocessed:
       cpp_opts->preprocessed = value;
+      global_dc->is_preprocessed = value;
       break;
 
     case OPT_fdebug_cpp:
diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index cb920f6b9d0..3a4838605de 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -896,7 +896,12 @@ layout::layout (diagnostic_context * context,
      Center the primary caret to fit in max_width; all columns
      will be adjusted accordingly.  */
   size_t max_width = m_context->caret_max_width;
-  char_span line = location_get_source_line (m_exploc.file, m_exploc.line);
+  char_span line (NULL, 0);
+  if (global_dc->main_input_file_path != NULL && global_dc->is_preprocessed)
+    line = location_get_source_line_preprocessed (
+       m_exploc.file, global_dc->main_input_file_path, m_exploc.line);
+  else
+    line = location_get_source_line (m_exploc.file, m_exploc.line);
   if (line && (size_t)m_exploc.column <= line.length ())
     {
       size_t right_margin = CARET_LINE_MARGIN;
@@ -1913,7 +1918,12 @@ public:
 
 source_line::source_line (const char *filename, int line)
 {
-  char_span span = location_get_source_line (filename, line);
+  char_span span (NULL, 0);
+  if (global_dc->main_input_file_path != NULL && global_dc->is_preprocessed)
+    span = location_get_source_line_preprocessed (
+       filename, global_dc->main_input_file_path, line);
+  else
+    span = location_get_source_line (filename, line);
   chars = span.get_buffer ();
   width = span.length ();
 }
@@ -2237,7 +2247,13 @@ layout::show_ruler (int max_column) const
 void
 layout::print_line (linenum_type row)
 {
-  char_span line = location_get_source_line (m_exploc.file, row);
+  char_span line (NULL, 0);
+  if (global_dc->main_input_file_path != NULL && global_dc->is_preprocessed)
+    line = location_get_source_line_preprocessed (
+       m_exploc.file, global_dc->main_input_file_path, row);
+  else
+    line = location_get_source_line (m_exploc.file, row);
+
   if (!line)
     return;
 
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index a29bcf155e2..6ef99d0e8f1 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -218,6 +218,8 @@ diagnostic_initialize (diagnostic_context *context, int 
n_opts)
   context->begin_group_cb = NULL;
   context->end_group_cb = NULL;
   context->final_cb = default_diagnostic_final_cb;
+  context->main_input_file_path = NULL;
+  context->is_preprocessed = false;
 }
 
 /* Maybe initialize the color support. We require clients to do this
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index 91e4c509605..f233057fbbe 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -158,6 +158,12 @@ struct diagnostic_context
   /* Maximum number of errors to report.  */
   int max_errors;
 
+  /* Original input file.  */
+  const char* main_input_file_path;
+
+  /* True if the input file is treated as preprocessed.  */
+  bool is_preprocessed;
+
   /* This function is called before any message is printed out.  It is
      responsible for preparing message prefix and such.  For example, it
      might say:
diff --git a/gcc/input.c b/gcc/input.c
index 00301ef68dd..6fe0dbfbf34 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -767,6 +767,109 @@ location_get_source_line (const char *file_path, int line)
   return char_span (buffer, len);
 }
 
+/* Return the physical source line that corresponds to SOURCE_NAME/LINE.
+   Read the line from the preprocessed file at FILE_PATH.
+   The line is not nul-terminated.  The returned pointer is only
+   valid until the next call of location_get_source_line.
+   Note that the line can contain several null characters,
+   so the returned value's length has the actual length of the line.
+   If the function fails, a NULL char_span is returned.  */
+
+char_span
+location_get_source_line_preprocessed (const char *source_name,
+                                      const char *file_path, int line)
+{
+  char *buffer = NULL;
+  ssize_t len;
+
+  if (line == 0)
+    return char_span (NULL, 0);
+
+  fcache *c = lookup_or_add_file_to_cache_tab (file_path);
+  if (c == NULL)
+    return char_span (NULL, 0);
+
+  // find linemarker closest to actual line number
+  int linemarker_line = 0; // line no of linemarker in pp file
+  int linemarker_loc = 0;  // source line no indicated in linemarker
+  int current_line = 1;
+  size_t source_name_len = strlen (source_name);
+  while (read_line_num (c, current_line, &buffer, &len))
+    {
+      if (len > 2 && buffer[0] == '#' && ISDIGIT (buffer[2]))
+       {
+         // is a linemarker
+         bool is_match = false;
+         for (ssize_t i = 3; i < len && source_name_len < len - i - 1; ++i)
+           {
+             if (buffer[i] == '"')
+               {
+                 is_match
+                     = memcmp (buffer + i + 1, source_name, source_name_len)
+                       == 0;
+                 break;
+               }
+           }
+         bool match_past_line = false;
+         if (is_match)
+           {
+             // we found a linemarker that matches the source file
+             // e.g.
+             // # 14 "../../file.h"
+             // we use the last suitable marker we find
+             int loc = atoi (buffer + 2);
+             if (loc <= line && line - loc <= line - linemarker_loc)
+               {
+                 // new best candidate
+                 linemarker_loc = loc;
+                 linemarker_line = current_line;
+                 current_line++;
+                 continue;
+               }
+             else if (loc > line)
+               {
+                 // don't break directly due to validity check
+                 match_past_line = true;
+               }
+           }
+         if (linemarker_line != 0
+             && linemarker_line + (line - linemarker_loc) + 1 >= current_line)
+           {
+             // if we find any other linemarker, we have to check if our
+             // potential source line is outside the current candidate if
+             // that's the case, we invalidate the candidate
+             linemarker_line = 0;
+             linemarker_loc = 0;
+           }
+         if (match_past_line)
+           break;
+       }
+      current_line++;
+    }
+  if (linemarker_line != 0)
+    {
+      // line of linemarker in the preprocessed file is the base at which we
+      // start counting the line indicated by the linemarker, e.g. the 5 in # 5
+      // "file.cpp", is the original source line number which starts right
+      // below the linemarker So to read line 7 of the original source file
+      // "file.cpp" we treat line 2055 as line 5 of the original line 7 = 2054
+      // + (7 - 5) + 1 = 2057
+      // ...
+      // 2054: # 5 "file.cpp"
+      // 2055: int func() {
+      // 2056:    int a = 4;
+      // 2057:    int b = 5;
+      // ...
+      bool read = read_line_num (
+         c, linemarker_line + (line - linemarker_loc) + 1, &buffer, &len);
+      if (!read)
+       return char_span (NULL, 0);
+
+      return char_span (buffer, len);
+    }
+  return char_span (NULL, 0);
+}
+
 /* Determine if FILE_PATH missing a trailing newline on its final line.
    Only valid to call once all of the file has been loaded, by
    requesting a line number beyond the end of the file.  */
@@ -1951,6 +2054,70 @@ test_reading_source_line ()
   ASSERT_TRUE (source_line.get_buffer () == NULL);
 }
 
+/* Verify reading of preprocessed input files
+   (e.g. for caret-based diagnostics).  */
+
+static void
+test_reading_source_line_preprocessed ()
+{
+  /* Create a tempfile and write some valid preprocessor output.  */
+  temp_source_file tmp (SELFTEST_LOCATION, ".cpp.ii",
+                       "# 1 \"test.cpp\"\n"
+                       "# 1 \"test.cpp\"\n"
+                       "# 1 \"test.h\" 1\n"
+                       "void test_func() {\n"
+                       "# 35 \"test.h\"\n"
+                       "    do_something_else ();\n"
+                       "}\n"
+                       "# 2 \"test.cpp\" 2\n"
+                       "\n"
+                       "int main() {\n"
+                       "    do_something ();\n"
+                       "\n"
+                       "    int i = 5;\n"
+                       "    unsigned j = 3;\n"
+                       "    if (i > j)\n"
+                       "    return 0;\n"
+                       "\n"
+                       "    test_func ();\n"
+                       "}");
+
+  /* Read back a specific line from the tempfile.  */
+  char_span source_line = location_get_source_line_preprocessed (
+      "test.cpp", tmp.get_filename (), 4);
+  ASSERT_TRUE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () != NULL);
+  ASSERT_EQ (20, source_line.length ());
+  ASSERT_TRUE (!strncmp ("    do_something ();", source_line.get_buffer (),
+                        source_line.length ()));
+
+  source_line = location_get_source_line_preprocessed (
+      "test.h", tmp.get_filename (), 35);
+  ASSERT_TRUE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () != NULL);
+  ASSERT_EQ (25, source_line.length ());
+  ASSERT_TRUE (!strncmp ("    do_something_else ();",
+                        source_line.get_buffer (), source_line.length ()));
+
+  // file not present in preprocessor output
+  source_line = location_get_source_line_preprocessed ("other.h",
+                                                      tmp.get_filename (), 4);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+
+  // off by one
+  source_line = location_get_source_line_preprocessed (
+      "test.cpp", tmp.get_filename (), 13);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+
+  // empty lines omitted by preprocessor
+  source_line = location_get_source_line_preprocessed ("test.h",
+                                                      tmp.get_filename (), 2);
+  ASSERT_FALSE (source_line);
+  ASSERT_TRUE (source_line.get_buffer () == NULL);
+}
+
 /* Tests of lexing.  */
 
 /* Verify that token TOK from PARSER has cpp_token_as_text
@@ -3629,6 +3796,7 @@ input_c_tests ()
   for_each_line_table_case (test_lexer_char_constants);
 
   test_reading_source_line ();
+  test_reading_source_line_preprocessed ();
 
   test_line_offset_overflow ();
 }
diff --git a/gcc/input.h b/gcc/input.h
index c459bf28553..99ab002f941 100644
--- a/gcc/input.h
+++ b/gcc/input.h
@@ -83,6 +83,9 @@ class char_span
 };
 
 extern char_span location_get_source_line (const char *file_path, int line);
+extern char_span
+location_get_source_line_preprocessed (const char *source_name,
+                                      const char *file_path, int line);
 
 extern bool location_missing_trailing_newline (const char *file_path);
 extern expanded_location
diff --git a/gcc/opts-global.c b/gcc/opts-global.c
index b51c2fbc21c..b48fe527370 100644
--- a/gcc/opts-global.c
+++ b/gcc/opts-global.c
@@ -224,6 +224,10 @@ read_cmdline_options (struct gcc_options *opts, struct 
gcc_options *opts_set,
          if (opts->x_main_input_filename == NULL)
            {
              opts->x_main_input_filename = decoded_options[i].arg;
+         // reserve original input filename in diagnostic context
+         // because if a preprocessed file is compiled, this is
+         // changed to the original source file name later
+         dc->main_input_file_path = opts->x_main_input_filename;
              opts->x_main_input_baselength
                = base_of_path (opts->x_main_input_filename,
                                &opts->x_main_input_basename);
-- 
2.12.3

Reply via email to