aganea created this revision.
aganea added reviewers: arphaman, dexonsmith, Bigcheese.
aganea added a project: clang.
Herald added a subscriber: tschuett.

This patch fixes:

1. Invisible characters that come just before a preprocessor directive such as
#include or #ifndef (e.g. the bytes EF BB BF, a UTF-8 byte-order mark, which
may be hidden depending on the display locale). I chose to simply skip over
non-ASCII characters.
2. Double slashes in #include directive with angle brackets not handled 
correctly: #include <dir//file.h>
3. #error directive with quoted, multi-line content, along with CR+LF line 
endings wasn't handled correctly.


Repository:
  rC Clang

https://reviews.llvm.org/D65906

Files:
  lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  test/Lexer/minimize_source_to_dependency_directives_include.c
  test/Lexer/minimize_source_to_dependency_directives_invalid_chars.c
  test/Lexer/minimize_source_to_dependency_directives_invalid_error.c

Index: test/Lexer/minimize_source_to_dependency_directives_invalid_error.c
===================================================================
--- test/Lexer/minimize_source_to_dependency_directives_invalid_error.c
+++ test/Lexer/minimize_source_to_dependency_directives_invalid_error.c
@@ -0,0 +1,16 @@
+// Test CR+LF are properly handled along with quoted, multi-line #error
+// RUN: cat %s | unix2dos | %clang_cc1 -DOTHER -print-dependency-directives-minimized-source 2>&1 | FileCheck %s
+
+#ifndef TEST
+#error "message \
+   more message \
+   even more"
+#endif
+
+#ifdef OTHER
+#include <string>
+#endif
+
+// CHECK:      #ifdef OTHER
+// CHECK-NEXT: #include <string>
+// CHECK-NEXT: #endif
Index: test/Lexer/minimize_source_to_dependency_directives_invalid_chars.c
===================================================================
--- test/Lexer/minimize_source_to_dependency_directives_invalid_chars.c
+++ test/Lexer/minimize_source_to_dependency_directives_invalid_chars.c
@@ -0,0 +1,9 @@
+// Test invisible, bad characters just before #ifdef
+// RUN: echo -n -e '\xef\xbb\xbf#ifdef TEST\n' > %t.c
+// RUN: echo '#include <string>' >> %t.c
+// RUN: echo '#endif' >> %t.c
+// RUN: %clang_cc1 -DTEST -print-dependency-directives-minimized-source %t.c 2>&1 | FileCheck %s
+
+// CHECK:      #ifdef TEST
+// CHECK-NEXT: #include <string>
+// CHECK-NEXT: #endif
Index: test/Lexer/minimize_source_to_dependency_directives_include.c
===================================================================
--- test/Lexer/minimize_source_to_dependency_directives_include.c
+++ test/Lexer/minimize_source_to_dependency_directives_include.c
@@ -0,0 +1,8 @@
+// Test double slashes in #include directive along with angle brackets. Previously, this was interpreted as comments.
+// RUN: %clang_cc1 -DTEST -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s
+
+#include "a//b.h"
+#include <a//b.h>
+
+// CHECK: #include "a//b.h"
+// CHECK: #include <a//b.h>
Index: lib/Lex/DependencyDirectivesSourceMinimizer.cpp
===================================================================
--- lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -113,7 +113,8 @@
 }
 
 static void skipOverSpaces(const char *&First, const char *const End) {
-  while (First != End && isHorizontalWhitespace(*First))
+  while (First != End &&
+         (isHorizontalWhitespace(*First) || !clang::isASCII(*First)))
     ++First;
 }
 
@@ -185,8 +186,8 @@
 }
 
 static void skipString(const char *&First, const char *const End) {
-  assert(*First == '\'' || *First == '"');
-  const char Terminator = *First;
+  assert(*First == '\'' || *First == '"' || *First == '<');
+  const char Terminator = *First == '<' ? '>' : *First;
   for (++First; First != End && *First != Terminator; ++First)
     if (*First == '\\')
       if (++First == End)
@@ -195,15 +196,27 @@
     ++First; // Finish off the string.
 }
 
-static void skipNewline(const char *&First, const char *End) {
-  assert(isVerticalWhitespace(*First));
-  ++First;
+// Returns the EOL length at First: 0 (none), 1 (\n or \r), 2 (\r\n or \n\r)
+static unsigned isEOL(const char *First, const char *const End) {
   if (First == End)
-    return;
+    return 0;
+  if (End - First > 1) {
+    if (isVerticalWhitespace(First[0]) && isVerticalWhitespace(First[1]) &&
+        First[0] != First[1])
+      return 2;
+  }
+  return !!isVerticalWhitespace(First[0]);
+}
 
-  // Check for "\n\r" and "\r\n".
-  if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0]))
-    ++First;
+static unsigned skipNewline(const char *&First, const char *End) {
+  unsigned Len = isEOL(First, End);
+  assert(Len);
+  First += Len;
+  return Len;
+}
+
+static bool wasLineContinuation(const char *First, unsigned Len) {
+  return First[-(int)Len - 1] == '\\';
 }
 
 static void skipToNewlineRaw(const char *&First, const char *const End) {
@@ -211,17 +224,22 @@
     if (First == End)
       return;
 
-    if (isVerticalWhitespace(*First))
+    unsigned Len = isEOL(First, End);
+    if (Len)
       return;
 
-    while (!isVerticalWhitespace(*First))
+    do {
       if (++First == End)
         return;
+      Len = isEOL(First, End);
+    } while (!Len);
+
+    First += Len;
 
-    if (First[-1] != '\\')
+    if (!wasLineContinuation(First, Len))
       return;
 
-    ++First; // Keep going...
+    // Keep skipping lines...
   }
 }
 
@@ -276,7 +294,7 @@
 }
 
 static void skipLine(const char *&First, const char *const End) {
-  do {
+  for (;;) {
     assert(First <= End);
     if (First == End)
       return;
@@ -321,9 +339,10 @@
       return;
 
     // Skip over the newline.
-    assert(isVerticalWhitespace(*First));
-    skipNewline(First, End);
-  } while (First[-2] == '\\'); // Continue past line-continuations.
+    unsigned Len = skipNewline(First, End);
+    if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
+      break;
+  }
 }
 
 static void skipDirective(StringRef Name, const char *&First,
@@ -350,6 +369,10 @@
           skipString(Last, End);
         continue;
       }
+      if (top() == pp_include && *Last == '<') {
+        skipString(Last, End);
+        continue;
+      }
       if (*Last != '/' || End - Last < 2) {
         ++Last;
         continue; // Gather the rest up to print verbatim.
@@ -378,6 +401,9 @@
     // Print out the string.
     if (Last == End || Last == First || Last[-1] != '\\') {
       append(First, reverseOverSpaces(First, Last));
+
+      First = Last;
+      skipNewline(First, End);
       return;
     }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to