This revision was automatically updated to reflect the committed changes.
Closed by commit rG8fa5e98fd191: [clang-format] Remove duplciate code from 
Invalid BOM detection (authored by MyDeveloperDay).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68914/new/

https://reviews.llvm.org/D68914

Files:
  clang/include/clang/Basic/SourceManager.h
  clang/lib/Basic/SourceManager.cpp
  clang/tools/clang-format/ClangFormat.cpp
  clang/unittests/Basic/SourceManagerTest.cpp

Index: clang/unittests/Basic/SourceManagerTest.cpp
===================================================================
--- clang/unittests/Basic/SourceManagerTest.cpp
+++ clang/unittests/Basic/SourceManagerTest.cpp
@@ -200,6 +200,47 @@
             "</mainFile.cpp:1:1, /test-header.h:1:1>");
 }
 
+TEST_F(SourceManagerTest, getInvalidBOM) {
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM(""), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\x00\x00\x00"), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\xFF\xFF\xFF"), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("#include <iostream>"),
+            nullptr);
+
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFE\xFF#include <iostream>")),
+            "UTF-16 (BE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFF\xFE#include <iostream>")),
+            "UTF-16 (LE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x2B\x2F\x76#include <iostream>")),
+            "UTF-7");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xF7\x64\x4C#include <iostream>")),
+            "UTF-1");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xDD\x73\x66\x73#include <iostream>")),
+            "UTF-EBCDIC");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x0E\xFE\xFF#include <iostream>")),
+            "SCSU");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFB\xEE\x28#include <iostream>")),
+            "BOCU-1");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x84\x31\x95\x33#include <iostream>")),
+            "GB-18030");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                llvm::StringLiteral::withInnerNUL(
+                    "\x00\x00\xFE\xFF#include <iostream>"))),
+            "UTF-32 (BE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                llvm::StringLiteral::withInnerNUL(
+                    "\xFF\xFE\x00\x00#include <iostream>"))),
+            "UTF-32 (LE)");
+}
+
 #if defined(LLVM_ON_UNIX)
 
 TEST_F(SourceManagerTest, getMacroArgExpandedLocation) {
Index: clang/tools/clang-format/ClangFormat.cpp
===================================================================
--- clang/tools/clang-format/ClangFormat.cpp
+++ clang/tools/clang-format/ClangFormat.cpp
@@ -289,31 +289,6 @@
   }
 }
 
-// If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
-// nullptr.
-static const char *getInValidBOM(StringRef BufStr) {
-  // Check to see if the buffer has a UTF Byte Order Mark (BOM).
-  // We only support UTF-8 with and without a BOM right now.  See
-  // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
-  // for more information.
-  const char *InvalidBOM =
-      llvm::StringSwitch<const char *>(BufStr)
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                      "UTF-32 (BE)")
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                      "UTF-32 (LE)")
-          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-          .StartsWith("\x2B\x2F\x76", "UTF-7")
-          .StartsWith("\xF7\x64\x4C", "UTF-1")
-          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-          .StartsWith("\x0E\xFE\xFF", "SCSU")
-          .StartsWith("\xFB\xEE\x28", "BOCU-1")
-          .StartsWith("\x84\x31\x95\x33", "GB-18030")
-          .Default(nullptr);
-  return InvalidBOM;
-}
-
 static bool
 emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName,
                         const std::unique_ptr<llvm::MemoryBuffer> &Code) {
@@ -412,7 +387,7 @@
 
   StringRef BufStr = Code->getBuffer();
 
-  const char *InvalidBOM = getInValidBOM(BufStr);
+  const char *InvalidBOM = SrcMgr::ContentCache::getInvalidBOM(BufStr);
 
   if (InvalidBOM) {
     errs() << "error: encoding with unsupported byte order mark \""
Index: clang/lib/Basic/SourceManager.cpp
===================================================================
--- clang/lib/Basic/SourceManager.cpp
+++ clang/lib/Basic/SourceManager.cpp
@@ -95,6 +95,29 @@
   Buffer.setInt((B && DoNotFree) ? DoNotFreeFlag : 0);
 }
 
+const char *ContentCache::getInvalidBOM(StringRef BufStr) {
+  // If the buffer is valid, check to see if it has a UTF Byte Order Mark
+  // (BOM).  We only support UTF-8 with and without a BOM right now.  See
+  // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
+  const char *InvalidBOM =
+      llvm::StringSwitch<const char *>(BufStr)
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+                      "UTF-32 (BE)")
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+                      "UTF-32 (LE)")
+          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+          .StartsWith("\x2B\x2F\x76", "UTF-7")
+          .StartsWith("\xF7\x64\x4C", "UTF-1")
+          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
+          .StartsWith("\x0E\xFE\xFF", "SCSU")
+          .StartsWith("\xFB\xEE\x28", "BOCU-1")
+          .StartsWith("\x84\x31\x95\x33", "GB-18030")
+          .Default(nullptr);
+
+  return InvalidBOM;
+}
+
 const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
                                                   FileManager &FM,
                                                   SourceLocation Loc,
@@ -190,20 +213,7 @@
   // (BOM).  We only support UTF-8 with and without a BOM right now.  See
   // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
   StringRef BufStr = Buffer.getPointer()->getBuffer();
-  const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
-    .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                                                  "UTF-32 (BE)")
-    .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                                                  "UTF-32 (LE)")
-    .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-    .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-    .StartsWith("\x2B\x2F\x76", "UTF-7")
-    .StartsWith("\xF7\x64\x4C", "UTF-1")
-    .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-    .StartsWith("\x0E\xFE\xFF", "SCSU")
-    .StartsWith("\xFB\xEE\x28", "BOCU-1")
-    .StartsWith("\x84\x31\x95\x33", "GB-18030")
-    .Default(nullptr);
+  const char *InvalidBOM = getInvalidBOM(BufStr);
 
   if (InvalidBOM) {
     Diag.Report(Loc, diag::err_unsupported_bom)
Index: clang/include/clang/Basic/SourceManager.h
===================================================================
--- clang/include/clang/Basic/SourceManager.h
+++ clang/include/clang/Basic/SourceManager.h
@@ -226,6 +226,10 @@
     bool shouldFreeBuffer() const {
       return (Buffer.getInt() & DoNotFreeFlag) == 0;
     }
+
+    // If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
+    // nullptr
+    static const char *getInvalidBOM(StringRef BufStr);
   };
 
   // Assert that the \c ContentCache objects will always be 8-byte aligned so
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to