erikjv updated this revision to Diff 117660.
erikjv edited the summary of this revision.

https://reviews.llvm.org/D33765

Files:
  include/clang/Basic/SourceManager.h
  lib/Basic/SourceManager.cpp
  test/Misc/diag-utf8.cpp

Index: test/Misc/diag-utf8.cpp
===================================================================
--- /dev/null
+++ test/Misc/diag-utf8.cpp
@@ -0,0 +1,10 @@
+// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
+// Every '<<' below is expected at column 14 regardless of UTF-8 byte length.
+struct Foo { int member; };
+
+void f(Foo foo)
+{
+    "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+    "ideëen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+    "idez̈en" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:9:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+}
Index: lib/Basic/SourceManager.cpp
===================================================================
--- lib/Basic/SourceManager.cpp
+++ lib/Basic/SourceManager.cpp
@@ -1084,11 +1084,50 @@
   return Buffer->getBufferStart() + (CharDataInvalid? 0 : LocInfo.second);
 }
 
+/// Convert the 1-based byte column \p Column, on the line starting at offset
+/// \p LineStart of \p Buf, into a 1-based character column. Every UTF-8
+/// sequence before the position counts as one column, except the combining
+/// diacritical marks U+0300..U+036F, which count as none. Only the Column - 1
+/// bytes before the position are examined.
+static unsigned correctForMultiByteChars(const char *Buf, unsigned LineStart,
+                                         unsigned Column) {
+  const unsigned End = Column ? Column - 1 : 0; // Bytes before the position.
+  auto byteAt = [Buf, LineStart](unsigned I) {
+    return static_cast<unsigned char>(Buf[LineStart + I]);
+  };
+  // U+0300..U+033F encode as CC 80..BF and U+0340..U+036F as CD 80..AF.
+  auto isDiacriticMark = [&](unsigned I) -> bool {
+    if (I + 1 >= End)
+      return false;
+    const unsigned char Trail = byteAt(I + 1);
+    return (byteAt(I) == 0xcc && Trail >= 0x80 && Trail <= 0xbf) ||
+           (byteAt(I) == 0xcd && Trail >= 0x80 && Trail <= 0xaf);
+  };
+
+  unsigned CorrectedColumn = Column;
+  for (unsigned I = 0; I < End; ++I) {
+    const unsigned char Lead = byteAt(I);
+    if (Lead < 0xc0)
+      continue; // ASCII or a stray continuation byte: one column each.
+    if (isDiacriticMark(I))
+      --CorrectedColumn; // The mark itself occupies no column.
+    // Hide only the continuation bytes that are really present, so malformed
+    // input can neither swallow visible characters nor underflow the result.
+    unsigned Expected = Lead < 0xe0 ? 1 : Lead < 0xf0 ? 2 : 3;
+    for (; Expected && I + 1 < End; --Expected, ++I) {
+      if ((byteAt(I + 1) & 0xc0) != 0x80)
+        break;
+      --CorrectedColumn;
+    }
+  }
+  return CorrectedColumn;
+}
 
 /// getColumnNumber - Return the column # for the specified file position.
 /// this is significantly cheaper to compute than the line number.
 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
-                                        bool *Invalid) const {
+                                        bool *Invalid,
+                                        bool BytePosition) const {
   bool MyInvalid = false;
   llvm::MemoryBuffer *MemBuf = getBuffer(FID, &MyInvalid);
   if (Invalid)
@@ -1122,14 +1161,18 @@
         if (Buf[FilePos - 1] == '\r' || Buf[FilePos - 1] == '\n')
           --FilePos;
       }
-      return FilePos - LineStart + 1;
+      unsigned Column = FilePos - LineStart + 1;
+      return BytePosition ? Column
+                          : correctForMultiByteChars(Buf, LineStart, Column);
     }
   }
 
   unsigned LineStart = FilePos;
   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
     --LineStart;
-  return FilePos-LineStart+1;
+  unsigned Column = FilePos - LineStart + 1;
+  return BytePosition ? Column
+                      : correctForMultiByteChars(Buf, LineStart, Column);
 }
 
 // isInvalid - Return the result of calling loc.isInvalid(), and
@@ -1454,7 +1497,8 @@
   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second, &Invalid);
   if (Invalid)
     return PresumedLoc();
-  unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid);
+  unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second, &Invalid,
+                                   /*BytePosition=*/false);
   if (Invalid)
     return PresumedLoc();
   
Index: include/clang/Basic/SourceManager.h
===================================================================
--- include/clang/Basic/SourceManager.h
+++ include/clang/Basic/SourceManager.h
@@ -1300,7 +1300,8 @@
   /// on a file sloc, so you must choose a spelling or expansion location
   /// before calling this method.
   unsigned getColumnNumber(FileID FID, unsigned FilePos,
-                           bool *Invalid = nullptr) const;
+                           bool *Invalid = nullptr,
+                           bool BytePosition = true) const;
   unsigned getSpellingColumnNumber(SourceLocation Loc,
                                    bool *Invalid = nullptr) const;
   unsigned getExpansionColumnNumber(SourceLocation Loc,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D33765: Show corre... Erik Verbruggen via Phabricator via cfe-commits

Reply via email to