erikjv updated this revision to Diff 124903.
erikjv added a comment.
I moved all of the code into TextDiagnostic, so all other interfaces still get
byte offsets.
https://reviews.llvm.org/D33765
Files:
lib/Frontend/TextDiagnostic.cpp
test/Misc/diag-utf8.cpp
Index: test/Misc/diag-utf8.cpp
===================================================================
--- /dev/null
+++ test/Misc/diag-utf8.cpp
@@ -0,0 +1,10 @@
+// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
+
+struct Foo { int member; };
+
+void f(Foo foo)
+{
+ "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+ "ideëen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+ "idez̈en" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:9:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+}
Index: lib/Frontend/TextDiagnostic.cpp
===================================================================
--- lib/Frontend/TextDiagnostic.cpp
+++ lib/Frontend/TextDiagnostic.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Unicode.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -818,6 +819,28 @@
if (DiagOpts->ShowColumn)
// Compute the column number.
if (unsigned ColNo = PLoc.getColumn()) {
+ // Correct the column number for multi-byte UTF-8 code-points.
+ bool Invalid = false;
+ StringRef BufData = Loc.getBufferData(&Invalid);
+ if (!Invalid) {
+ const char *BufStart = BufData.data();
+ const char *BufEnd = BufStart + BufData.size();
+
+ // Decompose the location into a FID/Offset pair.
+ std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
+ FileID FID = LocInfo.first;
+ const SourceManager &SM = Loc.getManager();
+ const char *LineStart =
+ BufStart +
+ SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second;
+ if (LineStart + ColNo < BufEnd) {
+ StringRef SourceLine(LineStart, ColNo);
+ int CorrectedColNo = llvm::sys::unicode::columnWidthUTF8(SourceLine);
+ if (CorrectedColNo != -1)
+ ColNo = unsigned(CorrectedColNo);
+ }
+ }
+
if (DiagOpts->getFormat() == DiagnosticOptions::MSVC) {
OS << ',';
// Visual Studio 2010 or earlier expects column number to be off by one
Index: test/Misc/diag-utf8.cpp
===================================================================
--- /dev/null
+++ test/Misc/diag-utf8.cpp
@@ -0,0 +1,10 @@
+// RUN: not %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck %s
+
+struct Foo { int member; };
+
+void f(Foo foo)
+{
+ "ideeen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:7:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+ "ideëen" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:8:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+ "idez̈en" << foo; // CHECK: {{.*[/\\]}}diag-utf8.cpp:9:14: error: invalid operands to binary expression ('const char *' and 'Foo')
+}
Index: lib/Frontend/TextDiagnostic.cpp
===================================================================
--- lib/Frontend/TextDiagnostic.cpp
+++ lib/Frontend/TextDiagnostic.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Unicode.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -818,6 +819,28 @@
if (DiagOpts->ShowColumn)
// Compute the column number.
if (unsigned ColNo = PLoc.getColumn()) {
+ // Correct the column number for multi-byte UTF-8 code-points.
+ bool Invalid = false;
+ StringRef BufData = Loc.getBufferData(&Invalid);
+ if (!Invalid) {
+ const char *BufStart = BufData.data();
+ const char *BufEnd = BufStart + BufData.size();
+
+ // Decompose the location into a FID/Offset pair.
+ std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
+ FileID FID = LocInfo.first;
+ const SourceManager &SM = Loc.getManager();
+ const char *LineStart =
+ BufStart +
+ SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second;
+ if (LineStart + ColNo < BufEnd) {
+ StringRef SourceLine(LineStart, ColNo);
+ int CorrectedColNo = llvm::sys::unicode::columnWidthUTF8(SourceLine);
+ if (CorrectedColNo != -1)
+ ColNo = unsigned(CorrectedColNo);
+ }
+ }
+
if (DiagOpts->getFormat() == DiagnosticOptions::MSVC) {
OS << ',';
// Visual Studio 2010 or earlier expects column number to be off by one
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits