Author: Timm Baeder Date: 2024-01-27T17:52:20+01:00 New Revision: 718aac9f7a19227b5c5ec85819a3a5ae24ce32e1
URL: https://github.com/llvm/llvm-project/commit/718aac9f7a19227b5c5ec85819a3a5ae24ce32e1 DIFF: https://github.com/llvm/llvm-project/commit/718aac9f7a19227b5c5ec85819a3a5ae24ce32e1.diff LOG: [clang][Diagnostics] Highlight code snippets (#66514) Add some primitive syntax highlighting to our code snippet output. This adds "checkpoints" to the Preprocessor, which we can use to start lexing from. When printing a code snippet, we lex from the nearest checkpoint and highlight the tokens based on their token type. Added: clang/test/Frontend/diagnostic-pipe.c Modified: clang/docs/ReleaseNotes.rst clang/include/clang/Frontend/TextDiagnostic.h clang/include/clang/Lex/Preprocessor.h clang/lib/Frontend/TextDiagnostic.cpp clang/lib/Frontend/TextDiagnosticPrinter.cpp clang/lib/Lex/Preprocessor.cpp Removed: ################################################################################ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index aa06e2b60ce915..254e0a9cb72979 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -103,6 +103,8 @@ Attribute Changes in Clang Improvements to Clang's diagnostics ----------------------------------- +- Clang now applies syntax highlighting to the code snippets it + prints. Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 7eb0ab0cdc9bca..a2fe8ae995423b 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -16,6 +16,7 @@ #define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H #include "clang/Frontend/DiagnosticRenderer.h" +#include "llvm/Support/raw_ostream.h" namespace clang { @@ -33,14 +34,22 @@ namespace clang { /// printing coming out of libclang. 
class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; + const Preprocessor *PP; public: - TextDiagnostic(raw_ostream &OS, - const LangOptions &LangOpts, - DiagnosticOptions *DiagOpts); + TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, + DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr); ~TextDiagnostic() override; + struct StyleRange { + unsigned Start; + unsigned End; + enum llvm::raw_ostream::Colors Color; + StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C) + : Start(S), End(E), Color(C){}; + }; + /// Print the diagnostic level to a raw_ostream. /// /// This is a static helper that handles colorizing the level and formatting @@ -104,7 +113,8 @@ class TextDiagnostic : public DiagnosticRenderer { ArrayRef<FixItHint> Hints); void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo); + unsigned LineNo, unsigned DisplayLineNo, + ArrayRef<StyleRange> Styles); void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM); }; diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 2d9c53cdf5bde8..9d0d53129a12dd 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -284,6 +284,13 @@ class Preprocessor { /// The kind of translation unit we are processing. const TranslationUnitKind TUKind; + /// Returns a pointer into the given file's buffer that's guaranteed + /// to be between tokens. The returned pointer is always before \p Start. + /// The maximum distance between the returned pointer and \p Start is + /// limited by a constant value, but also an implementation detail. + /// If no such check point exists, \c nullptr is returned. + const char *getCheckPoint(FileID FID, const char *Start) const; + private: /// The code-completion handler. 
CodeCompletionHandler *CodeComplete = nullptr; @@ -311,6 +318,9 @@ class Preprocessor { /// The import path for named module that we're currently processing. SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath; + llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints; + unsigned CheckPointCounter = 0; + /// Whether the import is an `@import` or a standard c++ modules import. bool IsAtImport = false; diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 779dead5d058d1..291d71f6db61f1 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -12,6 +12,7 @@ #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" @@ -41,6 +42,14 @@ static const enum raw_ostream::Colors fatalColor = raw_ostream::RED; static const enum raw_ostream::Colors savedColor = raw_ostream::SAVEDCOLOR; +// Magenta is taken for 'warning'. Red is already 'error' and 'cyan' +// is already taken for 'note'. Green is already used to underline +// source ranges. White and black are bad because of the usual +// terminal backgrounds. Which leaves us only with TWO options. +static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW; +static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; +static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; + /// Add highlights to differences in template strings. 
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str, bool &Normal, bool Bold) { @@ -644,10 +653,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, return Wrapped; } -TextDiagnostic::TextDiagnostic(raw_ostream &OS, - const LangOptions &LangOpts, - DiagnosticOptions *DiagOpts) - : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {} +TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, + DiagnosticOptions *DiagOpts, + const Preprocessor *PP) + : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {} TextDiagnostic::~TextDiagnostic() {} @@ -1112,6 +1121,162 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges, return LineRanges; } +/// Creates syntax highlighting information in form of StyleRanges. +/// +/// The returned unique ptr has always exactly size +/// (\p EndLineNumber - \p StartLineNumber + 1). Each SmallVector in there +/// corresponds to syntax highlighting information in one line. In each line, +/// the StyleRanges are non-overlapping and sorted from start to end of the +/// line. +static std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]> +highlightLines(StringRef FileData, unsigned StartLineNumber, + unsigned EndLineNumber, const Preprocessor *PP, + const LangOptions &LangOpts, bool ShowColors, FileID FID, + const SourceManager &SM) { + assert(StartLineNumber <= EndLineNumber); + auto SnippetRanges = + std::make_unique<SmallVector<TextDiagnostic::StyleRange>[]>( + EndLineNumber - StartLineNumber + 1); + + if (!PP || !ShowColors) + return SnippetRanges; + + // Might cause emission of another diagnostic. 
+ if (PP->getIdentifierTable().getExternalIdentifierLookup()) + return SnippetRanges; + + auto Buff = llvm::MemoryBuffer::getMemBuffer(FileData); + Lexer L{FID, *Buff, SM, LangOpts}; + L.SetKeepWhitespaceMode(true); + + const char *FirstLineStart = + FileData.data() + + SM.getDecomposedLoc(SM.translateLineCol(FID, StartLineNumber, 1)).second; + if (const char *CheckPoint = PP->getCheckPoint(FID, FirstLineStart)) { + assert(CheckPoint >= Buff->getBufferStart() && + CheckPoint <= Buff->getBufferEnd()); + assert(CheckPoint <= FirstLineStart); + size_t Offset = CheckPoint - Buff->getBufferStart(); + L.seek(Offset, /*IsAtStartOfLine=*/false); + } + + // Classify the given token and append it to the given vector. + auto appendStyle = + [PP, &LangOpts](SmallVector<TextDiagnostic::StyleRange> &Vec, + const Token &T, unsigned Start, unsigned Length) -> void { + if (T.is(tok::raw_identifier)) { + StringRef RawIdent = T.getRawIdentifier(); + // Special case true/false/nullptr/... literals, since they will otherwise + // be treated as keywords. + // FIXME: It would be good to have a programmatic way of getting this + // list. 
+ if (llvm::StringSwitch<bool>(RawIdent) + .Case("true", true) + .Case("false", true) + .Case("nullptr", true) + .Case("__func__", true) + .Case("__objc_yes__", true) + .Case("__objc_no__", true) + .Case("__null", true) + .Case("__FUNCDNAME__", true) + .Case("__FUNCSIG__", true) + .Case("__FUNCTION__", true) + .Case("__FUNCSIG__", true) + .Default(false)) { + Vec.emplace_back(Start, Start + Length, LiteralColor); + } else { + const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); + assert(II); + if (II->isKeyword(LangOpts)) + Vec.emplace_back(Start, Start + Length, KeywordColor); + } + } else if (tok::isLiteral(T.getKind())) { + Vec.emplace_back(Start, Start + Length, LiteralColor); + } else { + assert(T.is(tok::comment)); + Vec.emplace_back(Start, Start + Length, CommentColor); + } + }; + + bool Stop = false; + while (!Stop) { + Token T; + Stop = L.LexFromRawLexer(T); + if (T.is(tok::unknown)) + continue; + + // We are only interested in identifiers, literals and comments. + if (!T.is(tok::raw_identifier) && !T.is(tok::comment) && + !tok::isLiteral(T.getKind())) + continue; + + bool Invalid = false; + unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid); + if (Invalid || TokenEndLine < StartLineNumber) + continue; + + assert(TokenEndLine >= StartLineNumber); + + unsigned TokenStartLine = + SM.getSpellingLineNumber(T.getLocation(), &Invalid); + if (Invalid) + continue; + // If this happens, we're done. + if (TokenStartLine > EndLineNumber) + break; + + unsigned StartCol = + SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; + if (Invalid) + continue; + + // Simple tokens. 
+ if (TokenStartLine == TokenEndLine) { + SmallVector<TextDiagnostic::StyleRange> &LineRanges = + SnippetRanges[TokenStartLine - StartLineNumber]; + appendStyle(LineRanges, T, StartCol, T.getLength()); + continue; + } + assert((TokenEndLine - TokenStartLine) >= 1); + + // For tokens that span multiple lines (think multiline comments), we + // divide them into multiple StyleRanges. + unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; + if (Invalid) + continue; + + std::string Spelling = Lexer::getSpelling(T, SM, LangOpts); + + unsigned L = TokenStartLine; + unsigned LineLength = 0; + for (unsigned I = 0; I <= Spelling.size(); ++I) { + // This line is done. + if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) { + SmallVector<TextDiagnostic::StyleRange> &LineRanges = + SnippetRanges[L - StartLineNumber]; + + if (L >= StartLineNumber) { + if (L == TokenStartLine) // First line + appendStyle(LineRanges, T, StartCol, LineLength); + else if (L == TokenEndLine) // Last line + appendStyle(LineRanges, T, 0, EndCol); + else + appendStyle(LineRanges, T, 0, LineLength); + } + + ++L; + if (L > EndLineNumber) + break; + LineLength = 0; + continue; + } + ++LineLength; + } + } + + return SnippetRanges; +} + /// Emit a code snippet and caret line. /// /// This routine emits a single line's code snippet and caret line.. @@ -1181,6 +1346,12 @@ void TextDiagnostic::emitSnippetAndCaret( OS.indent(MaxLineNoDisplayWidth + 2) << "| "; }; + // Prepare source highlighting information for the lines we're about to + // emit, starting from the first line. + std::unique_ptr<SmallVector<StyleRange>[]> SourceStyles = + highlightLines(BufStart, Lines.first, Lines.second, PP, LangOpts, + DiagOpts->ShowColors, FID, SM); + SmallVector<LineRange> LineRanges = prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts); @@ -1247,7 +1418,8 @@ void TextDiagnostic::emitSnippetAndCaret( } // Emit what we have computed. 
- emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo); + emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo, + SourceStyles[LineNo - Lines.first]); if (!CaretLine.empty()) { indentForLineNumbers(); @@ -1277,16 +1449,18 @@ void TextDiagnostic::emitSnippetAndCaret( void TextDiagnostic::emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth, - unsigned LineNo) { + unsigned LineNo, unsigned DisplayLineNo, + ArrayRef<StyleRange> Styles) { // Emit line number. if (MaxLineNoDisplayWidth > 0) { - unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo); + unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo); OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1) - << LineNo << " | "; + << DisplayLineNo << " | "; } // Print the source line one character at a time. bool PrintReversed = false; + std::optional<llvm::raw_ostream::Colors> CurrentColor; size_t I = 0; while (I < SourceLine.size()) { auto [Str, WasPrintable] = @@ -1298,10 +1472,29 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine, PrintReversed = !PrintReversed; if (PrintReversed) OS.reverseColor(); - else + else { OS.resetColor(); + CurrentColor = std::nullopt; + } + } + + // Apply syntax highlighting information. 
+ const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) { + return (R.Start < I && R.End >= I); + }); + + if (CharStyle != Styles.end()) { + if (!CurrentColor || + (CurrentColor && *CurrentColor != CharStyle->Color)) { + OS.changeColor(CharStyle->Color, false); + CurrentColor = CharStyle->Color; + } + } else if (CurrentColor) { + OS.resetColor(); + CurrentColor = std::nullopt; } } + OS << Str; } diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp index 0ff5376098ffe8..b2fb762537573e 100644 --- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() { void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO, const Preprocessor *PP) { // Build the TextDiagnostic utility. - TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts)); + TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP)); } void TextDiagnosticPrinter::EndSourceFile() { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 7fdb5d4c0d7b82..031ed1e16bb8fc 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -72,6 +72,9 @@ using namespace clang; +/// Minimum distance between two check points, in tokens. 
+static constexpr unsigned CheckPointStepSize = 1024; + LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry) ExternalPreprocessorSource::~ExternalPreprocessorSource() = default; @@ -954,6 +957,11 @@ void Preprocessor::Lex(Token &Result) { } } + if (CurLexer && ++CheckPointCounter == CheckPointStepSize) { + CheckPoints[CurLexer->getFileID()].push_back(CurLexer->BufferPtr); + CheckPointCounter = 0; + } + LastTokenWasAt = Result.is(tok::at); --LexLevel; @@ -1558,3 +1566,19 @@ void Preprocessor::createPreprocessingRecord() { Record = new PreprocessingRecord(getSourceManager()); addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); } + +const char *Preprocessor::getCheckPoint(FileID FID, const char *Start) const { + if (auto It = CheckPoints.find(FID); It != CheckPoints.end()) { + const SmallVector<const char *> &FileCheckPoints = It->second; + const char *Last = nullptr; + // FIXME: Do better than a linear search. + for (const char *P : FileCheckPoints) { + if (P > Start) + break; + Last = P; + } + return Last; + } + + return nullptr; +} diff --git a/clang/test/Frontend/diagnostic-pipe.c b/clang/test/Frontend/diagnostic-pipe.c new file mode 100644 index 00000000000000..61dbdcfb2e0e7c --- /dev/null +++ b/clang/test/Frontend/diagnostic-pipe.c @@ -0,0 +1,9 @@ + +_Static_assert(0, ""); + +/// Test that piping the output into another process disables syntax +/// highlighting of code snippets. + +// RUN: not %clang_cc1 %s -o /dev/null 2>&1 | FileCheck %s +// CHECK: error: static assertion failed: +// CHECK-NEXT: {{^}} 2 | _Static_assert(0, "");{{$}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits