jkorous created this revision. jkorous added reviewers: gribozavr, arphaman. Herald added subscribers: cfe-commits, dexonsmith, mgrang. Herald added a project: clang.
For large number of deserialized comments (~100k) the way how we try to attach them to declaration in completion comments is too expensive. Currently we're sorting all the comments by source location (expensive) and later bisecting them for every interesting declaration to find the closest comment before and after the declaration documentation comment. This patch tries to just iterate over unsorted comments and see if there's any of the decls we're interested in nearby. Repository: rC Clang https://reviews.llvm.org/D61104 Files: clang/include/clang/AST/ASTContext.h clang/include/clang/AST/RawCommentList.h clang/include/clang/Basic/SourceLocation.h clang/include/clang/Sema/CodeCompleteConsumer.h clang/lib/AST/ASTContext.cpp clang/lib/AST/RawCommentList.cpp clang/lib/Sema/SemaCodeComplete.cpp clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTWriter.cpp clang/tools/libclang/CIndexCodeCompletion.cpp
Index: clang/tools/libclang/CIndexCodeCompletion.cpp =================================================================== --- clang/tools/libclang/CIndexCodeCompletion.cpp +++ clang/tools/libclang/CIndexCodeCompletion.cpp @@ -579,12 +579,46 @@ StoredResults.reserve(StoredResults.size() + NumResults); if (includeFixIts()) AllocatedResults.FixItsVector.reserve(NumResults); + + auto CanResultHaveComment = + [](const CodeCompletionResult &Result) -> bool { + const auto &Kind = Result.Kind; + + if (Kind == CodeCompletionResult::RK_Declaration || + Kind == CodeCompletionResult::RK_Pattern) { + if (auto D = Result.getDeclaration()) { + return true; + } + } + + return false; + }; + + std::vector<const Decl *> DeclsThatCanHaveCommentAttached; for (unsigned I = 0; I != NumResults; ++I) { - CodeCompletionString *StoredCompletion - = Results[I].CreateCodeCompletionString(S, Context, getAllocator(), - getCodeCompletionTUInfo(), - includeBriefComments()); - + if (CanResultHaveComment(Results[I])) { + if (auto D = Results[I].getDeclaration()) { + DeclsThatCanHaveCommentAttached.push_back(D); + } + } + } + + auto Cache = getCompletionComments(S.getASTContext(), + DeclsThatCanHaveCommentAttached); + + for (unsigned I = 0; I != NumResults; ++I) { + const RawComment *Comment = nullptr; + if (CanResultHaveComment(Results[I])) { + const auto CachedValueIt = Cache.find(Results[I].getDeclaration()); + if (CachedValueIt != Cache.end()) { + Comment = CachedValueIt->second; + } + } + CodeCompletionString *StoredCompletion = + Results[I].CreateCodeCompletionString( + S, Context, getAllocator(), getCodeCompletionTUInfo(), + includeBriefComments(), Comment); + CXCompletionResult R; R.CursorKind = Results[I].CursorKind; R.CompletionString = StoredCompletion; Index: clang/lib/Serialization/ASTWriter.cpp =================================================================== --- clang/lib/Serialization/ASTWriter.cpp +++ clang/lib/Serialization/ASTWriter.cpp @@ -3254,7 +3254,11 @@ auto _ = llvm::make_scope_exit([this] { Stream.ExitBlock(); }); if (!PP->getPreprocessorOpts().WriteCommentListToPCH) return; - ArrayRef<RawComment *> RawComments = Context->Comments.getComments(); + ArrayRef<RawComment *> RawComments = + Context->Comments.getComments(/*Sorted=*/false); + Context->Comments.sort(); + Context->Comments.merge(Context->getLangOpts().CommentOpts); + RecordData Record; for (const auto *I : RawComments) { Record.clear(); Index: clang/lib/Serialization/ASTReader.cpp =================================================================== --- clang/lib/Serialization/ASTReader.cpp +++ clang/lib/Serialization/ASTReader.cpp @@ -9223,7 +9223,6 @@ NextCursor: // De-serialized SourceLocations get negative FileIDs for other modules, // potentially invalidating the original order. Sort it again. - llvm::sort(Comments, BeforeThanCompare<RawComment>(SourceMgr)); Context.Comments.addDeserializedComments(Comments); } } Index: clang/lib/Sema/SemaCodeComplete.cpp =================================================================== --- clang/lib/Sema/SemaCodeComplete.cpp +++ clang/lib/Sema/SemaCodeComplete.cpp @@ -3060,9 +3060,9 @@ CodeCompletionString *CodeCompletionResult::CreateCodeCompletionString( Sema &S, const CodeCompletionContext &CCContext, CodeCompletionAllocator &Allocator, CodeCompletionTUInfo &CCTUInfo, - bool IncludeBriefComments) { + bool IncludeBriefComments, const RawComment *Comment) { return CreateCodeCompletionString(S.Context, S.PP, CCContext, Allocator, - CCTUInfo, IncludeBriefComments); + CCTUInfo, IncludeBriefComments, Comment); } CodeCompletionString *CodeCompletionResult::CreateCodeCompletionStringForMacro( @@ -3120,7 +3120,7 @@ CodeCompletionString *CodeCompletionResult::CreateCodeCompletionString( ASTContext &Ctx, Preprocessor &PP, const CodeCompletionContext &CCContext, CodeCompletionAllocator &Allocator, CodeCompletionTUInfo &CCTUInfo, - bool IncludeBriefComments) { + bool IncludeBriefComments, const RawComment *Comment) { if (Kind == RK_Macro) return CreateCodeCompletionStringForMacro(PP, Allocator, CCTUInfo); @@ -3150,7 +3150,7 @@ } assert(Kind == RK_Declaration && "Missed a result kind?"); return createCodeCompletionStringForDecl( - PP, Ctx, Result, IncludeBriefComments, CCContext, Policy); + PP, Ctx, Result, IncludeBriefComments, CCContext, Policy, Comment); } CodeCompletionString * @@ -3172,15 +3172,18 @@ CodeCompletionString *CodeCompletionResult::createCodeCompletionStringForDecl( Preprocessor &PP, ASTContext &Ctx, CodeCompletionBuilder &Result, bool IncludeBriefComments, const CodeCompletionContext &CCContext, - PrintingPolicy &Policy) { + PrintingPolicy &Policy, const RawComment *Comment) { const NamedDecl *ND = Declaration; Result.addParentContext(ND->getDeclContext()); if (IncludeBriefComments) { + // Try to get the comment if it wasn't provided + if (!Comment) + Comment = getCompletionComment(Ctx, Declaration); + // Add documentation comment, if it exists. - if (const RawComment *RC = getCompletionComment(Ctx, Declaration)) { - Result.addBriefComment(RC->getBriefText(Ctx)); - } + if (Comment) + Result.addBriefComment(Comment->getBriefText(Ctx)); } if (StartsNestedNameSpecifier) { @@ -3387,6 +3390,21 @@ return Ctx.getRawCommentForAnyRedecl(PDecl); } +std::unordered_map<const Decl *, RawComment *> +clang::getCompletionComments(const ASTContext &Ctx, + const std::vector<const Decl *> &NDs) { + std::vector<const Decl *> Decls = NDs; + for (const auto *const ND : NDs) { + if (const ObjCMethodDecl *M = dyn_cast<ObjCMethodDecl>(ND)) { + if (const ObjCPropertyDecl *PDecl = M->findPropertyDecl()) { + Decls.push_back(dyn_cast<NamedDecl>(PDecl)); + } + } + } + + return Ctx.getRawCommentsForAnyRedecls(Decls); +} + const RawComment *clang::getPatternCompletionComment(const ASTContext &Ctx, const NamedDecl *ND) { const auto *M = dyn_cast_or_null<ObjCMethodDecl>(ND); Index: clang/lib/AST/RawCommentList.cpp =================================================================== --- clang/lib/AST/RawCommentList.cpp +++ clang/lib/AST/RawCommentList.cpp @@ -273,15 +273,6 @@ if (RC.isInvalid()) return; - // Check if the comments are not in source order. - while (!Comments.empty() && - !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getBeginLoc(), - RC.getBeginLoc())) { - // If they are, just pop a few last comments that don't fit. - // This happens if an \#include directive contains comments. - Comments.pop_back(); - } - // Ordinary comments are not interesting for us. if (RC.isOrdinary() && !CommentOpts.ParseAllComments) return; @@ -322,6 +313,57 @@ } else { Comments.push_back(new (Allocator) RawComment(RC)); } + + CommentsSorted = false; +} + +void RawCommentList::sort() const { + if (CommentsSorted) + return; + + llvm::sort(Comments.begin(), Comments.end(), + BeforeThanCompare<RawComment>(SourceMgr)); +} + +void RawCommentList::merge(const CommentOptions& CommentOpts) const { + if (Comments.size() < 2) + return; + + if (!CommentsSorted) + sort(); + + auto C1 = Comments.begin(); + auto C2 = C1; + ++C2; + + while (C2 != Comments.end()) { + std::pair<FileID, unsigned> Loc1Info = + SourceMgr.getDecomposedLoc((*C1)->getEndLoc()); + std::pair<FileID, unsigned> Loc2Info = + SourceMgr.getDecomposedLoc((*C2)->getBeginLoc()); + // FIXME: Ignoring overlapping comments. + if (Loc1Info.second > Loc2Info.second) { + ++C1; + ++C2; + continue; + } + + if (((*C1)->isTrailingComment() == (*C2)->isTrailingComment() || + ((*C1)->isTrailingComment() && !(*C2)->isTrailingComment() && + isOrdinaryKind((*C2)->getKind()) && + commentsStartOnSameColumn(SourceMgr, **C1, **C2))) && + onlyWhitespaceBetween(SourceMgr, Loc1Info, Loc2Info, + /*MaxNewlinesAllowed=*/1)) { + SourceRange MergedRange((*C1)->getBeginLoc(), (*C2)->getEndLoc()); + **C1 = RawComment(SourceMgr, MergedRange, CommentOpts, true); + Comments.erase(C2); + C2 = C1; + ++C2; + } else { + ++C1; + ++C2; + } + } } void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) { @@ -333,6 +375,8 @@ std::back_inserter(MergedComments), BeforeThanCompare<RawComment>(SourceMgr)); std::swap(Comments, MergedComments); + // FIXME: This could be relaxed in some cases. + CommentsSorted = false; } std::string RawComment::getFormattedText(const SourceManager &SourceMgr, Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -140,6 +140,46 @@ return DeclLoc; }; +static void getDeclLocationsForCommentSearch( + SourceManager &SrcMgr, + const std::unordered_map<const Decl *, std::vector<const Decl *>> + &DeclToRedecls, + std::unordered_map<FileID, + std::unordered_map<unsigned, std::vector<const Decl *>>> + &FileToLineToDecls, + std::unordered_map<FileID, + std::unordered_map<unsigned, std::vector<const Decl *>>> + &FileToOffsetToDecls) { + for (auto It : DeclToRedecls) { + if (llvm::Optional<SourceLocation> OptLoc = + getCandidateCommentLocation(SrcMgr, It.first)) { + auto Loc = OptLoc.getValue(); + const std::pair<FileID, unsigned> DecompLoc = + SrcMgr.getDecomposedLoc(Loc); + FileToOffsetToDecls[DecompLoc.first][DecompLoc.second].push_back( + It.first); + const unsigned Line = + SrcMgr.getLineNumber(DecompLoc.first, DecompLoc.second); + FileToLineToDecls[DecompLoc.first][Line].push_back(It.first); + } + } + + for (auto It : DeclToRedecls) { + for (auto *D : It.second) { + if (llvm::Optional<SourceLocation> OptLoc = + getCandidateCommentLocation(SrcMgr, D)) { + auto Loc = OptLoc.getValue(); + const std::pair<FileID, unsigned> DecompLoc = + SrcMgr.getDecomposedLoc(Loc); + FileToOffsetToDecls[DecompLoc.first][DecompLoc.second].push_back(D); + const unsigned Line = + SrcMgr.getLineNumber(DecompLoc.first, DecompLoc.second); + FileToLineToDecls[DecompLoc.first][Line].push_back(D); + } + } + } +} + // Assumes the comment is the closest comment before the decl - i. e. there's no // other comment between them. static bool IsCommentBeforeDecl(SourceManager &SrcMgr, @@ -174,6 +214,95 @@ return true; }; +std::unordered_map<const Decl *, RawComment *> +ASTContext::getRawCommentsForDeclsNoCache( + const std::unordered_map<const Decl *, std::vector<const Decl *>> &ReDecls) + const { + if (!CommentsLoaded && ExternalSource) { + ExternalSource->ReadComments(); + CommentsLoaded = true; + } + + std::unordered_map<const Decl *, RawComment *> Result; + + ArrayRef<RawComment *> RawComments = Comments.getComments(/*Sorted=*/false); + // If there are no comments anywhere, we won't find anything. + if (RawComments.empty()) + return Result; + + std::unordered_map<FileID, + std::unordered_map<unsigned, std::vector<const Decl *>>> + FileToLineToDecls; + std::unordered_map<FileID, + std::unordered_map<unsigned, std::vector<const Decl *>>> + FileToOffsetToDecls; + + getDeclLocationsForCommentSearch(SourceMgr, ReDecls, FileToLineToDecls, + FileToOffsetToDecls); + + auto CheckDeclsOnTheSameLine = [&, this](RawComment *Comment) { + const std::pair<FileID, unsigned> CommentBegin = + SourceMgr.getDecomposedLoc(Comment->getSourceRange().getBegin()); + + const auto DeclsInTheFileIt = FileToLineToDecls.find(CommentBegin.first); + if (DeclsInTheFileIt == FileToLineToDecls.end()) { + return; + } + + const unsigned CommentBeginLine = + SourceMgr.getLineNumber(CommentBegin.first, CommentBegin.second); + const auto DeclsOnTheLineIt = + DeclsInTheFileIt->second.find(CommentBeginLine); + if (DeclsOnTheLineIt == DeclsInTheFileIt->second.end()) { + return; + } + + for (auto *D : DeclsOnTheLineIt->second) { + if ((isa<FieldDecl>(D) || isa<EnumConstantDecl>(D) || isa<VarDecl>(D) || + isa<ObjCMethodDecl>(D) || isa<ObjCPropertyDecl>(D))) { + Result[D] = Comment; + } + } + }; + + auto CheckDeclsRightBehind = [&, this](RawComment *Comment) { + const std::pair<FileID, unsigned> CommentEnd = + SourceMgr.getDecomposedLoc(Comment->getSourceRange().getEnd()); + + // If the comment and the declaration aren't in the same file, then they + // aren't related. + const auto DeclsInTheFileIt = FileToOffsetToDecls.find(CommentEnd.first); + if (DeclsInTheFileIt == FileToOffsetToDecls.end()) { + return; + } + + std::pair<FileID, unsigned> TmpDeclLoc(DeclsInTheFileIt->first, + /* bogus */ 0); + for (const auto &OffsetToDeclsIt : DeclsInTheFileIt->second) { + TmpDeclLoc.second = OffsetToDeclsIt.first; + if (!IsCommentBeforeDecl(SourceMgr, CommentEnd, TmpDeclLoc)) + continue; + + // There can be multiple declaration at the same offset. + for (const auto &D : OffsetToDeclsIt.second) { + Result[D] = Comment; + } + } + }; + + for (auto RC : RawComments) { + if (LangOpts.CommentOpts.ParseAllComments || RC->isDocumentation()) { + if (RC->isTrailingComment()) { + CheckDeclsOnTheSameLine(RC); + } else { + CheckDeclsRightBehind(RC); + } + } + } + + return Result; +} + /// Returns true iff \param D can have a documentation comment attached. static bool CanDeclHaveDocComment(const Decl *D) { if (!D) @@ -236,7 +365,7 @@ // If we already tried to load comments but there are none, // we won't find anything. - if (CommentsLoaded && Comments.getComments().empty()) + if (CommentsLoaded && Comments.getComments(/*Sorted=*/false).empty()) return nullptr; if (!CanDeclHaveDocComment(D)) @@ -252,17 +381,15 @@ if (!CommentsLoaded && ExternalSource) { ExternalSource->ReadComments(); - -#ifndef NDEBUG - ArrayRef<RawComment *> RawComments = Comments.getComments(); - assert(std::is_sorted(RawComments.begin(), RawComments.end(), - BeforeThanCompare<RawComment>(SourceMgr))); -#endif - CommentsLoaded = true; } - ArrayRef<RawComment *> RawComments = Comments.getComments(); + ArrayRef<RawComment *> RawComments = Comments.getComments(/*Sorted=*/true); + +#ifndef NDEBUG + assert(std::is_sorted(RawComments.begin(), RawComments.end(), + BeforeThanCompare<RawComment>(SourceMgr))); +#endif // If there are no comments anywhere, we won't find anything. if (RawComments.empty()) @@ -418,6 +545,31 @@ return D; } +std::unordered_map<const Decl *, RawComment *> +ASTContext::getRawCommentsForAnyRedecls( + const std::vector<const Decl *> &Ds) const { + + // mapping from decl to it's re-decls + std::unordered_map<const Decl *, std::vector<const Decl *>> ReDecls; + ReDecls.reserve(Ds.size()); + + for (const auto *D : Ds) { + if (!CanDeclHaveDocComment(D)) + continue; + + D = adjustDeclToTemplate(D); + std::vector<const Decl *> RedeclsForDecl; + for (auto I : D->redecls()) { + if (I == D) + continue; + if (const auto RD = dyn_cast<NamedDecl>(I)) + RedeclsForDecl.push_back(RD); + } + ReDecls[D] = RedeclsForDecl; + } + return getRawCommentsForDeclsNoCache(ReDecls); +} + const RawComment *ASTContext::getRawCommentForAnyRedecl( const Decl *D, const Decl **OriginalDecl) const { @@ -492,7 +644,7 @@ void ASTContext::tryToAttachCommentsToDecls(ArrayRef<Decl *> Decls, const Preprocessor *PP) { // Explicitly not calling ExternalSource->ReadComments() as we're not // interested in those. - ArrayRef<RawComment *> RawComments = Comments.getComments(); + ArrayRef<RawComment *> RawComments = Comments.getComments(/*Sorted=*/false); if (RawComments.empty()) return; Index: clang/include/clang/Sema/CodeCompleteConsumer.h =================================================================== --- clang/include/clang/Sema/CodeCompleteConsumer.h +++ clang/include/clang/Sema/CodeCompleteConsumer.h @@ -31,6 +31,7 @@ #include <cassert> #include <memory> #include <string> +#include <unordered_map> #include <utility> namespace clang { @@ -927,17 +928,14 @@ /// /// \param Allocator The allocator that will be used to allocate the /// string itself. - CodeCompletionString *CreateCodeCompletionString(Sema &S, - const CodeCompletionContext &CCContext, - CodeCompletionAllocator &Allocator, - CodeCompletionTUInfo &CCTUInfo, - bool IncludeBriefComments); - CodeCompletionString *CreateCodeCompletionString(ASTContext &Ctx, - Preprocessor &PP, - const CodeCompletionContext &CCContext, - CodeCompletionAllocator &Allocator, - CodeCompletionTUInfo &CCTUInfo, - bool IncludeBriefComments); + CodeCompletionString *CreateCodeCompletionString( + Sema &S, const CodeCompletionContext &CCContext, + CodeCompletionAllocator &Allocator, CodeCompletionTUInfo &CCTUInfo, + bool IncludeBriefComments, const RawComment *Comment = nullptr); + CodeCompletionString *CreateCodeCompletionString( + ASTContext &Ctx, Preprocessor &PP, const CodeCompletionContext &CCContext, + CodeCompletionAllocator &Allocator, CodeCompletionTUInfo &CCTUInfo, + bool IncludeBriefComments, const RawComment *Comment = nullptr); /// Creates a new code-completion string for the macro result. Similar to the /// above overloads, except this only requires preprocessor information. /// The result kind must be `RK_Macro`. @@ -949,7 +947,7 @@ CodeCompletionString *createCodeCompletionStringForDecl( Preprocessor &PP, ASTContext &Ctx, CodeCompletionBuilder &Result, bool IncludeBriefComments, const CodeCompletionContext &CCContext, - PrintingPolicy &Policy); + PrintingPolicy &Policy, const RawComment *Comment = nullptr); CodeCompletionString *createCodeCompletionStringForOverride( Preprocessor &PP, ASTContext &Ctx, CodeCompletionBuilder &Result, @@ -1148,6 +1146,11 @@ const RawComment *getCompletionComment(const ASTContext &Ctx, const NamedDecl *Decl); +/// Return comments attributed to \param NDs in \param Ctx +std::unordered_map<const Decl *, RawComment *> +getCompletionComments(const ASTContext &Ctx, + const std::vector<const Decl *> &Ds); + /// Get the documentation comment used to produce /// CodeCompletionString::BriefComment for RK_Pattern. const RawComment *getPatternCompletionComment(const ASTContext &Ctx, Index: clang/include/clang/Basic/SourceLocation.h =================================================================== --- clang/include/clang/Basic/SourceLocation.h +++ clang/include/clang/Basic/SourceLocation.h @@ -472,4 +472,22 @@ } // namespace llvm +namespace std { +template <> struct hash<clang::SourceLocation> { + typedef clang::SourceLocation argument_type; + typedef unsigned result_type; + result_type operator()(argument_type const &SL) const noexcept { + return SL.getRawEncoding(); + } +}; + +template <> struct hash<clang::FileID> { + typedef clang::FileID argument_type; + typedef int result_type; + result_type operator()(argument_type const &FID) const noexcept { + return FID.getHashValue(); + } +}; +} // namespace std + #endif // LLVM_CLANG_BASIC_SOURCELOCATION_H Index: clang/include/clang/AST/RawCommentList.h =================================================================== --- clang/include/clang/AST/RawCommentList.h +++ clang/include/clang/AST/RawCommentList.h @@ -187,22 +187,46 @@ } }; -/// This class represents all comments included in the translation unit, -/// sorted in order of appearance in the translation unit. +/// This class represents all comments included in the translation unit. class RawCommentList { public: - RawCommentList(SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} + RawCommentList(SourceManager &SourceMgr) + : SourceMgr(SourceMgr), CommentsSorted(true) {} void addComment(const RawComment &RC, const CommentOptions &CommentOpts, llvm::BumpPtrAllocator &Allocator); - ArrayRef<RawComment *> getComments() const { + ArrayRef<RawComment *> getComments(bool Sorted) const { + if (Sorted && !CommentsSorted) + sort(); return Comments; } + /// Sorts comments using BeforeThanCompare<RawComment> ordering. + void sort() const; + + /// Merge comments only if there is only whitespace between them. + /// Merge comments if they are on same or consecutive lines. + /// Sorts comments unless they are already sorted. + /// + /// Can't merge trailing and non-trailing comments unless the second is + /// non-trailing ordinary in the same column, as in the case: + /// int x; // documents x + /// // more text + /// versus: + /// int x; // documents x + /// int y; // documents y + /// or: + /// int x; // documents x + /// // documents y + /// int y; + void merge(const CommentOptions& CommentOpts) const; + private: SourceManager &SourceMgr; - std::vector<RawComment *> Comments; + + mutable std::vector<RawComment *> Comments; + mutable bool CommentsSorted; void addDeserializedComments(ArrayRef<RawComment *> DeserializedComments); Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -794,6 +794,19 @@ /// without looking into cache. RawComment *getRawCommentForDeclNoCache(const Decl *D) const; + /// Return the documentation comments attached to a given declarations, + /// without looking into cache. + /// The result doesn't contain decls that don't have any comment attached. + std::unordered_map<const Decl *, RawComment *> getRawCommentsForDeclsNoCache( + const std::unordered_map<const Decl *, std::vector<const Decl *>> + &ReDecls) const; + + /// Return the documentation comment attached to any redeclaration of given + /// declarations. The result doesn't contain decls that don't have any comment + /// attached. + std::unordered_map<const Decl *, RawComment *> + getRawCommentsForAnyRedecls(const std::vector<const Decl *> &NDs) const; + public: RawCommentList &getRawCommentList() { return Comments;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits