Author: rsmith
Date: Wed Dec 9 19:11:47 2015
New Revision: 255198

URL: http://llvm.org/viewvc/llvm-project?rev=255198&view=rev
Log:
PR25416: Improve performance of processing inline assembly consisting of many
implicitly-concatenated string literals. When looking for the start of a token
in the inline assembly, start from the end of the previous token, not the start
of the entire string.
Patch by Yunlian Jiang! Modified: cfe/trunk/include/clang/AST/Expr.h cfe/trunk/lib/AST/Expr.cpp cfe/trunk/lib/CodeGen/CGStmt.cpp Modified: cfe/trunk/include/clang/AST/Expr.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Expr.h?rev=255198&r1=255197&r2=255198&view=diff ============================================================================== --- cfe/trunk/include/clang/AST/Expr.h (original) +++ cfe/trunk/include/clang/AST/Expr.h Wed Dec 9 19:11:47 2015 @@ -1631,13 +1631,15 @@ public: /// and can have escape sequences in them in addition to the usual trigraph /// and escaped newline business. This routine handles this complexity. /// - SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM, - const LangOptions &Features, - const TargetInfo &Target) const; + SourceLocation + getLocationOfByte(unsigned ByteNo, const SourceManager &SM, + const LangOptions &Features, const TargetInfo &Target, + unsigned *StartToken = nullptr, + unsigned *StartTokenByteOffset = nullptr) const; typedef const SourceLocation *tokloc_iterator; tokloc_iterator tokloc_begin() const { return TokLocs; } - tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; } + tokloc_iterator tokloc_end() const { return TokLocs + NumConcatenated; } SourceLocation getLocStart() const LLVM_READONLY { return TokLocs[0]; } SourceLocation getLocEnd() const LLVM_READONLY { Modified: cfe/trunk/lib/AST/Expr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=255198&r1=255197&r2=255198&view=diff ============================================================================== --- cfe/trunk/lib/AST/Expr.cpp (original) +++ cfe/trunk/lib/AST/Expr.cpp Wed Dec 9 19:11:47 2015 @@ -1007,15 +1007,33 @@ void StringLiteral::setString(const ASTC /// can have escape sequences in them in addition to the usual trigraph and /// escaped newline business. This routine handles this complexity. 
/// -SourceLocation StringLiteral:: -getLocationOfByte(unsigned ByteNo, const SourceManager &SM, - const LangOptions &Features, const TargetInfo &Target) const { +/// The *StartToken sets the first token to be searched in this function and +/// the *StartTokenByteOffset is the byte offset of the first token. Before +/// returning, it updates the *StartToken to the TokNo of the token being found +/// and sets *StartTokenByteOffset to the byte offset of the token in the +/// string. +/// Using these two parameters can reduce the time complexity from O(n^2) to +/// O(n) if one wants to get the location of byte for all the tokens in a +/// string. +/// +SourceLocation +StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM, + const LangOptions &Features, + const TargetInfo &Target, unsigned *StartToken, + unsigned *StartTokenByteOffset) const { assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) && "Only narrow string literals are currently supported"); // Loop over all of the tokens in this string until we find the one that // contains the byte we're looking for. unsigned TokNo = 0; + unsigned StringOffset = 0; + if (StartToken) + TokNo = *StartToken; + if (StartTokenByteOffset) { + StringOffset = *StartTokenByteOffset; + ByteNo -= StringOffset; + } while (1) { assert(TokNo < getNumConcatenated() && "Invalid byte number!"); SourceLocation StrTokLoc = getStrTokenLoc(TokNo); @@ -1024,14 +1042,20 @@ getLocationOfByte(unsigned ByteNo, const // the string literal, not the identifier for the macro it is potentially // expanded through. SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc); - + // Re-lex the token to get its length and original spelling. 
- std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc); + std::pair<FileID, unsigned> LocInfo = + SM.getDecomposedLoc(StrTokSpellingLoc); bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); - if (Invalid) + if (Invalid) { + if (StartTokenByteOffset != nullptr) + *StartTokenByteOffset = StringOffset; + if (StartToken != nullptr) + *StartToken = TokNo; return StrTokSpellingLoc; - + } + const char *StrData = Buffer.data()+LocInfo.second; // Create a lexer starting at the beginning of this token. @@ -1047,14 +1071,19 @@ getLocationOfByte(unsigned ByteNo, const // If the byte is in this token, return the location of the byte. if (ByteNo < TokNumBytes || (ByteNo == TokNumBytes && TokNo == getNumConcatenated() - 1)) { - unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); - + unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); + // Now that we know the offset of the token in the spelling, use the // preprocessor to get the offset in the original source. + if (StartTokenByteOffset != nullptr) + *StartTokenByteOffset = StringOffset; + if (StartToken != nullptr) + *StartToken = TokNo; return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features); } - + // Move to the next string token. 
+ StringOffset += TokNumBytes; ++TokNo; ByteNo -= TokNumBytes; } Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=255198&r1=255197&r2=255198&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGStmt.cpp (original) +++ cfe/trunk/lib/CodeGen/CGStmt.cpp Wed Dec 9 19:11:47 2015 @@ -1714,13 +1714,15 @@ static llvm::MDNode *getAsmSrcLocInfo(co if (!StrVal.empty()) { const SourceManager &SM = CGF.CGM.getContext().getSourceManager(); const LangOptions &LangOpts = CGF.CGM.getLangOpts(); + unsigned StartToken = 0; + unsigned ByteOffset = 0; // Add the location of the start of each subsequent line of the asm to the // MDNode. - for (unsigned i = 0, e = StrVal.size()-1; i != e; ++i) { + for (unsigned i = 0, e = StrVal.size() - 1; i != e; ++i) { if (StrVal[i] != '\n') continue; - SourceLocation LineLoc = Str->getLocationOfByte(i+1, SM, LangOpts, - CGF.getTarget()); + SourceLocation LineLoc = Str->getLocationOfByte( + i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset); Locs.push_back(llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding()))); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits