Author: jlebar
Date: Thu Sep 29 19:38:45 2016
New Revision: 282822

URL: http://llvm.org/viewvc/llvm-project?rev=282822&view=rev
Log:
Move UTF functions into namespace llvm.
Summary:
This lets people link against LLVM and their own version of the UTF library.

I determined this only affects llvm, clang, lld, and lldb by running

  $ git grep -wl 'UTF[0-9]\+\|\bConvertUTF\bisLegalUTF\|getNumBytesFor' | cut -f 1 -d '/' | sort | uniq
  clang
  lld
  lldb
  llvm

Tested with

  ninja lldb
  ninja check-clang check-llvm check-lld

(ninja check-lldb doesn't complete for me with or without this patch.)

Reviewers: rnk

Subscribers: klimek, beanz, mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D24996

Modified:
    cfe/trunk/lib/Analysis/FormatString.cpp
    cfe/trunk/lib/CodeGen/CodeGenModule.cpp
    cfe/trunk/lib/Format/Encoding.h
    cfe/trunk/lib/Frontend/TextDiagnostic.cpp
    cfe/trunk/lib/Lex/Lexer.cpp
    cfe/trunk/lib/Lex/LiteralSupport.cpp
    cfe/trunk/lib/Sema/SemaChecking.cpp
    cfe/trunk/lib/Sema/SemaExpr.cpp

Modified: cfe/trunk/lib/Analysis/FormatString.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/FormatString.cpp?rev=282822&r1=282821&r2=282822&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/FormatString.cpp (original)
+++ cfe/trunk/lib/Analysis/FormatString.cpp Thu Sep 29 19:38:45 2016
@@ -266,14 +266,15 @@ bool clang::analyze_format_string::Parse
   if (SpecifierBegin + 1 >= FmtStrEnd)
     return false;
 
-  const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
-  const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+  const llvm::UTF8 *SB =
+      reinterpret_cast<const llvm::UTF8 *>(SpecifierBegin + 1);
+  const llvm::UTF8 *SE = reinterpret_cast<const llvm::UTF8 *>(FmtStrEnd);
   const char FirstByte = *SB;
 
   // If the invalid specifier is a multibyte UTF-8 string, return the
   // total length accordingly so that the conversion specifier can be
   // properly updated to reflect a complete UTF-8 specifier.
- unsigned NumBytes = getNumBytesForUTF8(FirstByte); + unsigned NumBytes = llvm::getNumBytesForUTF8(FirstByte); if (NumBytes == 1) return false; if (SB + NumBytes > SE) Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=282822&r1=282821&r2=282822&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Thu Sep 29 19:38:45 2016 @@ -3136,13 +3136,12 @@ GetConstantCFStringEntry(llvm::StringMap // Otherwise, convert the UTF8 literals into a string of shorts. IsUTF16 = true; - SmallVector<UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls. - const UTF8 *FromPtr = (const UTF8 *)String.data(); - UTF16 *ToPtr = &ToBuf[0]; + SmallVector<llvm::UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls. + const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data(); + llvm::UTF16 *ToPtr = &ToBuf[0]; - (void)ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, - &ToPtr, ToPtr + NumBytes, - strictConversion); + (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr, + ToPtr + NumBytes, llvm::strictConversion); // ConvertUTF8toUTF16 returns the length in ToPtr. StringLength = ToPtr - &ToBuf[0]; Modified: cfe/trunk/lib/Format/Encoding.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Encoding.h?rev=282822&r1=282821&r2=282822&view=diff ============================================================================== --- cfe/trunk/lib/Format/Encoding.h (original) +++ cfe/trunk/lib/Format/Encoding.h Thu Sep 29 19:38:45 2016 @@ -33,16 +33,17 @@ enum Encoding { /// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8, /// it is considered UTF8, otherwise we treat it as some 8-bit encoding. 
inline Encoding detectEncoding(StringRef Text) { - const UTF8 *Ptr = reinterpret_cast<const UTF8 *>(Text.begin()); - const UTF8 *BufEnd = reinterpret_cast<const UTF8 *>(Text.end()); - if (::isLegalUTF8String(&Ptr, BufEnd)) + const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin()); + const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end()); + if (llvm::isLegalUTF8String(&Ptr, BufEnd)) return Encoding_UTF8; return Encoding_Unknown; } inline unsigned getCodePointCountUTF8(StringRef Text) { unsigned CodePoints = 0; - for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) { + for (size_t i = 0, e = Text.size(); i < e; + i += llvm::getNumBytesForUTF8(Text[i])) { ++CodePoints; } return CodePoints; @@ -97,7 +98,7 @@ inline unsigned columnWidthWithTabs(Stri inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { switch (Encoding) { case Encoding_UTF8: - return getNumBytesForUTF8(FirstChar); + return llvm::getNumBytesForUTF8(FirstChar); default: return 1; } @@ -136,7 +137,7 @@ inline unsigned getEscapeSequenceLength( ++I; return I; } - return 1 + getNumBytesForUTF8(Text[1]); + return 1 + llvm::getNumBytesForUTF8(Text[1]); } } Modified: cfe/trunk/lib/Frontend/TextDiagnostic.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/TextDiagnostic.cpp?rev=282822&r1=282821&r2=282822&view=diff ============================================================================== --- cfe/trunk/lib/Frontend/TextDiagnostic.cpp (original) +++ cfe/trunk/lib/Frontend/TextDiagnostic.cpp Thu Sep 29 19:38:45 2016 @@ -119,16 +119,17 @@ printableTextForNextCharacter(StringRef begin = reinterpret_cast<unsigned char const *>(&*(SourceLine.begin() + *i)); end = begin + (SourceLine.size() - *i); - if (isLegalUTF8Sequence(begin, end)) { - UTF32 c; - UTF32 *cptr = &c; + if (llvm::isLegalUTF8Sequence(begin, end)) { + llvm::UTF32 c; + llvm::UTF32 *cptr = &c; unsigned char const *original_begin = begin; - unsigned char 
const *cp_end = begin+getNumBytesForUTF8(SourceLine[*i]); + unsigned char const *cp_end = + begin + llvm::getNumBytesForUTF8(SourceLine[*i]); - ConversionResult res = ConvertUTF8toUTF32(&begin, cp_end, &cptr, cptr+1, - strictConversion); + llvm::ConversionResult res = llvm::ConvertUTF8toUTF32( + &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion); (void)res; - assert(conversionOK==res); + assert(llvm::conversionOK == res); assert(0 < begin-original_begin && "we must be further along in the string now"); *i += begin-original_begin; Modified: cfe/trunk/lib/Lex/Lexer.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=282822&r1=282821&r2=282822&view=diff ============================================================================== --- cfe/trunk/lib/Lex/Lexer.cpp (original) +++ cfe/trunk/lib/Lex/Lexer.cpp Thu Sep 29 19:38:45 2016 @@ -1485,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(cons bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { const char *UnicodePtr = CurPtr; - UTF32 CodePoint; - ConversionResult Result = - llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, - (const UTF8 *)BufferEnd, + llvm::UTF32 CodePoint; + llvm::ConversionResult Result = + llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr, + (const llvm::UTF8 *)BufferEnd, &CodePoint, - strictConversion); - if (Result != conversionOK || + llvm::strictConversion); + if (Result != llvm::conversionOK || !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) return false; @@ -3625,17 +3625,17 @@ LexNextToken: break; } - UTF32 CodePoint; + llvm::UTF32 CodePoint; // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. 
--CurPtr; - ConversionResult Status = - llvm::convertUTF8Sequence((const UTF8 **)&CurPtr, - (const UTF8 *)BufferEnd, + llvm::ConversionResult Status = + llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, + (const llvm::UTF8 *)BufferEnd, &CodePoint, - strictConversion); - if (Status == conversionOK) { + llvm::strictConversion); + if (Status == llvm::conversionOK) { if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) { if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine)) return true; // KeepWhitespaceMode Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=282822&r1=282821&r2=282822&view=diff ============================================================================== --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original) +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Thu Sep 29 19:38:45 2016 @@ -402,7 +402,7 @@ static void EncodeUCNEscape(const char * if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf); + llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf); *ResultPtr = UcnVal; ResultBuf += 4; return; @@ -411,7 +411,7 @@ static void EncodeUCNEscape(const char * if (CharByteWidth == 2) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. 
- UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf); + llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf); if (UcnVal <= (UTF32)0xFFFF) { *ResultPtr = UcnVal; @@ -1114,11 +1114,11 @@ CharLiteralParser::CharLiteralParser(con char const *tmp_in_start = start; uint32_t *tmp_out_start = buffer_begin; - ConversionResult res = - ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start), - reinterpret_cast<UTF8 const *>(begin), - &buffer_begin, buffer_end, strictConversion); - if (res != conversionOK) { + llvm::ConversionResult res = + llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start), + reinterpret_cast<llvm::UTF8 const *>(begin), + &buffer_begin, buffer_end, llvm::strictConversion); + if (res != llvm::conversionOK) { // If we see bad encoding for unprefixed character literals, warn and // simply copy the byte values, for compatibility with gcc and // older versions of clang. @@ -1510,13 +1510,13 @@ void StringLiteralParser::init(ArrayRef< if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr); + llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr); *ResultWidePtr = ResultChar; ResultPtr += 4; } else if (CharByteWidth == 2) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr); + llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr); *ResultWidePtr = ResultChar & 0xFFFF; ResultPtr += 2; } else { @@ -1531,12 +1531,12 @@ void StringLiteralParser::init(ArrayRef< if (CharByteWidth == 4) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. 
- UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data()); + llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data()); ResultWidePtr[0] = GetNumStringChars() - 1; } else if (CharByteWidth == 2) { // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. - UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data()); + llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data()); ResultWidePtr[0] = GetNumStringChars() - 1; } else { assert(CharByteWidth == 1 && "Unexpected char width"); @@ -1570,7 +1570,7 @@ void StringLiteralParser::init(ArrayRef< static const char *resyncUTF8(const char *Err, const char *End) { if (Err == End) return End; - End = Err + std::min<unsigned>(getNumBytesForUTF8(*Err), End-Err); + End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err); while (++Err != End && (*Err & 0xC0) == 0x80) ; return Err; @@ -1582,7 +1582,7 @@ static const char *resyncUTF8(const char bool StringLiteralParser::CopyStringFragment(const Token &Tok, const char *TokBegin, StringRef Fragment) { - const UTF8 *ErrorPtrTmp; + const llvm::UTF8 *ErrorPtrTmp; if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp)) return false; Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=282822&r1=282821&r2=282822&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Sep 29 19:38:45 2016 @@ -3262,15 +3262,15 @@ bool Sema::CheckObjCString(Expr *Arg) { if (Literal->containsNonAsciiOrNull()) { StringRef String = Literal->getString(); unsigned NumBytes = String.size(); - SmallVector<UTF16, 128> ToBuf(NumBytes); - const UTF8 *FromPtr = (const UTF8 *)String.data(); - UTF16 *ToPtr = &ToBuf[0]; - - ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, - 
&ToPtr, ToPtr + NumBytes, - strictConversion); + SmallVector<llvm::UTF16, 128> ToBuf(NumBytes); + const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data(); + llvm::UTF16 *ToPtr = &ToBuf[0]; + + llvm::ConversionResult Result = + llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr, + ToPtr + NumBytes, llvm::strictConversion); // Check for conversion failure. - if (Result != conversionOK) + if (Result != llvm::conversionOK) Diag(Arg->getLocStart(), diag::warn_cfstring_truncated) << Arg->getSourceRange(); } @@ -4777,16 +4777,16 @@ CheckFormatHandler::HandleInvalidConvers // hex value. std::string CodePointStr; if (!llvm::sys::locale::isPrint(*csStart)) { - UTF32 CodePoint; - const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart); - const UTF8 *E = - reinterpret_cast<const UTF8 *>(csStart + csLen); - ConversionResult Result = - llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion); + llvm::UTF32 CodePoint; + const llvm::UTF8 **B = reinterpret_cast<const llvm::UTF8 **>(&csStart); + const llvm::UTF8 *E = + reinterpret_cast<const llvm::UTF8 *>(csStart + csLen); + llvm::ConversionResult Result = + llvm::convertUTF8Sequence(B, E, &CodePoint, llvm::strictConversion); - if (Result != conversionOK) { + if (Result != llvm::conversionOK) { unsigned char FirstChar = *csStart; - CodePoint = (UTF32)FirstChar; + CodePoint = (llvm::UTF32)FirstChar; } llvm::raw_string_ostream OS(CodePointStr); Modified: cfe/trunk/lib/Sema/SemaExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=282822&r1=282821&r2=282822&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaExpr.cpp (original) +++ cfe/trunk/lib/Sema/SemaExpr.cpp Thu Sep 29 19:38:45 2016 @@ -3070,8 +3070,9 @@ static void ConvertUTF8ToWideString(unsi SmallString<32> &Target) { Target.resize(CharByteWidth * (Source.size() + 1)); char *ResultPtr = &Target[0]; - const UTF8 *ErrorPtr; - bool success = 
ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); + const llvm::UTF8 *ErrorPtr; + bool success = + llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr); (void)success; assert(success); Target.resize(ResultPtr - &Target[0]); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits