================ @@ -2190,3 +2192,30 @@ static bool FormatTemplateTypeDiff(ASTContext &Context, QualType FromType, TD.DiffTemplate(); return TD.Emit(); } + +std::string clang::FormatUTFCodeUnitAsCodepoint(unsigned Value, QualType T) { + auto IsSingleCodeUnitCP = [](unsigned Value, QualType T) { + if (T->isChar8Type()) { + assert(Value <= 0xFF && "not a valid UTF-8 code unit"); + return Value <= 0x7F; + } + if (T->isChar16Type()) { + assert(Value <= 0xFFFF && "not a valid UTF-16 code unit"); + return llvm::IsSingleCodeUnitUTF16Codepoint(Value); + } + return llvm::IsSingleCodeUnitUTF32Codepoint(Value); + }; + llvm::SmallVector<char, 4> Str; + if (!IsSingleCodeUnitCP(Value, T)) { + llvm::raw_svector_ostream OS(Str); + OS << "<" << llvm::format_hex(Value, 1, /*Upper=*/true) << ">"; + return std::string(Str.begin(), Str.end()); + } ---------------- tahonermann wrote:
The common case here will be a large value that won't fit in a `char` vector of length 4, especially considering the leading `<0x` and trailing `>\0`. ```suggestion llvm::SmallVector<char, 16> Str; if (!IsSingleCodeUnitCP(Value, T)) { llvm::raw_svector_ostream OS(Str); OS << "<" << llvm::format_hex(Value, 1, /*Upper=*/true) << ">"; return std::string(Str.begin(), Str.end()); } ``` https://github.com/llvm/llvm-project/pull/138708 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits