================
@@ -2190,3 +2192,30 @@ static bool FormatTemplateTypeDiff(ASTContext &Context, 
QualType FromType,
   TD.DiffTemplate();
   return TD.Emit();
 }
+
+std::string clang::FormatUTFCodeUnitAsCodepoint(unsigned Value, QualType T) {
+  auto IsSingleCodeUnitCP = [](unsigned Value, QualType T) {
+    if (T->isChar8Type()) {
+      assert(Value <= 0xFF && "not a valid UTF-8 code unit");
+      return Value <= 0x7F;
+    }
+    if (T->isChar16Type()) {
+      assert(Value <= 0xFFFF && "not a valid UTF-16 code unit");
+      return llvm::IsSingleCodeUnitUTF16Codepoint(Value);
+    }
+    return llvm::IsSingleCodeUnitUTF32Codepoint(Value);
+  };
+  llvm::SmallVector<char, 4> Str;
+  if (!IsSingleCodeUnitCP(Value, T)) {
+    llvm::raw_svector_ostream OS(Str);
+    OS << "<" << llvm::format_hex(Value, 1, /*Upper=*/true) << ">";
+    return std::string(Str.begin(), Str.end());
+  }
----------------
tahonermann wrote:

The common case here will be a large value that won't fix in a `char` vector of 
length 4; especially considering the leading `<0x` and trailing `>\0`.
```suggestion
  llvm::SmallVector<char, 16> Str;
  if (!IsSingleCodeUnitCP(Value, T)) {
    llvm::raw_svector_ostream OS(Str);
    OS << "<" << llvm::format_hex(Value, 1, /*Upper=*/true) << ">";
    return std::string(Str.begin(), Str.end());
  }
```

https://github.com/llvm/llvm-project/pull/138708
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to