[Lldb-commits] [lldb] [lldb] Implement ANSI & Unicode aware string stripping & padding (PR #130878)

Pavel Labath via lldb-commits Wed, 12 Mar 2025 04:56:18 -0700

================
@@ -172,28 +175,99 @@ inline std::string FormatAnsiTerminalCodes(llvm::StringRef format,
   return fmt;
 }
 
+inline std::tuple<llvm::StringRef, llvm::StringRef, llvm::StringRef>
+FindNextAnsiSequence(llvm::StringRef str) {
+  llvm::StringRef left;
+  llvm::StringRef right = str;
+
+  while (!right.empty()) {
+    const size_t start = right.find(ANSI_ESC_START);
+
+    // ANSI_ESC_START not found.
+    if (start == llvm::StringRef::npos)
+      return {str, {}, {}};
+
+    // Split the string around the current ANSI_ESC_START.
+    left = str.take_front(left.size() + start);
+    llvm::StringRef escape = right.substr(start);
+    right = right.substr(start + ANSI_ESC_START_LEN + 1);
+
+    const size_t end = right.find_first_not_of("0123456789;");
+
+    // ANSI_ESC_END found.
+    if (end < right.size() && (right[end] == 'm' || right[end] == 'G'))
+      return {left, escape.take_front(ANSI_ESC_START_LEN + 1 + end + 1),
+              right.substr(end + 1)};
+
+    // Maintain the invariant that str == left + right at the start of the loop.
+    left = str.take_front(left.size() + ANSI_ESC_START_LEN + 1);
+  }
+
+  return {str, {}, {}};
+}
+
 inline std::string StripAnsiTerminalCodes(llvm::StringRef str) {
   std::string stripped;
   while (!str.empty()) {
-    llvm::StringRef left, right;
-
-    std::tie(left, right) = str.split(ANSI_ESC_START);
+    auto [left, escape, right] = FindNextAnsiSequence(str);
     stripped += left;
+    str = right;
+  }
+  return stripped;
+}
 
-    // ANSI_ESC_START not found.
-    if (left == str && right.empty())
-      break;
+inline std::string TrimAndPad(llvm::StringRef str, size_t visible_length,
+                              char padding = ' ') {
+  std::string result;
+  result.reserve(visible_length);
+  size_t result_visibile_length = 0;
+
+  // Trim the string to the given visible length.
+  while (!str.empty()) {
+    auto [left, escape, right] = FindNextAnsiSequence(str);
+    str = right;
 
-    size_t end = right.find_first_not_of("0123456789;");
-    if (end < right.size() && (right[end] == 'm' || right[end] == 'G')) {
-      str = right.substr(end + 1);
-    } else {
-      // ANSI_ESC_END not found.
-      stripped += ANSI_ESC_START;
-      str = right;
+    // Compute the length of the string without escape codes. If it fits, append
+    // it together with the invisible escape code.
+    size_t column_width = llvm::sys::locale::columnWidth(left);
+    if (result_visibile_length + column_width <= visible_length) {
+      result.append(left).append(escape);
+      result_visibile_length += column_width;
+      continue;
+    }
+
+    // The string doesn't fit but doesn't contain unicode.
+    // Append the substring that fits.
+    if (column_width == left.size()) {
+      llvm::StringRef trimmed =
+          left.take_front(visible_length - result_visibile_length);
+      result.append(trimmed);
+      result_visibile_length += visible_length - result_visibile_length;
+      continue;
+    }
----------------
labath wrote:


I think this optimization is not correct, because some Unicode characters take
up two columns. If such a character is encoded using two bytes, then the
`column_width == left.size()` check will pass even though the string contains
Unicode, and it's not legal to split that character in half.

https://github.com/llvm/llvm-project/pull/130878
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

[Lldb-commits] [lldb] [lldb] Implement ANSI & Unicode aware string stripping & padding (PR #130878)

Reply via email to