https://github.com/DavidSpickett created 
https://github.com/llvm/llvm-project/pull/180947

As seen in #177570, this code has a bunch of corner cases, does not handle ANSI 
codes properly and does not handle unicode at all. That's enough to fix that we 
need some tests to make it clear where we're starting from.

The body of OutputFormattedUsageText is moved into a utility in the 
AnsiTerminal.h header and tests added to the existing AnsiTerminalTest.cpp.

Some results are known to be wrong. Some that cause crashes are commented out, 
to be enabled once fixed.

>From 090a0d09e98634075435ae74034ea431758eeb98 Mon Sep 17 00:00:00 2001
From: David Spickett <[email protected]>
Date: Mon, 9 Feb 2026 15:29:59 +0000
Subject: [PATCH] [lldb] Move parts of OutputFormattedUsageText into utility
 function

As seen in #177570, this code has a bunch of corner cases,
does not handle ANSI codes properly and does not handle
unicode at all. That's enough to fix that we need some
tests to make it clear where we're starting from.

The body of OutputFormattedUsageText is moved into a utility
in the AnsiTerminal.h header and tests added to the existing
AnsiTerminalTest.cpp.

Some results are known to be wrong. Some that cause crashes
are commented out, to be enabled once fixed.
---
 lldb/include/lldb/Utility/AnsiTerminal.h    | 64 +++++++++++++++++++
 lldb/source/Interpreter/Options.cpp         | 51 +---------------
 lldb/unittests/Utility/AnsiTerminalTest.cpp | 68 +++++++++++++++++++++
 3 files changed, 133 insertions(+), 50 deletions(-)

diff --git a/lldb/include/lldb/Utility/AnsiTerminal.h 
b/lldb/include/lldb/Utility/AnsiTerminal.h
index aaaf94c6c5f7f..0e696e49eda0b 100644
--- a/lldb/include/lldb/Utility/AnsiTerminal.h
+++ b/lldb/include/lldb/Utility/AnsiTerminal.h
@@ -99,6 +99,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Locale.h"
 
+#include "lldb/Utility/Stream.h"
+
 #include <string>
 
 namespace lldb_private {
@@ -287,6 +289,68 @@ inline size_t ColumnWidth(llvm::StringRef str) {
   return llvm::sys::locale::columnWidth(stripped);
 }
 
+// Output text that may contain ANSI codes, word wrapped (wrapped at 
whitespace)
+// to the given stream. The indent level of the stream is counted towards the
+// output line length.
+// FIXME: This contains several bugs and does not handle unicode.
+inline void OutputWordWrappedLines(Stream &strm,
+                                   // FIXME: should be StringRef, but triggers
+                                   // a crash when changed.
+                                   const std::string &text,
+                                   uint32_t output_max_columns) {
+  const size_t visible_length = ansi::ColumnWidth(text);
+
+  // Will it all fit on one line?
+  if (static_cast<uint32_t>(visible_length + strm.GetIndentLevel()) <
+      output_max_columns) {
+    // Output it as a single line.
+    strm.Indent(text);
+    strm.EOL();
+    return;
+  }
+
+  // We need to break it up into multiple lines. We can do this based on the
+  // formatted text because we know that:
+  // * We only break lines on whitespace, therefore we will not break in the
+  //   middle of a Unicode character or escape code.
+  // * Escape codes are so far not applied to multiple words, so there is no
+  //   risk of breaking up a phrase and the escape code being incorrectly
+  //   applied to the indent too.
+
+  const int max_text_width = output_max_columns - strm.GetIndentLevel() - 1;
+  int start = 0;
+  int end = start;
+  const int final_end = visible_length;
+
+  while (end < final_end) {
+    // Don't start the 'text' on a space, since we're already outputting the
+    // indentation.
+    while ((start < final_end) && (text[start] == ' '))
+      start++;
+
+    end = start + max_text_width;
+    if (end > final_end) {
+      end = final_end;
+    } else {
+      // If we're not at the end of the text, make sure we break the line on
+      // white space.
+      while (end > start && text[end] != ' ' && text[end] != '\t' &&
+             text[end] != '\n')
+        end--;
+    }
+
+    const int sub_len = end - start;
+    if (start != 0)
+      strm.EOL();
+    strm.Indent();
+    assert(start < final_end);
+    assert(start + sub_len <= final_end);
+    strm.PutCString(llvm::StringRef(text.c_str() + start, sub_len));
+    start = end + 1;
+  }
+  strm.EOL();
+}
+
 } // namespace ansi
 } // namespace lldb_private
 
diff --git a/lldb/source/Interpreter/Options.cpp 
b/lldb/source/Interpreter/Options.cpp
index 521882ab3be8c..0bda2a912e1a1 100644
--- a/lldb/source/Interpreter/Options.cpp
+++ b/lldb/source/Interpreter/Options.cpp
@@ -274,57 +274,8 @@ void Options::OutputFormattedUsageText(Stream &strm,
   }
   actual_text.append(
       ansi::FormatAnsiTerminalCodes(option_def.usage_text, use_color));
-  const size_t visible_length = ansi::ColumnWidth(actual_text);
-
-  // Will it all fit on one line?
-  if (static_cast<uint32_t>(visible_length + strm.GetIndentLevel()) <
-      output_max_columns) {
-    // Output it as a single line.
-    strm.Indent(actual_text);
-    strm.EOL();
-    return;
-  }
 
-  // We need to break it up into multiple lines. We can do this based on the
-  // formatted text because we know that:
-  // * We only break lines on whitespace, therefore we will not break in the
-  //   middle of a Unicode character or escape code.
-  // * Escape codes are so far not applied to multiple words, so there is no
-  //   risk of breaking up a phrase and the escape code being incorrectly
-  //   applied to the indent too.
-
-  const int max_text_width = output_max_columns - strm.GetIndentLevel() - 1;
-  int start = 0;
-  int end = start;
-  const int final_end = visible_length;
-
-  while (end < final_end) {
-    // Don't start the 'text' on a space, since we're already outputting the
-    // indentation.
-    while ((start < final_end) && (actual_text[start] == ' '))
-      start++;
-
-    end = start + max_text_width;
-    if (end > final_end) {
-      end = final_end;
-    } else {
-      // If we're not at the end of the text, make sure we break the line on
-      // white space.
-      while (end > start && actual_text[end] != ' ' &&
-             actual_text[end] != '\t' && actual_text[end] != '\n')
-        end--;
-    }
-
-    const int sub_len = end - start;
-    if (start != 0)
-      strm.EOL();
-    strm.Indent();
-    assert(start < final_end);
-    assert(start + sub_len <= final_end);
-    strm.PutCString(llvm::StringRef(actual_text.c_str() + start, sub_len));
-    start = end + 1;
-  }
-  strm.EOL();
+  ansi::OutputWordWrappedLines(strm, actual_text, output_max_columns);
 }
 
 bool Options::SupportsLongOption(const char *long_option) {
diff --git a/lldb/unittests/Utility/AnsiTerminalTest.cpp 
b/lldb/unittests/Utility/AnsiTerminalTest.cpp
index cef73ffaf9136..19c27de4cd886 100644
--- a/lldb/unittests/Utility/AnsiTerminalTest.cpp
+++ b/lldb/unittests/Utility/AnsiTerminalTest.cpp
@@ -9,6 +9,7 @@
 #include "gtest/gtest.h"
 
 #include "lldb/Utility/AnsiTerminal.h"
+#include "lldb/Utility/StreamString.h"
 
 using namespace lldb_private;
 
@@ -116,3 +117,70 @@ TEST(AnsiTerminal, TrimAndPad) {
   EXPECT_EQ("12❤️4❤️", ansi::TrimAndPad("12❤️4❤️", 5));
   EXPECT_EQ("12❤️45", ansi::TrimAndPad("12❤️45❤️", 5));
 }
+
+static void TestLines(const std::string &input, int indent,
+                      uint32_t output_max_columns,
+                      const llvm::StringRef &expected) {
+  StreamString strm;
+  strm.SetIndentLevel(indent);
+  ansi::OutputWordWrappedLines(strm, input, output_max_columns);
+  EXPECT_EQ(expected, strm.GetString());
+}
+
+TEST(AnsiTerminal, OutputWordWrappedLines) {
+  TestLines("", 0, 0, "\n");
+  TestLines("", 0, 1, "\n");
+  TestLines("", 2, 1, "\n");
+
+  // FIXME: crashes
+  // TestLines("abc", 0, 1, "\n");
+  // FIXME: crashes
+  // TestLines("abc", 0, 2, "\n");
+  // FIXME: should be "ab\nc\n"
+  TestLines("abc", 0, 3, "\n\nc\n");
+  TestLines("abc", 0, 4, "abc\n");
+  // Indent is counted as using up columns.
+  TestLines("abc", 1, 5, " abc\n");
+  // FIXME: This output is correctly indented but the content
+  // is mangled.
+  TestLines("abc", 2, 5, "  \n  \n  c\n");
+
+  // FIXME: Should skip leading whitespace and result in "abc\n".
+  TestLines("  abc", 0, 4, "\n\nbc\n");
+
+  // FIXME: Should be "abc\ndef\n".
+  TestLines("abc def", 0, 4, "abc\n\nef\n");
+  TestLines("abc def", 0, 5, "abc\ndef\n");
+  // Length is 6, 7 required. Has to split at whitespace.
+  TestLines("abc def", 0, 6, "abc\ndef\n");
+  // FIXME: This should split after abc, and not print
+  // more whitespace on the end of the line or the start
+  // of the new one. Resulting in "abc\ndef\n".
+  TestLines("abc           def", 0, 6, "abc  \ndef\n");
+
+  const char *fox_str = "The quick brown fox.";
+  TestLines(fox_str, 0, 30, "The quick brown fox.\n");
+  TestLines(fox_str, 5, 30, "     The quick brown fox.\n");
+  TestLines(fox_str, 0, 15, "The quick\nbrown fox.\n");
+  // FIXME: Trim the spaces off of the end of the first line.
+  TestLines("The quick       brown fox.", 0, 15,
+            "The quick     \nbrown fox.\n");
+
+  // As ANSI codes do not add to visible length, the results
+  // should be the same as the plain text verison.
+  const char *fox_str_ansi = "\x1B[4mT\x1B[0mhe quick brown fox.";
+  TestLines(fox_str_ansi, 0, 30, "\x1B[4mT\x1B[0mhe quick brown fox.\n");
+  TestLines(fox_str_ansi, 5, 30, "     \x1B[4mT\x1B[0mhe quick brown fox.\n");
+  // FIXME: Account for ANSI codes not contributing to visible length.
+  TestLines(fox_str_ansi, 0, 15, "\x1B[4mT\x1B[0mhe\nquick br\n");
+
+  const std::string fox_str_emoji = "🦊 The quick brown fox. 🦊";
+  TestLines(fox_str_emoji, 0, 30, "🦊 The quick brown fox. 🦊\n");
+  // FIXME: This crashes when max columns is exactly 31.
+  // TestLines(fox_str_emoji, 5, 31, "     🦊 The quick brown fox. 🦊\n");
+  TestLines(fox_str_emoji, 5, 32, "     🦊 The quick brown fox. 🦊\n");
+  // FIXME: Final fox is missing.
+  TestLines(fox_str_emoji, 0, 15, "🦊 The quick\nbrown fox. \n");
+  // FIXME: should not split the middle of an emoji.
+  TestLines("🦊🦊🦊 🦊🦊", 0, 5, "\n\n\n\n\n\n\n\n\xF0\x9F\xA6\n");
+}

_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to