JDevlieghere created this revision.
JDevlieghere added reviewers: jingham, clayborg, aprantl, mib.
Herald added a subscriber: mgorny.
JDevlieghere requested review of this revision.

Jim noticed that the regex command is unintentionally recursive:

Let's use the following command regex as an example:

  (lldb) com regex humm 's/([^ ]+) ([^ ]+)/p %1 %2 %1 %2/'
  
  If we call it with arguments `foo bar`, thing behave as expected:

(lldb) humm foo bar
(...)
foo bar foo bar

  However, if we include `%2` in the arguments, things break down:

(lldb) humm fo%2o bar
(...)
fobaro bar fobaro bar

  The problem is that the implementation of the substitution is too naive.  It 
substitutes the %1 token into the target template in place, then does the %2 
substitution starting with the resultant string. So if the previous 
substitution introduced a %2 token, it would get processed in the second sweep, 
etc.
  
  This patch addresses the issue by walking the command once and substituting 
the % variables in place. I didn't really trust myself so I also wrote a few 
unit tests to make sure I didn't miss any edge cases. 
  
  rdar://81236994


https://reviews.llvm.org/D120101

Files:
  lldb/source/Commands/CommandObjectRegexCommand.cpp
  lldb/source/Commands/CommandObjectRegexCommand.h
  lldb/unittests/Interpreter/CMakeLists.txt
  lldb/unittests/Interpreter/TestRegexCommand.cpp

Index: lldb/unittests/Interpreter/TestRegexCommand.cpp
===================================================================
--- /dev/null
+++ lldb/unittests/Interpreter/TestRegexCommand.cpp
@@ -0,0 +1,51 @@
+//===-- TestRegexCommand.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Commands/CommandObjectRegexCommand.h"
+
+#include "gtest/gtest.h"
+
+using namespace lldb_private;
+using namespace lldb;
+
+namespace {
+class TestRegexCommand : public CommandObjectRegexCommand {
+public:
+  static std::string
+  Substitute(std::string input,
+             const llvm::SmallVectorImpl<llvm::StringRef> &replacements) {
+    SubstituteVariables(input, replacements);
+    return input;
+  }
+};
+} // namespace
+
+TEST(RegexCommandTest, SubstituteVariables) {
+  const llvm::SmallVector<llvm::StringRef, 4> substitutions = {"UNUSED", "foo",
+                                                               "bar", "baz"};
+
+  EXPECT_EQ(TestRegexCommand::Substitute("%1", substitutions), "foo");
+  EXPECT_EQ(TestRegexCommand::Substitute("%2", substitutions), "bar");
+  EXPECT_EQ(TestRegexCommand::Substitute("%3", substitutions), "baz");
+  EXPECT_EQ(TestRegexCommand::Substitute("%1%2%3", substitutions), "foobarbaz");
+  EXPECT_EQ(TestRegexCommand::Substitute("%1%2%3%4", substitutions),
+            "foobarbaz%4");
+  EXPECT_EQ(TestRegexCommand::Substitute("#%1#%2#%3#", substitutions),
+            "#foo#bar#baz#");
+  EXPECT_EQ(TestRegexCommand::Substitute("%11", substitutions), "%11");
+}
+
+TEST(RegexCommandTest, SubstituteVariablesNoRecursion) {
+  const llvm::SmallVector<llvm::StringRef, 4> substitutions = {"UNUSED", "%2",
+                                                               "%3", "%4"};
+
+  EXPECT_EQ(TestRegexCommand::Substitute("%1", substitutions), "%2");
+  EXPECT_EQ(TestRegexCommand::Substitute("%2", substitutions), "%3");
+  EXPECT_EQ(TestRegexCommand::Substitute("%3", substitutions), "%4");
+  EXPECT_EQ(TestRegexCommand::Substitute("%1%2%3", substitutions), "%2%3%4");
+}
Index: lldb/unittests/Interpreter/CMakeLists.txt
===================================================================
--- lldb/unittests/Interpreter/CMakeLists.txt
+++ lldb/unittests/Interpreter/CMakeLists.txt
@@ -4,6 +4,7 @@
   TestOptionArgParser.cpp
   TestOptionValue.cpp
   TestOptionValueFileColonLine.cpp
+  TestRegexCommand.cpp
 
   LINK_LIBS
       lldbCore
Index: lldb/source/Commands/CommandObjectRegexCommand.h
===================================================================
--- lldb/source/Commands/CommandObjectRegexCommand.h
+++ lldb/source/Commands/CommandObjectRegexCommand.h
@@ -39,6 +39,14 @@
 protected:
   bool DoExecute(llvm::StringRef command, CommandReturnObject &result) override;
 
+  /// Substitute variables of the format %1 in the input string.
+  ///
+  /// The replacements vector is expected to be the result of a regex match, so
+  /// %1 will be matched with replacements[1] and not replacements[0].
+  static void SubstituteVariables(
+      std::string &str,
+      const llvm::SmallVectorImpl<llvm::StringRef> &replacements);
+
   struct Entry {
     RegularExpression regex;
     std::string command;
Index: lldb/source/Commands/CommandObjectRegexCommand.cpp
===================================================================
--- lldb/source/Commands/CommandObjectRegexCommand.cpp
+++ lldb/source/Commands/CommandObjectRegexCommand.cpp
@@ -9,6 +9,7 @@
 #include "CommandObjectRegexCommand.h"
 #include "lldb/Interpreter/CommandInterpreter.h"
 #include "lldb/Interpreter/CommandReturnObject.h"
+#include <cmath>
 
 using namespace lldb;
 using namespace lldb_private;
@@ -25,28 +26,42 @@
 // Destructor
 CommandObjectRegexCommand::~CommandObjectRegexCommand() = default;
 
+void CommandObjectRegexCommand::SubstituteVariables(
+    std::string &str,
+    const llvm::SmallVectorImpl<llvm::StringRef> &replacements) {
+  const char pattern = '%';
+
+  // Walk the string left to right by jumping between positions with a '%'.
+  size_t pos = str.find(pattern);
+  while (pos != std::string::npos) {
+    // Parse the number following '%'.
+    const size_t idx = std::atoi(str.c_str() + pos + 1);
+
+    // If the '%' is not followed by a number, or the number is greater than
+    // the number of replacements, look for the next '%'.
+    if (idx == 0 || idx >= replacements.size()) {
+      pos = str.find(pattern, pos + 1);
+      continue;
+    }
+
+    // Replace the % and subsequent number by the corresponding replacement
+    // before looking for the next '%'.
+    const std::string replacement(replacements[idx]);
+    const size_t pattern_size = 1 + static_cast<size_t>(log10(idx) + 1);
+    str.replace(pos, pattern_size, replacement);
+    pos = str.find(pattern, pos + replacement.size());
+  }
+}
+
 bool CommandObjectRegexCommand::DoExecute(llvm::StringRef command,
                                           CommandReturnObject &result) {
   EntryCollection::const_iterator pos, end = m_entries.end();
   for (pos = m_entries.begin(); pos != end; ++pos) {
     llvm::SmallVector<llvm::StringRef, 4> matches;
     if (pos->regex.Execute(command, &matches)) {
-      std::string new_command(pos->command);
-      char percent_var[8];
-      size_t idx, percent_var_idx;
-      for (uint32_t match_idx = 1; match_idx <= m_max_matches; ++match_idx) {
-        if (match_idx < matches.size()) {
-          const std::string match_str = matches[match_idx].str();
-          const int percent_var_len =
-              ::snprintf(percent_var, sizeof(percent_var), "%%%u", match_idx);
-          for (idx = 0; (percent_var_idx = new_command.find(
-                             percent_var, idx)) != std::string::npos;) {
-            new_command.erase(percent_var_idx, percent_var_len);
-            new_command.insert(percent_var_idx, match_str);
-            idx = percent_var_idx + match_str.size();
-          }
-        }
-      }
+      std::string new_command = pos->command;
+      SubstituteVariables(new_command, matches);
+
       // Interpret the new command and return this as the result!
       if (m_interpreter.GetExpandRegexAliases())
         result.GetOutputStream().Printf("%s\n", new_command.c_str());
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to