JDevlieghere updated this revision to Diff 215022.
JDevlieghere retitled this revision from "[Utility] Phase out RegularExpression 
and use llvm::Regex instead." to "[Utility] Reimplement RegularExpression on 
top of llvm::Regex".
JDevlieghere edited the summary of this revision.
JDevlieghere added reviewers: labath, clayborg, jingham, xiaobai.
Herald added subscribers: krytarowski, srhines.
Herald added a reviewer: jdoerfert.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66174/new/

https://reviews.llvm.org/D66174

Files:
  lldb/include/lldb/Interpreter/OptionValueRegex.h
  lldb/include/lldb/Utility/RegularExpression.h
  lldb/source/Commands/CommandObjectBreakpoint.cpp
  lldb/source/Commands/CommandObjectFrame.cpp
  lldb/source/Core/Disassembler.cpp
  lldb/source/Host/common/Socket.cpp
  lldb/source/Interpreter/CommandObjectRegexCommand.cpp
  lldb/source/Interpreter/OptionArgParser.cpp
  lldb/source/Interpreter/OptionValueRegex.cpp
  lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp
  
lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp
  lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
  lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp
  lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
  lldb/source/Symbol/ObjectFile.cpp
  lldb/source/Symbol/Variable.cpp
  lldb/source/Target/ThreadPlanStepInRange.cpp
  lldb/source/Utility/RegularExpression.cpp
  lldb/unittests/Utility/NameMatchesTest.cpp

Index: lldb/unittests/Utility/NameMatchesTest.cpp
===================================================================
--- lldb/unittests/Utility/NameMatchesTest.cpp
+++ lldb/unittests/Utility/NameMatchesTest.cpp
@@ -49,8 +49,8 @@
 TEST(NameMatchesTest, RegularExpression) {
   EXPECT_TRUE(NameMatches("foobar", NameMatch::RegularExpression, "foo"));
   EXPECT_TRUE(NameMatches("foobar", NameMatch::RegularExpression, "f[oa]o"));
-  EXPECT_TRUE(NameMatches("foo", NameMatch::RegularExpression, ""));
-  EXPECT_TRUE(NameMatches("", NameMatch::RegularExpression, ""));
+  EXPECT_FALSE(NameMatches("", NameMatch::RegularExpression, ""));
+  EXPECT_FALSE(NameMatches("foo", NameMatch::RegularExpression, ""));
   EXPECT_FALSE(NameMatches("foo", NameMatch::RegularExpression, "b"));
   EXPECT_FALSE(NameMatches("", NameMatch::RegularExpression, "b"));
   EXPECT_FALSE(NameMatches("^a", NameMatch::RegularExpression, "^a"));
Index: lldb/source/Utility/RegularExpression.cpp
===================================================================
--- lldb/source/Utility/RegularExpression.cpp
+++ lldb/source/Utility/RegularExpression.cpp
@@ -22,16 +22,9 @@
 
 using namespace lldb_private;
 
-RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() {
-  memset(&m_preg, 0, sizeof(m_preg));
-}
-
 // Constructor that compiles "re" using "flags" and stores the resulting
 // compiled regular expression into this object.
-RegularExpression::RegularExpression(llvm::StringRef str)
-    : RegularExpression() {
-  Compile(str);
-}
+RegularExpression::RegularExpression(llvm::StringRef str) { Compile(str); }
 
 RegularExpression::RegularExpression(const RegularExpression &rhs)
     : RegularExpression() {
@@ -45,12 +38,6 @@
   return *this;
 }
 
-// Destructor
-//
-// Any previously compiled regular expression contained in this object will be
-// freed.
-RegularExpression::~RegularExpression() { Free(); }
-
 // Compile a regular expression using the supplied regular expression text and
 // flags. The compiled regular expression lives in this object so that it can
 // be readily used for regular expression matches. Execute() can be called
@@ -61,13 +48,9 @@
 //  True if the regular expression compiles successfully, false
 //  otherwise.
 bool RegularExpression::Compile(llvm::StringRef str) {
-  Free();
-
-  // regcomp() on darwin does not recognize "" as a valid regular expression,
-  // so we substitute it with an equivalent non-empty one.
-  m_re = str.empty() ? "()" : str;
-  m_comp_err = ::regcomp(&m_preg, m_re.c_str(), DEFAULT_COMPILE_FLAGS);
-  return m_comp_err == 0;
+  m_regex_text = str.str(); // Keep GetText() in sync with what is compiled.
+  m_regex = llvm::Regex(str);
+  return IsValid();
 }
 
 // Execute a regular expression match using the compiled regular expression
@@ -75,84 +58,25 @@
 // are used for regular expression matches "match_count" should indicate the
 // number of regmatch_t values that are present in "match_ptr". The regular
 // expression will be executed using the "execute_flags".
-bool RegularExpression::Execute(llvm::StringRef str, Match *match) const {
-  int err = 1;
-  if (m_comp_err == 0) {
-    // Argument to regexec must be null-terminated.
-    std::string reg_str = str;
-    if (match) {
-      err = ::regexec(&m_preg, reg_str.c_str(), match->GetSize(),
-                      match->GetData(), 0);
-    } else {
-      err = ::regexec(&m_preg, reg_str.c_str(), 0, nullptr, 0);
-    }
-  }
-
-  if (err != 0) {
-    // The regular expression didn't compile, so clear the matches
-    if (match)
-      match->Clear();
-    return false;
-  }
-  return true;
-}
-
-bool RegularExpression::Match::GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
-                                               std::string &match_str) const {
-  llvm::StringRef match_str_ref;
-  if (GetMatchAtIndex(s, idx, match_str_ref)) {
-    match_str = match_str_ref.str();
-    return true;
-  }
-  return false;
-}
-
-bool RegularExpression::Match::GetMatchAtIndex(
-    llvm::StringRef s, uint32_t idx, llvm::StringRef &match_str) const {
-  if (idx < m_matches.size()) {
-    if (m_matches[idx].rm_eo == -1 && m_matches[idx].rm_so == -1)
-      return false;
-
-    if (m_matches[idx].rm_eo == m_matches[idx].rm_so) {
-      // Matched the empty string...
-      match_str = llvm::StringRef();
-      return true;
-    } else if (m_matches[idx].rm_eo > m_matches[idx].rm_so) {
-      match_str = s.substr(m_matches[idx].rm_so,
-                           m_matches[idx].rm_eo - m_matches[idx].rm_so);
-      return true;
-    }
-  }
-  return false;
+bool RegularExpression::Execute(
+    llvm::StringRef str,
+    llvm::SmallVectorImpl<llvm::StringRef> *matches) const {
+  return m_regex.match(str, matches);
 }
 
 // Returns true if the regular expression compiled and is ready for execution.
-bool RegularExpression::IsValid() const { return m_comp_err == 0; }
-
-// Returns the text that was used to compile the current regular expression.
-llvm::StringRef RegularExpression::GetText() const { return m_re; }
-
-// Free any contained compiled regular expressions.
-void RegularExpression::Free() {
-  if (m_comp_err == 0) {
-    m_re.clear();
-    regfree(&m_preg);
-    // Set a compile error since we no longer have a valid regex
-    m_comp_err = 1;
-  }
+bool RegularExpression::IsValid() const {
+  std::string discard;
+  return m_regex.isValid(discard);
 }
 
-size_t RegularExpression::GetErrorAsCString(char *err_str,
-                                            size_t err_str_max_len) const {
-  if (m_comp_err == 0) {
-    if (err_str && err_str_max_len)
-      *err_str = '\0';
-    return 0;
-  }
-
-  return ::regerror(m_comp_err, &m_preg, err_str, err_str_max_len);
-}
+// Returns the text that was used to compile the current regular expression.
+llvm::StringRef RegularExpression::GetText() const { return m_regex_text; }
 
-bool RegularExpression::operator<(const RegularExpression &rhs) const {
-  return (m_re < rhs.m_re);
+// Returns the compilation error message, or llvm::None if the regex is valid.
+llvm::Optional<std::string> RegularExpression::GetError() const {
+  std::string error;
+  if (!m_regex.isValid(error))
+    return error;
+  return {};
 }
Index: lldb/source/Target/ThreadPlanStepInRange.cpp
===================================================================
--- lldb/source/Target/ThreadPlanStepInRange.cpp
+++ lldb/source/Target/ThreadPlanStepInRange.cpp
@@ -361,26 +361,18 @@
           sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
               .GetCString();
       if (frame_function_name) {
-        size_t num_matches = 0;
-        Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP));
-        if (log)
-          num_matches = 1;
-
-        RegularExpression::Match regex_match(num_matches);
-
+        llvm::SmallVector<llvm::StringRef, 2> matches;
         bool return_value =
-            avoid_regexp_to_use->Execute(frame_function_name, &regex_match);
-        if (return_value) {
-          if (log) {
-            std::string match;
-            regex_match.GetMatchAtIndex(frame_function_name, 0, match);
-            LLDB_LOGF(log,
-                      "Stepping out of function \"%s\" because it matches "
-                      "the avoid regexp \"%s\" - match substring: \"%s\".",
-                      frame_function_name,
-                      avoid_regexp_to_use->GetText().str().c_str(),
-                      match.c_str());
-          }
+            avoid_regexp_to_use->Execute(frame_function_name, &matches);
+        Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP));
+        if (return_value && log && !matches.empty()) {
+          std::string match = matches[0].str(); // Index 0 is the whole match.
+          LLDB_LOGF(log,
+                    "Stepping out of function \"%s\" because it matches "
+                    "the avoid regexp \"%s\" - match substring: \"%s\".",
+                    frame_function_name,
+                    avoid_regexp_to_use->GetText().str().c_str(),
+                    match.c_str());
         }
         return return_value;
       }
Index: lldb/source/Symbol/Variable.cpp
===================================================================
--- lldb/source/Symbol/Variable.cpp
+++ lldb/source/Symbol/Variable.cpp
@@ -390,21 +390,21 @@
   default: {
     static RegularExpression g_regex(
         llvm::StringRef("^([A-Za-z_:][A-Za-z_0-9:]*)(.*)"));
-    RegularExpression::Match regex_match(1);
-    std::string variable_name;
+    llvm::SmallVector<llvm::StringRef, 2> matches;
     variable_list.Clear();
-    if (!g_regex.Execute(variable_expr_path, &regex_match)) {
+    if (!g_regex.Execute(variable_expr_path, &matches)) {
       error.SetErrorStringWithFormat(
           "unable to extract a variable name from '%s'",
           variable_expr_path.str().c_str());
       return error;
     }
-    if (!regex_match.GetMatchAtIndex(variable_expr_path, 1, variable_name)) {
+    if (matches.size() < 2) {
       error.SetErrorStringWithFormat(
           "unable to extract a variable name from '%s'",
           variable_expr_path.str().c_str());
       return error;
     }
+    std::string variable_name = matches[1].str();
     if (!callback(baton, variable_name.c_str(), variable_list)) {
       error.SetErrorString("unknown error");
       return error;
Index: lldb/source/Symbol/ObjectFile.cpp
===================================================================
--- lldb/source/Symbol/ObjectFile.cpp
+++ lldb/source/Symbol/ObjectFile.cpp
@@ -575,14 +575,14 @@
                                             FileSpec &archive_file,
                                             ConstString &archive_object,
                                             bool must_exist) {
+  llvm::SmallVector<llvm::StringRef, 3> matches;
   RegularExpression g_object_regex(llvm::StringRef("(.*)\\(([^\\)]+)\\)$"));
-  RegularExpression::Match regex_match(2);
   if (g_object_regex.Execute(llvm::StringRef::withNullAsEmpty(path_with_object),
-                             &regex_match)) {
-    std::string path;
-    std::string obj;
-    if (regex_match.GetMatchAtIndex(path_with_object, 1, path) &&
-        regex_match.GetMatchAtIndex(path_with_object, 2, obj)) {
+                             &matches)) {
+
+    if (matches.size() >= 3) {
+      std::string path = matches[1].str();
+      std::string obj = matches[2].str();
       archive_file.SetFile(path, FileSpec::Style::native);
       archive_object.SetCString(obj.c_str());
       return !(must_exist && !FileSystem::Instance().Exists(archive_file));
Index: lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
===================================================================
--- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -540,19 +540,19 @@
       } else if (strstr(producer_cstr, "clang")) {
         static RegularExpression g_clang_version_regex(
             llvm::StringRef("clang-([0-9]+)\\.([0-9]+)\\.([0-9]+)"));
-        RegularExpression::Match regex_match(3);
+        llvm::SmallVector<llvm::StringRef, 4> matches;
         if (g_clang_version_regex.Execute(llvm::StringRef(producer_cstr),
-                                          &regex_match)) {
-          std::string str;
-          if (regex_match.GetMatchAtIndex(producer_cstr, 1, str))
-            m_producer_version_major =
-                StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
-          if (regex_match.GetMatchAtIndex(producer_cstr, 2, str))
-            m_producer_version_minor =
-                StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
-          if (regex_match.GetMatchAtIndex(producer_cstr, 3, str))
-            m_producer_version_update =
-                StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
+                                          &matches)) {
+          const size_t number_of_matches = matches.size();
+          if (number_of_matches > 1)
+            m_producer_version_major = StringConvert::ToUInt32(
+                matches[1].str().c_str(), UINT32_MAX, 10);
+          if (number_of_matches > 2)
+            m_producer_version_minor = StringConvert::ToUInt32(
+                matches[2].str().c_str(), UINT32_MAX, 10);
+          if (number_of_matches > 3)
+            m_producer_version_update = StringConvert::ToUInt32(
+                matches[3].str().c_str(), UINT32_MAX, 10);
         }
         m_producer = eProducerClang;
       } else if (strstr(producer_cstr, "GNU"))
Index: lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp
===================================================================
--- lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp
+++ lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp
@@ -288,11 +288,8 @@
 
     // Instantiate the regex so we can report any errors.
     auto regex = RegularExpression(op_arg);
-    if (!regex.IsValid()) {
-      char error_text[256];
-      error_text[0] = '\0';
-      regex.GetErrorAsCString(error_text, sizeof(error_text));
-      error.SetErrorString(error_text);
+    if (auto err = regex.GetError()) {
+      error.SetErrorString(*err);
       return FilterRuleSP();
     }
 
Index: lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
===================================================================
--- lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
+++ lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
@@ -137,14 +137,12 @@
         // ends at
         static RegularExpression g_bitfield_regex(
             llvm::StringRef("([A-Za-z_][A-Za-z0-9_]*)\\[([0-9]+):([0-9]+)\\]"));
-        RegularExpression::Match regex_match(3);
-        if (g_bitfield_regex.Execute(slice_str, &regex_match)) {
-          llvm::StringRef reg_name_str;
-          std::string msbit_str;
-          std::string lsbit_str;
-          if (regex_match.GetMatchAtIndex(slice_str, 1, reg_name_str) &&
-              regex_match.GetMatchAtIndex(slice_str, 2, msbit_str) &&
-              regex_match.GetMatchAtIndex(slice_str, 3, lsbit_str)) {
+        llvm::SmallVector<llvm::StringRef, 4> matches;
+        if (g_bitfield_regex.Execute(slice_str, &matches)) {
+          if (matches.size() >= 4) {
+            std::string reg_name_str = matches[1].str();
+            std::string msbit_str = matches[2].str();
+            std::string lsbit_str = matches[3].str();
             const uint32_t msbit =
                 StringConvert::ToUInt32(msbit_str.c_str(), UINT32_MAX);
             const uint32_t lsbit =
Index: lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp
===================================================================
--- lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp
+++ lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp
@@ -443,27 +443,28 @@
   // returned, `true` otherwise
 
   RegularExpression regex;
-  RegularExpression::Match regex_match(3);
+  llvm::SmallVector<llvm::StringRef, 4> matches;
 
   bool matched = false;
   if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+),([0-9]+)$")) &&
-      regex.Execute(coord_s, &regex_match))
+      regex.Execute(coord_s, &matches))
     matched = true;
   else if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+)$")) &&
-           regex.Execute(coord_s, &regex_match))
+           regex.Execute(coord_s, &matches))
     matched = true;
   else if (regex.Compile(llvm::StringRef("^([0-9]+)$")) &&
-           regex.Execute(coord_s, &regex_match))
+           regex.Execute(coord_s, &matches))
     matched = true;
 
   if (!matched)
     return false;
 
-  auto get_index = [&](int idx, uint32_t &i) -> bool {
+  auto get_index = [&](size_t idx, uint32_t &i) -> bool {
     std::string group;
     errno = 0;
-    if (regex_match.GetMatchAtIndex(coord_s.str().c_str(), idx + 1, group))
-      return !llvm::StringRef(group).getAsInteger<uint32_t>(10, i);
+    if (idx + 1 < matches.size()) {
+      return !llvm::StringRef(matches[idx + 1]).getAsInteger<uint32_t>(10, i);
+    }
     return true;
   };
 
@@ -4147,13 +4148,12 @@
       // Matching a comma separated list of known words is fairly
       // straightforward with PCRE, but we're using ERE, so we end up with a
       // little ugliness...
-      RegularExpression::Match match(/* max_matches */ 5);
       RegularExpression match_type_list(
           llvm::StringRef("^([[:alpha:]]+)(,[[:alpha:]]+){0,4}$"));
 
       assert(match_type_list.IsValid());
 
-      if (!match_type_list.Execute(option_val, &match)) {
+      if (!match_type_list.Execute(option_val)) {
         err_str.PutCString(
             "a comma-separated list of kernel types is required");
         return false;
Index: lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp
===================================================================
--- lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp
+++ lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp
@@ -381,11 +381,10 @@
         static RegularExpression s_regex(
             llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
 
-        RegularExpression::Match matches(3);
-
-        if (s_regex.Execute(out_string, &matches)) {
-          matches.GetMatchAtIndex(out_string.c_str(), 1, m_opcode_name);
-          matches.GetMatchAtIndex(out_string.c_str(), 2, m_mnemonics);
+        llvm::SmallVector<llvm::StringRef, 4> matches;
+        if (s_regex.Execute(out_string, &matches) && matches.size() >= 3) {
+          m_opcode_name = matches[1].str();
+          m_mnemonics = matches[2].str();
         }
       }
     }
Index: lldb/source/Interpreter/OptionValueRegex.cpp
===================================================================
--- lldb/source/Interpreter/OptionValueRegex.cpp
+++ lldb/source/Interpreter/OptionValueRegex.cpp
@@ -49,13 +49,10 @@
     if (m_regex.Compile(value)) {
       m_value_was_set = true;
       NotifyValueChanged();
+    } else if (auto e = m_regex.GetError()) {
+      error.SetErrorString(*e);
     } else {
-      char regex_error[1024];
-      if (m_regex.GetErrorAsCString(regex_error, sizeof(regex_error)))
-        error.SetErrorString(regex_error);
-      else
-        error.SetErrorStringWithFormat("regex error %u",
-                                       m_regex.GetErrorCode());
+      error.SetErrorString("regex error");
     }
     break;
   }
Index: lldb/source/Interpreter/OptionArgParser.cpp
===================================================================
--- lldb/source/Interpreter/OptionArgParser.cpp
+++ lldb/source/Interpreter/OptionArgParser.cpp
@@ -211,28 +211,22 @@
     // pointer types.
     static RegularExpression g_symbol_plus_offset_regex(
         "^(.*)([-\\+])[[:space:]]*(0x[0-9A-Fa-f]+|[0-9]+)[[:space:]]*$");
-    RegularExpression::Match regex_match(3);
-    if (g_symbol_plus_offset_regex.Execute(sref, &regex_match)) {
+
+    llvm::SmallVector<llvm::StringRef, 4> matches;
+    if (g_symbol_plus_offset_regex.Execute(sref, &matches)) {
       uint64_t offset = 0;
-      bool add = true;
-      std::string name;
-      std::string str;
-      if (regex_match.GetMatchAtIndex(s, 1, name)) {
-        if (regex_match.GetMatchAtIndex(s, 2, str)) {
-          add = str[0] == '+';
-
-          if (regex_match.GetMatchAtIndex(s, 3, str)) {
-            if (!llvm::StringRef(str).getAsInteger(0, offset)) {
-              Status error;
-              addr = ToAddress(exe_ctx, name.c_str(), LLDB_INVALID_ADDRESS,
-                               &error);
-              if (addr != LLDB_INVALID_ADDRESS) {
-                if (add)
-                  return addr + offset;
-                else
-                  return addr - offset;
-              }
-            }
+      if (matches.size() >= 4) { // Whole match plus the 3 groups read below.
+        std::string name = matches[1].str();
+        std::string sign = matches[2].str();
+        std::string str_offset = matches[3].str();
+        if (!llvm::StringRef(str_offset).getAsInteger(0, offset)) {
+          Status error;
+          addr = ToAddress(exe_ctx, name.c_str(), LLDB_INVALID_ADDRESS, &error);
+          if (addr != LLDB_INVALID_ADDRESS) {
+            if (sign[0] == '+')
+              return addr + offset;
+            else
+              return addr - offset;
           }
         }
       }
Index: lldb/source/Interpreter/CommandObjectRegexCommand.cpp
===================================================================
--- lldb/source/Interpreter/CommandObjectRegexCommand.cpp
+++ lldb/source/Interpreter/CommandObjectRegexCommand.cpp
@@ -30,15 +30,14 @@
                                           CommandReturnObject &result) {
   EntryCollection::const_iterator pos, end = m_entries.end();
   for (pos = m_entries.begin(); pos != end; ++pos) {
-    RegularExpression::Match regex_match(m_max_matches);
-
-    if (pos->regex.Execute(command, &regex_match)) {
+    llvm::SmallVector<llvm::StringRef, 4> matches;
+    if (pos->regex.Execute(command, &matches)) {
       std::string new_command(pos->command);
-      std::string match_str;
       char percent_var[8];
       size_t idx, percent_var_idx;
       for (uint32_t match_idx = 1; match_idx <= m_max_matches; ++match_idx) {
-        if (regex_match.GetMatchAtIndex(command, match_idx, match_str)) {
+        if (match_idx < matches.size()) {
+          const std::string match_str = matches[match_idx].str();
           const int percent_var_len =
               ::snprintf(percent_var, sizeof(percent_var), "%%%u", match_idx);
           for (idx = 0; (percent_var_idx = new_command.find(
Index: lldb/source/Host/common/Socket.cpp
===================================================================
--- lldb/source/Host/common/Socket.cpp
+++ lldb/source/Host/common/Socket.cpp
@@ -282,10 +282,11 @@
                                int32_t &port, Status *error_ptr) {
   static RegularExpression g_regex(
       llvm::StringRef("([^:]+|\\[[0-9a-fA-F:]+.*\\]):([0-9]+)"));
-  RegularExpression::Match regex_match(2);
-  if (g_regex.Execute(host_and_port, &regex_match)) {
-    if (regex_match.GetMatchAtIndex(host_and_port, 1, host_str) &&
-        regex_match.GetMatchAtIndex(host_and_port, 2, port_str)) {
+  llvm::SmallVector<llvm::StringRef, 3> matches;
+  if (g_regex.Execute(host_and_port, &matches)) {
+    if (matches.size() >= 3) {
+      host_str = matches[1].str();
+      port_str = matches[2].str();
       // IPv6 addresses are wrapped in [] when specified with ports
       if (host_str.front() == '[' && host_str.back() == ']')
         host_str = host_str.substr(1, host_str.size() - 2);
Index: lldb/source/Core/Disassembler.cpp
===================================================================
--- lldb/source/Core/Disassembler.cpp
+++ lldb/source/Core/Disassembler.cpp
@@ -355,12 +355,9 @@
     const char *function_name =
         sc.GetFunctionName(Mangled::ePreferDemangledWithoutArguments)
             .GetCString();
-    if (function_name) {
-      RegularExpression::Match regex_match(1);
-      if (avoid_regex->Execute(function_name, &regex_match)) {
-        // skip this source line
-        return true;
-      }
+    if (function_name && avoid_regex->Execute(function_name)) {
+      // skip this source line
+      return true;
     }
   }
   // don't skip this source line
@@ -793,10 +790,9 @@
       std::string value;
       static RegularExpression g_reg_exp(
           llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$"));
-      RegularExpression::Match regex_match(1);
-      bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
-      if (reg_exp_success)
-        regex_match.GetMatchAtIndex(line.c_str(), 1, value);
+      llvm::SmallVector<llvm::StringRef, 2> matches;
+      if (g_reg_exp.Execute(line, &matches) && matches.size() >= 2)
+        value = matches[1].str(); // Group 1 is the token without whitespace.
       else
         value = line;
 
@@ -856,14 +852,15 @@
     if (!line.empty()) {
       static RegularExpression g_reg_exp(llvm::StringRef(
           "^[ \t]*([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*=[ \t]*(.*)[ \t]*$"));
-      RegularExpression::Match regex_match(2);
 
-      bool reg_exp_success = g_reg_exp.Execute(line, &regex_match);
+      llvm::SmallVector<llvm::StringRef, 3> matches;
+
+      bool reg_exp_success = g_reg_exp.Execute(line, &matches);
       std::string key;
       std::string value;
-      if (reg_exp_success) {
-        regex_match.GetMatchAtIndex(line.c_str(), 1, key);
-        regex_match.GetMatchAtIndex(line.c_str(), 2, value);
+      if (reg_exp_success && matches.size() > 2) {
+        key = matches[1].str();
+        value = matches[2].str();
       } else {
         out_stream->Printf("Instruction::ReadDictionary: Failure executing "
                            "regular expression.\n");
Index: lldb/source/Commands/CommandObjectFrame.cpp
===================================================================
--- lldb/source/Commands/CommandObjectFrame.cpp
+++ lldb/source/Commands/CommandObjectFrame.cpp
@@ -1,4 +1,4 @@
-//===-- CommandObjectFrame.cpp ----------------------------------*- C++ -*-===//
+//===-- CommandObjectFrame.cpp ----------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -573,9 +573,8 @@
                                                entry.c_str());
               }
             } else {
-              char regex_error[1024];
-              if (regex.GetErrorAsCString(regex_error, sizeof(regex_error)))
-                result.GetErrorStream().Printf("error: %s\n", regex_error);
+              if (auto err = regex.GetError())
+                result.GetErrorStream().Printf("error: %s\n", err->c_str());
               else
                 result.GetErrorStream().Printf(
                     "error: unknown regex error when compiling '%s'\n",
Index: lldb/source/Commands/CommandObjectBreakpoint.cpp
===================================================================
--- lldb/source/Commands/CommandObjectBreakpoint.cpp
+++ lldb/source/Commands/CommandObjectBreakpoint.cpp
@@ -682,12 +682,10 @@
                                  // name
       {
         RegularExpression regexp(m_options.m_func_regexp);
-        if (!regexp.IsValid()) {
-          char err_str[1024];
-          regexp.GetErrorAsCString(err_str, sizeof(err_str));
+        if (auto error = regexp.GetError()) {
           result.AppendErrorWithFormat(
               "Function name regular expression could not be compiled: \"%s\"",
-              err_str);
+              error->c_str());
           result.SetStatus(eReturnStatusFailed);
           return false;
         }
@@ -718,12 +716,10 @@
       }
 
       RegularExpression regexp(m_options.m_source_text_regexp);
-      if (!regexp.IsValid()) {
-        char err_str[1024];
-        regexp.GetErrorAsCString(err_str, sizeof(err_str));
+      if (auto error = regexp.GetError()) {
         result.AppendErrorWithFormat(
             "Source text regular expression could not be compiled: \"%s\"",
-            err_str);
+            error->c_str());
         result.SetStatus(eReturnStatusFailed);
         return false;
       }
Index: lldb/include/lldb/Utility/RegularExpression.h
===================================================================
--- lldb/include/lldb/Utility/RegularExpression.h
+++ lldb/include/lldb/Utility/RegularExpression.h
@@ -9,98 +9,21 @@
 #ifndef liblldb_RegularExpression_h_
 #define liblldb_RegularExpression_h_
 
-#ifdef _WIN32
-#include "../lib/Support/regex_impl.h"
-
-typedef llvm_regmatch_t regmatch_t;
-typedef llvm_regex_t regex_t;
-
-inline int regcomp(llvm_regex_t *a, const char *b, int c) {
-  return llvm_regcomp(a, b, c);
-}
-
-inline size_t regerror(int a, const llvm_regex_t *b, char *c, size_t d) {
-  return llvm_regerror(a, b, c, d);
-}
-
-inline int regexec(const llvm_regex_t *a, const char *b, size_t c,
-                   llvm_regmatch_t d[], int e) {
-  return llvm_regexec(a, b, c, d, e);
-}
-
-inline void regfree(llvm_regex_t *a) { llvm_regfree(a); }
-#else
-#ifdef __ANDROID__
-#include <regex>
-#endif
-#include <regex.h>
-#endif
-
-#include <string>
-#include <vector>
-
-#include <stddef.h>
-#include <stdint.h>
-
-namespace llvm {
-class StringRef;
-} // namespace llvm
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
 
 namespace lldb_private {
 
-/// \class RegularExpression RegularExpression.h
-/// "lldb/Utility/RegularExpression.h"
-/// A C++ wrapper class for regex.
-///
-/// This regular expression class wraps the posix regex functions \c
-/// regcomp(), \c regerror(), \c regexec(), and \c regfree() from the header
-/// file in \c /usr/include/regex\.h.
-class RegularExpression {
+class RegularExpression { // Wraps llvm::Regex by composition (m_regex).
 public:
-  class Match {
-  public:
-    Match(uint32_t max_matches) : m_matches() {
-      if (max_matches > 0)
-        m_matches.resize(max_matches + 1);
-    }
-
-    void Clear() {
-      const size_t num_matches = m_matches.size();
-      regmatch_t invalid_match = {-1, -1};
-      for (size_t i = 0; i < num_matches; ++i)
-        m_matches[i] = invalid_match;
-    }
-
-    size_t GetSize() const { return m_matches.size(); }
-
-    regmatch_t *GetData() {
-      return (m_matches.empty() ? nullptr : m_matches.data());
-    }
-
-    bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
-                         std::string &match_str) const;
-
-    bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx,
-                         llvm::StringRef &match_str) const;
-
-  protected:
-    std::vector<regmatch_t>
-        m_matches; ///< Where parenthesized subexpressions results are stored
-  };
-
   /// Default constructor.
   ///
   /// The default constructor that initializes the object state such that it
   /// contains no compiled regular expression.
-  RegularExpression();
+  RegularExpression() = default;
 
   explicit RegularExpression(llvm::StringRef string);
-
-  /// Destructor.
-  ///
-  /// Any previously compiled regular expression contained in this object will
-  /// be freed.
-  ~RegularExpression();
+  ~RegularExpression() = default;
 
   RegularExpression(const RegularExpression &rhs);
 
@@ -122,7 +45,6 @@
   ///     \b true if the regular expression compiles successfully,
   ///     \b false otherwise.
   bool Compile(llvm::StringRef string);
-  bool Compile(const char *) = delete;
 
   /// Executes a regular expression.
   ///
@@ -143,16 +65,8 @@
   /// \return
   ///     \b true if \a string matches the compiled regular
   ///     expression, \b false otherwise.
-  bool Execute(llvm::StringRef string, Match *match = nullptr) const;
-  bool Execute(const char *, Match * = nullptr) = delete;
-
-  size_t GetErrorAsCString(char *err_str, size_t err_str_max_len) const;
-
-  /// Free the compiled regular expression.
-  ///
-  /// If this object contains a valid compiled regular expression, this
-  /// function will free any resources it was consuming.
-  void Free();
+  bool Execute(llvm::StringRef string,
+               llvm::SmallVectorImpl<llvm::StringRef> *matches = nullptr) const;
 
   /// Access the regular expression text.
   ///
@@ -169,25 +83,17 @@
   /// Test if this object contains a valid regular expression.
   ///
   /// \return
-  ///     \b true if the regular expression compiled and is ready
-  ///     for execution, \b false otherwise.
+  ///     \b true if the regular expression compiled and is ready for
+  ///     execution, \b false otherwise.
   bool IsValid() const;
 
-  void Clear() {
-    Free();
-    m_re.clear();
-    m_comp_err = 1;
-  }
-
-  int GetErrorCode() const { return m_comp_err; }
-
-  bool operator<(const RegularExpression &rhs) const;
+  llvm::Optional<std::string> GetError() const;
 
 private:
-  // Member variables
-  std::string m_re; ///< A copy of the original regular expression text
-  int m_comp_err;   ///< Status code for the regular expression compilation
-  regex_t m_preg;   ///< The compiled regular expression
+  /// A copy of the original regular expression text.
+  std::string m_regex_text;
+  /// The compiled regular expression.
+  mutable llvm::Regex m_regex;
 };
 
 } // namespace lldb_private
Index: lldb/include/lldb/Interpreter/OptionValueRegex.h
===================================================================
--- lldb/include/lldb/Interpreter/OptionValueRegex.h
+++ lldb/include/lldb/Interpreter/OptionValueRegex.h
@@ -36,7 +36,7 @@
                      VarSetOperationType = eVarSetOperationAssign) = delete;
 
   bool Clear() override {
-    m_regex.Clear();
+    m_regex = RegularExpression();
     m_value_was_set = false;
     return true;
   }
@@ -52,7 +52,7 @@
     if (value && value[0])
       m_regex.Compile(llvm::StringRef(value));
     else
-      m_regex.Clear();
+      m_regex = RegularExpression();
   }
 
   bool IsValid() const { return m_regex.IsValid(); }
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to