To be clear: if we can make StringRef work efficiently, I am fine with that. We can just cut over to using llvm::Regex where it uses the start and end pointer. I would just like to avoid making string copies for no reason. I don't have anything against using StringRef as long as we do it efficiently.
Greg > On Sep 21, 2016, at 5:13 PM, Greg Clayton via lldb-commits > <lldb-commits@lists.llvm.org> wrote: > >> >> On Sep 21, 2016, at 4:43 PM, Zachary Turner <ztur...@google.com> wrote: >> >> You need to duplicate something on the heap once when you execute the regex. >> And in turn you save tens or hundreds or copies on the way there because of >> inefficient string usage. > > Where? please show this. > > I see the following callers: > > const char *class_name = > iterator->second->GetClassName().AsCString("<unknown>"); > if (regex_up && class_name && > !regex_up->Execute(llvm::StringRef(class_name))) > > You are adding a strlen() call here to construct the StringRef, not saving > anything. > > > bool CommandObjectRegexCommand::DoExecute(const char *command, > CommandReturnObject &result) { > if (command) { > EntryCollection::const_iterator pos, end = m_entries.end(); > for (pos = m_entries.begin(); pos != end; ++pos) { > RegularExpression::Match regex_match(m_max_matches); > > if (pos->regex.Execute(command, ®ex_match)) { > std::string new_command(pos->command); > std::string match_str; > > No copy saved. Just wasted time with strlen in StringRef constructor. > > > DataVisualization::Categories::ForEach( > [®ex, &result](const lldb::TypeCategoryImplSP &category_sp) -> bool > { > if (regex) { > bool escape = true; > if (regex->GetText() == category_sp->GetName()) { > escape = false; > } else if (regex->Execute(llvm::StringRef::withNullAsEmpty( > category_sp->GetName()))) { > escape = false; > } > > if (escape) > return true; > } > > No copy saved. Just wasted time with strlen in StringRef constructor. > > > TypeCategoryImpl::ForEachCallbacks<FormatterType> foreach; > foreach > .SetExact([&result, &formatter_regex, &any_printed]( > ConstString name, > const FormatterSharedPointer &format_sp) -> bool { > if (formatter_regex) { > bool escape = true; > if (name.GetStringRef() == formatter_regex->GetText()) { > escape = false; > } else if (formatter_regex->Execute(name.GetStringRef())) { > escape = false; > } > > No copy saved. Just wasted time with strlen in StringRef constructor. > > bool ParseCoordinate(const char *id_cstr) { > RegularExpression regex; > RegularExpression::Match regex_match(3); > > llvm::StringRef id_ref = llvm::StringRef::withNullAsEmpty(id_cstr); > bool matched = false; > if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+),([0-9]+)$")) && > regex.Execute(id_ref, ®ex_match)) > matched = true; > else if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+)$")) && > regex.Execute(id_ref, ®ex_match)) > matched = true; > else if (regex.Compile(llvm::StringRef("^([0-9]+)$")) && > regex.Execute(id_ref, ®ex_match)) > > No copy saved. Just wasted time with strlen in StringRef constructor. > > void DWARFCompileUnit::ParseProducerInfo() { > m_producer_version_major = UINT32_MAX; > m_producer_version_minor = UINT32_MAX; > m_producer_version_update = UINT32_MAX; > > const DWARFDebugInfoEntry *die = GetCompileUnitDIEPtrOnly(); > if (die) { > > const char *producer_cstr = die->GetAttributeValueAsString( > m_dwarf2Data, this, DW_AT_producer, NULL); > if (producer_cstr) { > RegularExpression llvm_gcc_regex( > llvm::StringRef("^4\\.[012]\\.[01] \\(Based on Apple " > "Inc\\. build [0-9]+\\) \\(LLVM build " > "[\\.0-9]+\\)$")); > if (llvm_gcc_regex.Execute(llvm::StringRef(producer_cstr))) { > m_producer = eProducerLLVMGCC; > } else if (strstr(producer_cstr, "clang")) { > static RegularExpression g_clang_version_regex( > llvm::StringRef("clang-([0-9]+)\\.([0-9]+)\\.([0-9]+)")); > RegularExpression::Match regex_match(3); > if (g_clang_version_regex.Execute(llvm::StringRef(producer_cstr), > ®ex_match)) { > > No copy saved. Just wasted time with strlen in StringRef constructor (2 of > them mind you). > > void DWARFDebugPubnamesSet::Find( > const RegularExpression ®ex, > std::vector<dw_offset_t> &die_offset_coll) const { > DescriptorConstIter pos; > DescriptorConstIter end = m_descriptors.end(); > for (pos = m_descriptors.begin(); pos != end; ++pos) { > if (regex.Execute(pos->name.c_str())) > die_offset_coll.push_back(m_header.die_offset + pos->offset); > } > } > > No copy saved. Just wasted time with strlen in StringRef constructor (2 of > them mind you). > > > std::string slice_str; > if (reg_info_dict->GetValueForKeyAsString("slice", slice_str, nullptr)) { > // Slices use the following format: > // REGNAME[MSBIT:LSBIT] > // REGNAME - name of the register to grab a slice of > // MSBIT - the most significant bit at which the current register value > // starts at > // LSBIT - the least significant bit at which the current register > value > // ends at > static RegularExpression g_bitfield_regex( > > llvm::StringRef("([A-Za-z_][A-Za-z0-9_]*)\\[([0-9]+):([0-9]+)\\]")); > RegularExpression::Match regex_match(3); > if (g_bitfield_regex.Execute(slice_str, ®ex_match)) { > > No copy saved. > > void SourceManager::File::FindLinesMatchingRegex( > RegularExpression ®ex, uint32_t start_line, uint32_t end_line, > std::vector<uint32_t> &match_lines) { > match_lines.clear(); > > if (!LineIsValid(start_line) || > (end_line != UINT32_MAX && !LineIsValid(end_line))) > return; > if (start_line > end_line) > return; > > for (uint32_t line_no = start_line; line_no < end_line; line_no++) { > std::string buffer; > if (!GetLine(line_no, buffer)) > break; > if (regex.Execute(buffer)) { > match_lines.push_back(line_no); > } > } > } > > No copy saved. > > > static dw_offset_t > FindCallbackString(SymbolFileDWARF *dwarf2Data, DWARFCompileUnit *cu, > DWARFDebugInfoEntry *die, const dw_offset_t next_offset, > const uint32_t curr_depth, void *userData) { > FindCallbackStringInfo *info = (FindCallbackStringInfo *)userData; > > if (!die) > return next_offset; > > const char *die_name = die->GetName(dwarf2Data, cu); > if (!die_name) > return next_offset; > > if (info->regex) { > if (info->regex->Execute(llvm::StringRef(die_name))) > info->die_offsets.push_back(die->GetOffset()); > } else { > if ((info->ignore_case ? strcasecmp(die_name, info->name) > : strcmp(die_name, info->name)) == 0) > info->die_offsets.push_back(die->GetOffset()); > } > > // Just return the current offset to parse the next CU or DIE entry > return next_offset; > } > > No copy saved. > > > bool Get_Impl(ConstString key, MapValueType &value, > lldb::RegularExpressionSP *dummy) { > llvm::StringRef key_str = key.GetStringRef(); > std::lock_guard<std::recursive_mutex> guard(m_format_map.mutex()); > MapIterator pos, end = m_format_map.map().end(); > for (pos = m_format_map.map().begin(); pos != end; pos++) { > lldb::RegularExpressionSP regex = pos->first; > if (regex->Execute(key_str)) { > value = pos->second; > return true; > } > } > return false; > } > > No copy saved. > > > PacketResult echo_packet_result = > SendPacketNoLock(llvm::StringRef(echo_packet, echo_packet_len)); > if (echo_packet_result == PacketResult::Success) { > const uint32_t max_retries = 3; > uint32_t successful_responses = 0; > for (uint32_t i = 0; i < max_retries; ++i) { > StringExtractorGDBRemote echo_response; > echo_packet_result = WaitForPacketWithTimeoutMicroSecondsNoLock( > echo_response, timeout_usec, false); > if (echo_packet_result == PacketResult::Success) { > ++successful_responses; > if (response_regex.Execute(echo_response.GetStringRef())) { > sync_success = true; > > No copy saved. > > > OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream, > OptionValue::Type data_type) { > bool done = false; > char buffer[1024]; > > OptionValueSP option_value_sp(new OptionValueArray(1u << data_type)); > > int idx = 0; > while (!done) { > if (!fgets(buffer, 1023, in_file)) { > out_stream->Printf( > "Instruction::ReadArray: Error reading file (fgets).\n"); > option_value_sp.reset(); > return option_value_sp; > } > > std::string line(buffer); > > size_t len = line.size(); > if (line[len - 1] == '\n') { > line[len - 1] = '\0'; > line.resize(len - 1); > } > > if ((line.size() == 1) && line[0] == ']') { > done = true; > line.clear(); > } > > if (!line.empty()) { > std::string value; > static RegularExpression g_reg_exp( > llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$")); > RegularExpression::Match regex_match(1); > bool reg_exp_success = g_reg_exp.Execute(line, ®ex_match); > if (reg_exp_success) > regex_match.GetMatchAtIndex(line.c_str(), 1, value); > else > value = line; > > No copy saved. > > I could do on with all 31 call sites, but I will stop. All that was added was > inefficiency and requiring us to make a copy of the string before it is used > to evaluate. Some of these are evaluated in tight loops, like the searching > for type summaries and formats that are regex based. Happens once for each > item in a SBValue (and once per child that is displayed... > >> We could also just un-delete the overload that takes a const char*, then the >> duplication would only ever happen when you explicitly use a StringRef. > > For execute it is fine to make one that takes a StringRef, but it would just > call .str().c_str() and call the other one. We would assume the "const char > *" version of execute would be null terminated. > >> >> I don't agree this should be reverted. In the process of doing this >> conversion I eliminated numerous careless string copies. > > As I showed above the opposite was true. We made many calls to strlen to > construct the StringRef so we actually made things slower. > > Greg > >> >> On Wed, Sep 21, 2016 at 4:38 PM Greg Clayton <gclay...@apple.com> wrote: >> This it the perfect example of why not to use a StringRef since the string >> needs to be null terminated. Why did we change this? Now even if you call >> this function: >> >> RegularExpression r(...); >> >> r.Execute(".......................", ...) >> >> You will need to duplicate the string on the heap just to execute this. >> Please revert this. Anything that requires null terminate is not a candidate >> for converting to StringRef. >> >> >>> On Sep 21, 2016, at 10:13 AM, Zachary Turner via lldb-commits >>> <lldb-commits@lists.llvm.org> wrote: >>> >>> Author: zturner >>> Date: Wed Sep 21 12:13:51 2016 >>> New Revision: 282090 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=282090&view=rev >>> Log: >>> Fix failing regex tests. >>> >>> r282079 converted the regular expression interface to accept >>> and return StringRefs instead of char pointers. In one case >>> a null pointer check was converted to an empty string check, >>> but this was an incorrect conversion because an empty string >>> is a valid regular expression. Removing this check should >>> fix the test failures. >>> >>> Modified: >>> lldb/trunk/source/Core/RegularExpression.cpp >>> >>> Modified: lldb/trunk/source/Core/RegularExpression.cpp >>> URL: >>> http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Core/RegularExpression.cpp?rev=282090&r1=282089&r2=282090&view=diff >>> ============================================================================== >>> --- lldb/trunk/source/Core/RegularExpression.cpp (original) >>> +++ lldb/trunk/source/Core/RegularExpression.cpp Wed Sep 21 12:13:51 2016 >>> @@ -102,7 +102,7 @@ bool RegularExpression::Compile(llvm::St >>> //--------------------------------------------------------------------- >>> bool RegularExpression::Execute(llvm::StringRef str, Match *match) const { >>> int err = 1; >>> - if (!str.empty() && m_comp_err == 0) { >>> + if (m_comp_err == 0) { >>> // Argument to regexec must be null-terminated. >>> std::string reg_str = str; >>> if (match) { >>> >>> >>> _______________________________________________ >>> lldb-commits mailing list >>> lldb-commits@lists.llvm.org >>> http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits >> > > _______________________________________________ > lldb-commits mailing list > lldb-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits