Yep, and we can't have any regex objects in LLDB using those because they will only work on Apple and we don't want code like:
#if defined(__APPLE__) RegularExpression e("\s+"); #else RegularExpression e("[ \t]+"); #endif I know "\s" is probably extended, so this was a bad example, but you get my drift. Greg > On Sep 21, 2016, at 5:19 PM, Zachary Turner <ztur...@google.com> wrote: > > That sounds like a plan. BTW, extended is the one that everyone supports, > enhanced is the one that only apple supports. > > On Wed, Sep 21, 2016 at 5:18 PM Greg Clayton <gclay...@apple.com> wrote: > And we should check for any "extended" mode regex stuff and get rid of it > since as you said they are not portable. They tend to be shortcuts for > classes of objects and we can just fix the regex to be more explicit about > it. For now we would keep the lldb_private::RegularExpression class, have it > have a llvm::Regex member and then lldbassert if the regex fails to compile > so we can catch any extended cruft that we might miss so we can fix it... > > > On Sep 21, 2016, at 5:15 PM, Greg Clayton <gclay...@apple.com> wrote: > > > > To be clear: if we can make StringRef work efficiently, I am fine with > > that. We can just cut over to using llvm::Regex where it uses the start and > > end pointer. I would just like to avoid making string copies for no reason. > > I don't have anything against using StringRef as long as we do it > > efficiently. > > > > Greg > > > > > >> On Sep 21, 2016, at 5:13 PM, Greg Clayton via lldb-commits > >> <lldb-commits@lists.llvm.org> wrote: > >> > >>> > >>> On Sep 21, 2016, at 4:43 PM, Zachary Turner <ztur...@google.com> wrote: > >>> > >>> You need to duplicate something on the heap once when you execute the > >>> regex. And in turn you save tens or hundreds or copies on the way there > >>> because of inefficient string usage. > >> > >> Where? please show this. > >> > >> I see the following callers: > >> > >> const char *class_name = > >> iterator->second->GetClassName().AsCString("<unknown>"); > >> if (regex_up && class_name && > >> !regex_up->Execute(llvm::StringRef(class_name))) > >> > >> You are adding a strlen() call here to construct the StringRef, not saving > >> anything. > >> > >> > >> bool CommandObjectRegexCommand::DoExecute(const char *command, > >> CommandReturnObject &result) { > >> if (command) { > >> EntryCollection::const_iterator pos, end = m_entries.end(); > >> for (pos = m_entries.begin(); pos != end; ++pos) { > >> RegularExpression::Match regex_match(m_max_matches); > >> > >> if (pos->regex.Execute(command, ®ex_match)) { > >> std::string new_command(pos->command); > >> std::string match_str; > >> > >> No copy saved. Just wasted time with strlen in StringRef constructor. > >> > >> > >> DataVisualization::Categories::ForEach( > >> [®ex, &result](const lldb::TypeCategoryImplSP &category_sp) -> > >> bool { > >> if (regex) { > >> bool escape = true; > >> if (regex->GetText() == category_sp->GetName()) { > >> escape = false; > >> } else if (regex->Execute(llvm::StringRef::withNullAsEmpty( > >> category_sp->GetName()))) { > >> escape = false; > >> } > >> > >> if (escape) > >> return true; > >> } > >> > >> No copy saved. Just wasted time with strlen in StringRef constructor. > >> > >> > >> TypeCategoryImpl::ForEachCallbacks<FormatterType> foreach; > >> foreach > >> .SetExact([&result, &formatter_regex, &any_printed]( > >> ConstString name, > >> const FormatterSharedPointer &format_sp) -> bool { > >> if (formatter_regex) { > >> bool escape = true; > >> if (name.GetStringRef() == formatter_regex->GetText()) { > >> escape = false; > >> } else if (formatter_regex->Execute(name.GetStringRef())) { > >> escape = false; > >> } > >> > >> No copy saved. Just wasted time with strlen in StringRef constructor. > >> > >> bool ParseCoordinate(const char *id_cstr) { > >> RegularExpression regex; > >> RegularExpression::Match regex_match(3); > >> > >> llvm::StringRef id_ref = llvm::StringRef::withNullAsEmpty(id_cstr); > >> bool matched = false; > >> if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+),([0-9]+)$")) && > >> regex.Execute(id_ref, ®ex_match)) > >> matched = true; > >> else if (regex.Compile(llvm::StringRef("^([0-9]+),([0-9]+)$")) && > >> regex.Execute(id_ref, ®ex_match)) > >> matched = true; > >> else if (regex.Compile(llvm::StringRef("^([0-9]+)$")) && > >> regex.Execute(id_ref, ®ex_match)) > >> > >> No copy saved. Just wasted time with strlen in StringRef constructor. > >> > >> void DWARFCompileUnit::ParseProducerInfo() { > >> m_producer_version_major = UINT32_MAX; > >> m_producer_version_minor = UINT32_MAX; > >> m_producer_version_update = UINT32_MAX; > >> > >> const DWARFDebugInfoEntry *die = GetCompileUnitDIEPtrOnly(); > >> if (die) { > >> > >> const char *producer_cstr = die->GetAttributeValueAsString( > >> m_dwarf2Data, this, DW_AT_producer, NULL); > >> if (producer_cstr) { > >> RegularExpression llvm_gcc_regex( > >> llvm::StringRef("^4\\.[012]\\.[01] \\(Based on Apple " > >> "Inc\\. build [0-9]+\\) \\(LLVM build " > >> "[\\.0-9]+\\)$")); > >> if (llvm_gcc_regex.Execute(llvm::StringRef(producer_cstr))) { > >> m_producer = eProducerLLVMGCC; > >> } else if (strstr(producer_cstr, "clang")) { > >> static RegularExpression g_clang_version_regex( > >> llvm::StringRef("clang-([0-9]+)\\.([0-9]+)\\.([0-9]+)")); > >> RegularExpression::Match regex_match(3); > >> if (g_clang_version_regex.Execute(llvm::StringRef(producer_cstr), > >> ®ex_match)) { > >> > >> No copy saved. Just wasted time with strlen in StringRef constructor (2 of > >> them mind you). > >> > >> void DWARFDebugPubnamesSet::Find( > >> const RegularExpression ®ex, > >> std::vector<dw_offset_t> &die_offset_coll) const { > >> DescriptorConstIter pos; > >> DescriptorConstIter end = m_descriptors.end(); > >> for (pos = m_descriptors.begin(); pos != end; ++pos) { > >> if (regex.Execute(pos->name.c_str())) > >> die_offset_coll.push_back(m_header.die_offset + pos->offset); > >> } > >> } > >> > >> No copy saved. Just wasted time with strlen in StringRef constructor (2 of > >> them mind you). > >> > >> > >> std::string slice_str; > >> if (reg_info_dict->GetValueForKeyAsString("slice", slice_str, > >> nullptr)) { > >> // Slices use the following format: > >> // REGNAME[MSBIT:LSBIT] > >> // REGNAME - name of the register to grab a slice of > >> // MSBIT - the most significant bit at which the current register > >> value > >> // starts at > >> // LSBIT - the least significant bit at which the current register > >> value > >> // ends at > >> static RegularExpression g_bitfield_regex( > >> > >> llvm::StringRef("([A-Za-z_][A-Za-z0-9_]*)\\[([0-9]+):([0-9]+)\\]")); > >> RegularExpression::Match regex_match(3); > >> if (g_bitfield_regex.Execute(slice_str, ®ex_match)) { > >> > >> No copy saved. > >> > >> void SourceManager::File::FindLinesMatchingRegex( > >> RegularExpression ®ex, uint32_t start_line, uint32_t end_line, > >> std::vector<uint32_t> &match_lines) { > >> match_lines.clear(); > >> > >> if (!LineIsValid(start_line) || > >> (end_line != UINT32_MAX && !LineIsValid(end_line))) > >> return; > >> if (start_line > end_line) > >> return; > >> > >> for (uint32_t line_no = start_line; line_no < end_line; line_no++) { > >> std::string buffer; > >> if (!GetLine(line_no, buffer)) > >> break; > >> if (regex.Execute(buffer)) { > >> match_lines.push_back(line_no); > >> } > >> } > >> } > >> > >> No copy saved. > >> > >> > >> static dw_offset_t > >> FindCallbackString(SymbolFileDWARF *dwarf2Data, DWARFCompileUnit *cu, > >> DWARFDebugInfoEntry *die, const dw_offset_t next_offset, > >> const uint32_t curr_depth, void *userData) { > >> FindCallbackStringInfo *info = (FindCallbackStringInfo *)userData; > >> > >> if (!die) > >> return next_offset; > >> > >> const char *die_name = die->GetName(dwarf2Data, cu); > >> if (!die_name) > >> return next_offset; > >> > >> if (info->regex) { > >> if (info->regex->Execute(llvm::StringRef(die_name))) > >> info->die_offsets.push_back(die->GetOffset()); > >> } else { > >> if ((info->ignore_case ? strcasecmp(die_name, info->name) > >> : strcmp(die_name, info->name)) == 0) > >> info->die_offsets.push_back(die->GetOffset()); > >> } > >> > >> // Just return the current offset to parse the next CU or DIE entry > >> return next_offset; > >> } > >> > >> No copy saved. > >> > >> > >> bool Get_Impl(ConstString key, MapValueType &value, > >> lldb::RegularExpressionSP *dummy) { > >> llvm::StringRef key_str = key.GetStringRef(); > >> std::lock_guard<std::recursive_mutex> guard(m_format_map.mutex()); > >> MapIterator pos, end = m_format_map.map().end(); > >> for (pos = m_format_map.map().begin(); pos != end; pos++) { > >> lldb::RegularExpressionSP regex = pos->first; > >> if (regex->Execute(key_str)) { > >> value = pos->second; > >> return true; > >> } > >> } > >> return false; > >> } > >> > >> No copy saved. > >> > >> > >> PacketResult echo_packet_result = > >> SendPacketNoLock(llvm::StringRef(echo_packet, > >> echo_packet_len)); > >> if (echo_packet_result == PacketResult::Success) { > >> const uint32_t max_retries = 3; > >> uint32_t successful_responses = 0; > >> for (uint32_t i = 0; i < max_retries; ++i) { > >> StringExtractorGDBRemote echo_response; > >> echo_packet_result = > >> WaitForPacketWithTimeoutMicroSecondsNoLock( > >> echo_response, timeout_usec, false); > >> if (echo_packet_result == PacketResult::Success) { > >> ++successful_responses; > >> if (response_regex.Execute(echo_response.GetStringRef())) { > >> sync_success = true; > >> > >> No copy saved. > >> > >> > >> OptionValueSP Instruction::ReadArray(FILE *in_file, Stream *out_stream, > >> OptionValue::Type data_type) { > >> bool done = false; > >> char buffer[1024]; > >> > >> OptionValueSP option_value_sp(new OptionValueArray(1u << data_type)); > >> > >> int idx = 0; > >> while (!done) { > >> if (!fgets(buffer, 1023, in_file)) { > >> out_stream->Printf( > >> "Instruction::ReadArray: Error reading file (fgets).\n"); > >> option_value_sp.reset(); > >> return option_value_sp; > >> } > >> > >> std::string line(buffer); > >> > >> size_t len = line.size(); > >> if (line[len - 1] == '\n') { > >> line[len - 1] = '\0'; > >> line.resize(len - 1); > >> } > >> > >> if ((line.size() == 1) && line[0] == ']') { > >> done = true; > >> line.clear(); > >> } > >> > >> if (!line.empty()) { > >> std::string value; > >> static RegularExpression g_reg_exp( > >> llvm::StringRef("^[ \t]*([^ \t]+)[ \t]*$")); > >> RegularExpression::Match regex_match(1); > >> bool reg_exp_success = g_reg_exp.Execute(line, ®ex_match); > >> if (reg_exp_success) > >> regex_match.GetMatchAtIndex(line.c_str(), 1, value); > >> else > >> value = line; > >> > >> No copy saved. > >> > >> I could do on with all 31 call sites, but I will stop. All that was added > >> was inefficiency and requiring us to make a copy of the string before it > >> is used to evaluate. Some of these are evaluated in tight loops, like the > >> searching for type summaries and formats that are regex based. Happens > >> once for each item in a SBValue (and once per child that is displayed... > >> > >>> We could also just un-delete the overload that takes a const char*, then > >>> the duplication would only ever happen when you explicitly use a > >>> StringRef. > >> > >> For execute it is fine to make one that takes a StringRef, but it would > >> just call .str().c_str() and call the other one. We would assume the > >> "const char *" version of execute would be null terminated. > >> > >>> > >>> I don't agree this should be reverted. In the process of doing this > >>> conversion I eliminated numerous careless string copies. > >> > >> As I showed above the opposite was true. We made many calls to strlen to > >> construct the StringRef so we actually made things slower. > >> > >> Greg > >> > >>> > >>> On Wed, Sep 21, 2016 at 4:38 PM Greg Clayton <gclay...@apple.com> wrote: > >>> This it the perfect example of why not to use a StringRef since the > >>> string needs to be null terminated. Why did we change this? Now even if > >>> you call this function: > >>> > >>> RegularExpression r(...); > >>> > >>> r.Execute(".......................", ...) > >>> > >>> You will need to duplicate the string on the heap just to execute this. > >>> Please revert this. Anything that requires null terminate is not a > >>> candidate for converting to StringRef. > >>> > >>> > >>>> On Sep 21, 2016, at 10:13 AM, Zachary Turner via lldb-commits > >>>> <lldb-commits@lists.llvm.org> wrote: > >>>> > >>>> Author: zturner > >>>> Date: Wed Sep 21 12:13:51 2016 > >>>> New Revision: 282090 > >>>> > >>>> URL: http://llvm.org/viewvc/llvm-project?rev=282090&view=rev > >>>> Log: > >>>> Fix failing regex tests. > >>>> > >>>> r282079 converted the regular expression interface to accept > >>>> and return StringRefs instead of char pointers. In one case > >>>> a null pointer check was converted to an empty string check, > >>>> but this was an incorrect conversion because an empty string > >>>> is a valid regular expression. Removing this check should > >>>> fix the test failures. > >>>> > >>>> Modified: > >>>> lldb/trunk/source/Core/RegularExpression.cpp > >>>> > >>>> Modified: lldb/trunk/source/Core/RegularExpression.cpp > >>>> URL: > >>>> http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Core/RegularExpression.cpp?rev=282090&r1=282089&r2=282090&view=diff > >>>> ============================================================================== > >>>> --- lldb/trunk/source/Core/RegularExpression.cpp (original) > >>>> +++ lldb/trunk/source/Core/RegularExpression.cpp Wed Sep 21 12:13:51 2016 > >>>> @@ -102,7 +102,7 @@ bool RegularExpression::Compile(llvm::St > >>>> //--------------------------------------------------------------------- > >>>> bool RegularExpression::Execute(llvm::StringRef str, Match *match) const > >>>> { > >>>> int err = 1; > >>>> - if (!str.empty() && m_comp_err == 0) { > >>>> + if (m_comp_err == 0) { > >>>> // Argument to regexec must be null-terminated. > >>>> std::string reg_str = str; > >>>> if (match) { > >>>> > >>>> > >>>> _______________________________________________ > >>>> lldb-commits mailing list > >>>> lldb-commits@lists.llvm.org > >>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits > >>> > >> > >> _______________________________________________ > >> lldb-commits mailing list > >> lldb-commits@lists.llvm.org > >> http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits > > > _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits