================ @@ -0,0 +1,132 @@ +//===-- DILLexer.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This implements the recursive descent parser for the Data Inspection +// Language (DIL), and its helper functions, which will eventually underlie the +// 'frame variable' command. The language that this parser recognizes is +// described in lldb/docs/dil-expr-lang.ebnf +// +//===----------------------------------------------------------------------===// + +#include "lldb/ValueObject/DILLexer.h" +#include "lldb/Utility/Status.h" +#include "llvm/ADT/StringSwitch.h" + +namespace lldb_private::dil { + +llvm::StringRef Token::GetTokenName(Kind kind) { + switch (kind) { + case Kind::coloncolon: + return "coloncolon"; + case Kind::eof: + return "eof"; + case Kind::identifier: + return "identifier"; + case Kind::l_paren: + return "l_paren"; + case Kind::r_paren: + return "r_paren"; + case Kind::unknown: + return "unknown"; + } +} + +static bool IsLetter(char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); +} + +static bool IsDigit(char c) { return '0' <= c && c <= '9'; } + +// A word starts with a letter, underscore, or dollar sign, followed by +// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. +static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr, + llvm::StringRef &remainder) { + llvm::StringRef::iterator cur_pos = expr.end() - remainder.size(); + llvm::StringRef::iterator start = cur_pos; + bool dollar_start = false; + + // Must not start with a digit. + if (cur_pos == expr.end() || IsDigit(*cur_pos)) + return std::nullopt; + + // First character *may* be a '$', for a register name or convenience + // variable. 
+ if (*cur_pos == '$') { + dollar_start = true; + ++cur_pos; + } + + // Contains only letters, digits or underscores + for (; cur_pos != expr.end(); ++cur_pos) { + char c = *cur_pos; + if (!IsLetter(c) && !IsDigit(c) && c != '_') + break; + } + + // If first char is '$', make sure there's at least one more char, or it's + // invalid. + if (dollar_start && (cur_pos - start <= 1)) { + cur_pos = start; + return std::nullopt; + } + + if (cur_pos == start) + return std::nullopt; + + llvm::StringRef word = expr.substr(start - expr.begin(), cur_pos - start); + if (remainder.consume_front(word)) + return word; + + return std::nullopt; ---------------- labath wrote:
The key thing to realize here is that a StringRef is just a fancy name for a `pair<const char *, size_t>`. On its own, it doesn't make anything valid. It's only valid as long as the string it points to is valid. And the same goes for the validity of the pointers you get from the StringRef. This is just a pointer to the parsed string, which will remain valid for as long as that string is around -- regardless of what happens to the StringRef. That said, I realized a different problem with this code. It will end up modifying the remainder even in the failure case. So here's a slightly different version which avoids that. This one isn't completely equivalent, as I've dropped the requirement on the positioning of the dollar sign, but that's something I very strongly believe we should do anyway. ``` StringRef candidate = remainder.take_while([](char c) { return IsDigit(c) || IsLetter(c) || c=='_' || c=='$'; }); if (candidate.empty() || IsDigit(candidate[0])) return std::nullopt; remainder = remainder.drop_front(candidate.size()); return candidate; ``` https://github.com/llvm/llvm-project/pull/123521 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits