================
@@ -0,0 +1,132 @@
+//===-- DILLexer.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This implements the lexer for the Data Inspection Language (DIL), and its
+// helper functions, which will eventually underlie the 'frame variable'
+// command. The language that this lexer tokenizes is described in
+// lldb/docs/dil-expr-lang.ebnf
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/ValueObject/DILLexer.h"
+#include "lldb/Utility/Status.h"
+#include "llvm/ADT/StringSwitch.h"
+
+namespace lldb_private::dil {
+
+llvm::StringRef Token::GetTokenName(Kind kind) {
+  switch (kind) {
+  case Kind::coloncolon:
+    return "coloncolon";
+  case Kind::eof:
+    return "eof";
+  case Kind::identifier:
+    return "identifier";
+  case Kind::l_paren:
+    return "l_paren";
+  case Kind::r_paren:
+    return "r_paren";
+  case Kind::unknown:
+    return "unknown";
+  }
+}
+
+static bool IsLetter(char c) {
+  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
+
+// A word starts with a letter, underscore, or dollar sign, followed by
+// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
+static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
+                                             llvm::StringRef &remainder) {
+  llvm::StringRef::iterator cur_pos = expr.end() - remainder.size();
+  llvm::StringRef::iterator start = cur_pos;
+  bool dollar_start = false;
+
+  // Must not start with a digit.
+  if (cur_pos == expr.end() || IsDigit(*cur_pos))
+    return std::nullopt;
+
+  // First character *may* be a '$', for a register name or convenience
+  // variable.
+  if (*cur_pos == '$') {
+    dollar_start = true;
+    ++cur_pos;
+  }
+
+  // Contains only letters, digits or underscores
+  for (; cur_pos != expr.end(); ++cur_pos) {
+    char c = *cur_pos;
+    if (!IsLetter(c) && !IsDigit(c) && c != '_')
+      break;
+  }
+
+  // If first char is '$', make sure there's at least one more char, or it's
+  // invalid.
+  if (dollar_start && (cur_pos - start <= 1)) {
+    cur_pos = start;
+    return std::nullopt;
+  }
+
+  if (cur_pos == start)
+    return std::nullopt;
+
+  llvm::StringRef word = expr.substr(start - expr.begin(), cur_pos - start);
+  if (remainder.consume_front(word))
+    return word;
+
+  return std::nullopt;
----------------
labath wrote:

The key thing to realize here is that a StringRef is just a fancy name for a 
`pair<const char *, size_t>`. On its own, it doesn't make anything valid. It's 
only valid as long as the string it points to is valid. And the same goes for
the validity of the pointers you get from the StringRef. Each one is just a
pointer into the parsed string, which will remain valid for as long as that
string is around -- regardless of what happens to the StringRef.

That said, I realized a different problem with this code. It will end up 
modifying the remainder even in the failure case. So here's a slightly 
different version which avoids that. This one isn't completely equivalent, as 
I've dropped the requirement on the positioning of the dollar sign, but that's 
something I very strongly believe we should do anyway.

```
StringRef candidate = remainder.take_while(
    [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; });
if (candidate.empty() || IsDigit(candidate[0]))
  return std::nullopt;
remainder = remainder.drop_front(candidate.size());
return candidate;
```

https://github.com/llvm/llvm-project/pull/123521
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to