================
@@ -0,0 +1,132 @@
+//===-- DILLexer.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This implements the lexer for the Data Inspection Language (DIL), and its
+// helper functions, which will eventually underlie the 'frame variable'
+// command. The language that this lexer tokenizes is described in
+// lldb/docs/dil-expr-lang.ebnf
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/ValueObject/DILLexer.h"
+#include "lldb/Utility/Status.h"
+#include "llvm/ADT/StringSwitch.h"
+
+namespace lldb_private::dil {
+
+llvm::StringRef Token::GetTokenName(Kind kind) {
+  switch (kind) {
+  case Kind::coloncolon:
+    return "coloncolon";
+  case Kind::eof:
+    return "eof";
+  case Kind::identifier:
+    return "identifier";
+  case Kind::l_paren:
+    return "l_paren";
+  case Kind::r_paren:
+    return "r_paren";
+  case Kind::unknown:
+    return "unknown";
+  }
+}
+
+static bool IsLetter(char c) {
+  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+
+static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
+
+// A word starts with a letter, underscore, or dollar sign, followed by
+// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
+static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
+                                             llvm::StringRef &remainder) {
+  llvm::StringRef::iterator cur_pos = expr.end() - remainder.size();
+  llvm::StringRef::iterator start = cur_pos;
+  bool dollar_start = false;
+
+  // Must not start with a digit.
+  if (cur_pos == expr.end() || IsDigit(*cur_pos))
+    return std::nullopt;
+
+  // First character *may* be a '$', for a register name or convenience
+  // variable.
+  if (*cur_pos == '$') {
+    dollar_start = true;
+    ++cur_pos;
+  }
+
+  // Contains only letters, digits or underscores
+  for (; cur_pos != expr.end(); ++cur_pos) {
+    char c = *cur_pos;
+    if (!IsLetter(c) && !IsDigit(c) && c != '_')
+      break;
+  }
+
+  // If first char is '$', make sure there's at least one more char, or it's
+  // invalid.
+  if (dollar_start && (cur_pos - start <= 1)) {
+    cur_pos = start;
+    return std::nullopt;
+  }
+
+  if (cur_pos == start)
+    return std::nullopt;
+
+  llvm::StringRef word = expr.substr(start - expr.begin(), cur_pos - start);
+  if (remainder.consume_front(word))
+    return word;
+
+  return std::nullopt;
----------------
labath wrote:

This should be equivalent to your code, but I'm only suggesting this to show 
how code like this can be written in a shorter and more stringref-y fashion. I 
think the actual algorithm will have to change, as it has a very narrow 
definition of identifiers.
```suggestion
  const char *start = remainder.data();
  remainder.consume_front("$"); // initial '$' is valid
  remainder = remainder.drop_while([](char c){  return IsDigit(c) || 
IsLetter(c) || c=='_'; });
  llvm::StringRef candidate(start, remainder.data()-start);
  if (candidate.empty() || candidate == "$" || IsDigit(candidate[0]))
    return std::nullopt;
  return candidate;
```

https://github.com/llvm/llvm-project/pull/123521
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to