================ @@ -0,0 +1,131 @@ +//===-- DILLexer.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_VALUEOBJECT_DILLEXER_H_ +#define LLDB_VALUEOBJECT_DILLEXER_H_ + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Error.h" +#include <cstdint> +#include <limits.h> +#include <memory> +#include <string> +#include <vector> + +namespace lldb_private::dil { + +/// Class defining the tokens generated by the DIL lexer and used by the +/// DIL parser. +class Token { +public: + enum Kind { + coloncolon, + eof, + identifier, + l_paren, + r_paren, + unknown, + }; + + Token(Kind kind, std::string spelling, uint32_t start) + : m_kind(kind), m_spelling(spelling), m_start_pos(start) {} + + Kind GetKind() const { return m_kind; } + + std::string GetSpelling() const { return m_spelling; } + + bool Is(Kind kind) const { return m_kind == kind; } + + bool IsNot(Kind kind) const { return m_kind != kind; } + + bool IsOneOf(Kind kind1, Kind kind2) const { return Is(kind1) || Is(kind2); } + + template <typename... Ts> bool IsOneOf(Kind kind, Ts... Ks) const { + return Is(kind) || IsOneOf(Ks...); + } + + uint32_t GetLocation() const { return m_start_pos; } + + static llvm::StringRef GetTokenName(Kind kind); + +private: + Kind m_kind; + std::string m_spelling; + uint32_t m_start_pos; // within entire expression string +}; + +/// Class for doing the simple lexing required by DIL. +class DILLexer { +public: + /// Lexes all the tokens in expr and calls the private constructor + /// with the lexed tokens. 
+ static llvm::Expected<DILLexer> Create(llvm::StringRef expr); + + /// Return the current token to be handled by the DIL parser. + const Token &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; } + + /// Advance the current token position by N. + void Advance(uint32_t N = 1) { + // UINT_MAX means uninitialized, no "current" position, so move to start. + if (m_tokens_idx == UINT_MAX) ---------------- labath wrote:
And why is that the case? I don't find this argument particularly convincing. All it does is create a weird "before the first token" state, which you have to account for both in the implementation of the lexer functions and in the functions that use them. I think things would be much simpler if you could just always assume that the lexer has a valid "current" position, and I think that's basically why we have the eof pseudo-token. If there were some value in having a "before the first token" state, then I think it'd be better to have a `bof` pseudo-token to match that (but I doubt that's the case). https://github.com/llvm/llvm-project/pull/123521 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits