https://github.com/inbelic updated https://github.com/llvm/llvm-project/pull/122981
>From 98deff6a407b912852e70b2bdc3618aaec8a1931 Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Fri, 24 Jan 2025 22:23:39 +0000 Subject: [PATCH 01/12] [HLSL][RootSignature] Initial Lexer Definition with puncuators - Defines the RootSignatureLexer class - Defines the test harness required for testing - Implements the punctuator tokens and tests functionality --- .../include/clang/Basic/DiagnosticLexKinds.td | 6 + .../Parse/HLSLRootSignatureTokenKinds.def | 35 ++++ .../clang/Parse/ParseHLSLRootSignature.h | 79 +++++++++ clang/lib/Parse/CMakeLists.txt | 1 + clang/lib/Parse/ParseHLSLRootSignature.cpp | 50 ++++++ clang/unittests/CMakeLists.txt | 1 + clang/unittests/Parse/CMakeLists.txt | 26 +++ .../Parse/ParseHLSLRootSignatureTest.cpp | 167 ++++++++++++++++++ 8 files changed, 365 insertions(+) create mode 100644 clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def create mode 100644 clang/include/clang/Parse/ParseHLSLRootSignature.h create mode 100644 clang/lib/Parse/ParseHLSLRootSignature.cpp create mode 100644 clang/unittests/Parse/CMakeLists.txt create mode 100644 clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 959376b0847216c..7755c05bc8969ba 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -1017,4 +1017,10 @@ Error<"'#pragma unsafe_buffer_usage' was not ended">; def err_pp_pragma_unsafe_buffer_usage_syntax : Error<"expected 'begin' or 'end'">; + +// HLSL Root Signature Lexing Errors +let CategoryName = "Root Signature Lexical Issue" in { + def err_hlsl_invalid_token: Error<"unable to lex a valid Root Signature token">; +} + } diff --git a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def new file mode 100644 index 000000000000000..9625f6a5bd76d9e --- /dev/null +++ b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def @@ -0,0 +1,35 @@ +//===--- HLSLRootSignature.def - Tokens and Enum Database -------*- C++ -*-===// + +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TokenKinds used in the Root Signature DSL. This +// includes keywords, enums and a small subset of punctuators. Users of this +// file must optionally #define the TOK, KEYWORD, ENUM or specific ENUM macros +// to make use of this file. +// +//===----------------------------------------------------------------------===// + +#ifndef TOK +#define TOK(X) +#endif +#ifndef PUNCTUATOR +#define PUNCTUATOR(X,Y) TOK(pu_ ## X) +#endif + +// General Tokens: +TOK(invalid) + +// Punctuators: +PUNCTUATOR(l_paren, '(') +PUNCTUATOR(r_paren, ')') +PUNCTUATOR(comma, ',') +PUNCTUATOR(or, '|') +PUNCTUATOR(equal, '=') + +#undef PUNCTUATOR +#undef TOK diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h new file mode 100644 index 000000000000000..39069e7cc399883 --- /dev/null +++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h @@ -0,0 +1,79 @@ +//===--- ParseHLSLRootSignature.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the ParseHLSLRootSignature interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PARSE_PARSEHLSLROOTSIGNATURE_H +#define LLVM_CLANG_PARSE_PARSEHLSLROOTSIGNATURE_H + +#include "clang/Basic/DiagnosticLex.h" +#include "clang/Lex/Preprocessor.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { +namespace hlsl { + +struct RootSignatureToken { + enum Kind { +#define TOK(X) X, +#include "clang/Parse/HLSLRootSignatureTokenKinds.def" + }; + + Kind Kind = Kind::invalid; + + // Retain the SouceLocation of the token for diagnostics + clang::SourceLocation TokLoc; + + // Constructors + RootSignatureToken(clang::SourceLocation TokLoc) : TokLoc(TokLoc) {} +}; +using TokenKind = enum RootSignatureToken::Kind; + +class RootSignatureLexer { +public: + RootSignatureLexer(StringRef Signature, clang::SourceLocation SourceLoc, + clang::Preprocessor &PP) + : Buffer(Signature), SourceLoc(SourceLoc), PP(PP) {} + + // Consumes the internal buffer as a list of tokens and will emplace them + // onto the given tokens. + // + // It will consume until it successfully reaches the end of the buffer, + // or, until the first error is encountered. The return value denotes if + // there was a failure. + bool Lex(SmallVector<RootSignatureToken> &Tokens); + +private: + // Internal buffer to iterate over + StringRef Buffer; + + // Passed down parameters from Sema + clang::SourceLocation SourceLoc; + clang::Preprocessor &PP; + + // Consumes the internal buffer for a single token. + // + // The return value denotes if there was a failure. + bool LexToken(RootSignatureToken &Token); + + // Advance the buffer by the specified number of characters. Updates the + // SourceLocation appropriately. + void AdvanceBuffer(unsigned NumCharacters = 1) { + Buffer = Buffer.drop_front(NumCharacters); + SourceLoc = SourceLoc.getLocWithOffset(NumCharacters); + } +}; + +} // namespace hlsl +} // namespace clang + +#endif // LLVM_CLANG_PARSE_PARSEHLSLROOTSIGNATURE_H diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt index 22e902f7e1bc500..00fde537bb9c607 100644 --- a/clang/lib/Parse/CMakeLists.txt +++ b/clang/lib/Parse/CMakeLists.txt @@ -14,6 +14,7 @@ add_clang_library(clangParse ParseExpr.cpp ParseExprCXX.cpp ParseHLSL.cpp + ParseHLSLRootSignature.cpp ParseInit.cpp ParseObjc.cpp ParseOpenMP.cpp diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp new file mode 100644 index 000000000000000..a9a9d209085c91b --- /dev/null +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -0,0 +1,50 @@ +#include "clang/Parse/ParseHLSLRootSignature.h" + +namespace clang { +namespace hlsl { + +// Lexer Definitions + +bool RootSignatureLexer::Lex(SmallVector<RootSignatureToken> &Tokens) { + // Discard any leading whitespace + AdvanceBuffer(Buffer.take_while(isspace).size()); + + while (!Buffer.empty()) { + // Record where this token is in the text for usage in parser diagnostics + RootSignatureToken Result(SourceLoc); + if (LexToken(Result)) + return true; + + // Successfully Lexed the token so we can store it + Tokens.push_back(Result); + + // Discard any trailing whitespace + AdvanceBuffer(Buffer.take_while(isspace).size()); + } + + return false; +} + +bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { + char C = Buffer.front(); + + // Punctuators + switch (C) { +#define PUNCTUATOR(X, Y) \ + case Y: { \ + Result.Kind = TokenKind::pu_##X; \ + AdvanceBuffer(); \ + return false; \ + } +#include "clang/Parse/HLSLRootSignatureTokenKinds.def" + default: + break; + } + + // Unable to match on any token type + PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_invalid_token); + return true; +} + +} // namespace hlsl +} // namespace clang diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index 85d265426ec80be..9b3ce8aa7de7390 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -25,6 +25,7 @@ endfunction() add_subdirectory(Basic) add_subdirectory(Lex) +add_subdirectory(Parse) add_subdirectory(Driver) if(CLANG_ENABLE_STATIC_ANALYZER) add_subdirectory(Analysis) diff --git a/clang/unittests/Parse/CMakeLists.txt b/clang/unittests/Parse/CMakeLists.txt new file mode 100644 index 000000000000000..1b7eb4934a46c8a --- /dev/null +++ b/clang/unittests/Parse/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_clang_unittest(ParseTests + ParseHLSLRootSignatureTest.cpp + ) + +clang_target_link_libraries(ParseTests + PRIVATE + clangAST + clangASTMatchers + clangBasic + clangFrontend + clangParse + clangSema + clangSerialization + clangTooling + ) + +target_link_libraries(ParseTests + PRIVATE + LLVMTestingAnnotations + LLVMTestingSupport + clangTesting + ) diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp new file mode 100644 index 000000000000000..e5f88bbfa0ff6af --- /dev/null +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -0,0 +1,167 @@ +//=== ParseHLSLRootSignatureTest.cpp - Parse Root Signature tests ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/HeaderSearchOptions.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/PreprocessorOptions.h" + +#include "clang/Parse/ParseHLSLRootSignature.h" +#include "gtest/gtest.h" + +using namespace clang; + +namespace { + +// Diagnostic helper for helper tests +class ExpectedDiagConsumer : public DiagnosticConsumer { + virtual void anchor() {} + + void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, + const Diagnostic &Info) override { + if (!FirstDiag || !ExpectedDiagID.has_value()) { + Satisfied = false; + return; + } + FirstDiag = false; + + Satisfied = ExpectedDiagID.value() == Info.getID(); + } + + bool FirstDiag = true; + bool Satisfied = false; + std::optional<unsigned> ExpectedDiagID; + +public: + void SetNoDiag() { + Satisfied = true; + ExpectedDiagID = std::nullopt; + } + + void SetExpected(unsigned DiagID) { + Satisfied = false; + ExpectedDiagID = DiagID; + } + + bool IsSatisfied() { return Satisfied; } +}; + +// The test fixture. +class ParseHLSLRootSignatureTest : public ::testing::Test { +protected: + ParseHLSLRootSignatureTest() + : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), + Consumer(new ExpectedDiagConsumer()), + Diags(DiagID, new DiagnosticOptions, Consumer), + SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { + TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; + Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); + } + + std::unique_ptr<Preprocessor> CreatePP(StringRef Source, + TrivialModuleLoader &ModLoader) { + std::unique_ptr<llvm::MemoryBuffer> Buf = + llvm::MemoryBuffer::getMemBuffer(Source); + SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); + + HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, + Diags, LangOpts, Target.get()); + std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>( + std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr, + HeaderInfo, ModLoader, + /*IILookup =*/nullptr, + /*OwnsHeaderSearch =*/false); + PP->Initialize(*Target); + PP->EnterMainSourceFile(); + return PP; + } + + void CheckTokens(SmallVector<hlsl::RootSignatureToken> &Computed, + SmallVector<hlsl::TokenKind> &Expected) { + ASSERT_EQ(Computed.size(), Expected.size()); + for (unsigned I = 0, E = Expected.size(); I != E; ++I) { + ASSERT_EQ(Computed[I].Kind, Expected[I]); + } + } + + FileSystemOptions FileMgrOpts; + FileManager FileMgr; + IntrusiveRefCntPtr<DiagnosticIDs> DiagID; + ExpectedDiagConsumer *Consumer; + DiagnosticsEngine Diags; + SourceManager SourceMgr; + LangOptions LangOpts; + std::shared_ptr<TargetOptions> TargetOpts; + IntrusiveRefCntPtr<TargetInfo> Target; +}; + +// Valid Lexing Tests + +TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { + // This test will check that we can lex all defined tokens as defined in + // HLSLRootSignatureTokenKinds.def, plus some additional integer variations + const llvm::StringLiteral Source = R"cc( + (),|= + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + // Test no diagnostics produced + Consumer->SetNoDiag(); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + + SmallVector<hlsl::RootSignatureToken> Tokens = { + hlsl::RootSignatureToken( + SourceLocation()) // invalid token for completeness + }; + ASSERT_FALSE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Consumer->IsSatisfied()); + + SmallVector<hlsl::TokenKind> Expected = { +#define TOK(NAME) hlsl::TokenKind::NAME, +#include "clang/Parse/HLSLRootSignatureTokenKinds.def" + }; + + CheckTokens(Tokens, Expected); +} + +// Invalid Lexing Tests + +TEST_F(ParseHLSLRootSignatureTest, InvalidLexIdentifierTest) { + // This test will check that the lexing fails due to no valid token + const llvm::StringLiteral Source = R"cc( + notAToken + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + // Test correct diagnostic produced + Consumer->SetExpected(diag::err_hlsl_invalid_token); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + + SmallVector<hlsl::RootSignatureToken> Tokens; + ASSERT_TRUE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Consumer->IsSatisfied()); +} + +} // anonymous namespace >From 5f43ed80a6173ecf0b234da8b93cde2f76b2ef5b Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Fri, 24 Jan 2025 22:28:05 +0000 Subject: [PATCH 02/12] Add lexing of integer literals - Integrate the use of the `NumericLiteralParser` to lex integer literals - Add additional hlsl specific diagnostics messages --- .../include/clang/Basic/DiagnosticLexKinds.td | 4 ++ .../Parse/HLSLRootSignatureTokenKinds.def | 1 + .../clang/Parse/ParseHLSLRootSignature.h | 6 ++ clang/lib/Parse/ParseHLSLRootSignature.cpp | 59 ++++++++++++++++ .../Parse/ParseHLSLRootSignatureTest.cpp | 69 +++++++++++++++++++ 5 files changed, 139 insertions(+) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 7755c05bc8969ba..81d314c838cedec 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -1020,6 +1020,10 @@ Error<"expected 'begin' or 'end'">; // HLSL Root Signature Lexing Errors let CategoryName = "Root Signature Lexical Issue" in { + def err_hlsl_invalid_number_literal: + Error<"expected number literal is not a supported number literal of unsigned integer or integer">; + def err_hlsl_number_literal_overflow : + Error<"provided %select{unsigned integer|signed integer}0 literal '%1' that overflows the maximum of 32 bits">; def err_hlsl_invalid_token: Error<"unable to lex a valid Root Signature token">; } diff --git a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def index 9625f6a5bd76d9e..64c5fd14a2017f0 100644 --- a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def +++ b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def @@ -23,6 +23,7 @@ // General Tokens: TOK(invalid) +TOK(int_literal) // Punctuators: PUNCTUATOR(l_paren, '(') diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h index 39069e7cc399883..3b2c61781b1da30 100644 --- a/clang/include/clang/Parse/ParseHLSLRootSignature.h +++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h @@ -13,7 +13,9 @@ #ifndef LLVM_CLANG_PARSE_PARSEHLSLROOTSIGNATURE_H #define LLVM_CLANG_PARSE_PARSEHLSLROOTSIGNATURE_H +#include "clang/AST/APValue.h" #include "clang/Basic/DiagnosticLex.h" +#include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/SmallVector.h" @@ -33,6 +35,8 @@ struct RootSignatureToken { // Retain the SouceLocation of the token for diagnostics clang::SourceLocation TokLoc; + APValue NumLiteral = APValue(); + // Constructors RootSignatureToken(clang::SourceLocation TokLoc) : TokLoc(TokLoc) {} }; @@ -60,6 +64,8 @@ class RootSignatureLexer { clang::SourceLocation SourceLoc; clang::Preprocessor &PP; + bool LexNumber(RootSignatureToken &Result); + // Consumes the internal buffer for a single token. // // The return value denotes if there was a failure. diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index a9a9d209085c91b..1cc75998ca07da3 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -5,6 +5,61 @@ namespace hlsl { // Lexer Definitions +static bool IsNumberChar(char C) { + // TODO(#120472): extend for float support exponents + return isdigit(C); // integer support +} + +bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) { + // NumericLiteralParser does not handle the sign so we will manually apply it + bool Negative = Buffer.front() == '-'; + bool Signed = Negative || Buffer.front() == '+'; + if (Signed) + AdvanceBuffer(); + + // Retrieve the possible number + StringRef NumSpelling = Buffer.take_while(IsNumberChar); + + // Catch this now as the Literal Parser will accept it as valid + if (NumSpelling.empty()) { + PP.getDiagnostics().Report(Result.TokLoc, + diag::err_hlsl_invalid_number_literal); + return true; + } + + // Parse the numeric value and do semantic checks on its specification + clang::NumericLiteralParser Literal(NumSpelling, SourceLoc, + PP.getSourceManager(), PP.getLangOpts(), + PP.getTargetInfo(), PP.getDiagnostics()); + if (Literal.hadError) + return true; // Error has already been reported so just return + + if (!Literal.isIntegerLiteral()) { + // Note: if IsNumberChar allows for hexidecimal we will need to turn this + // into a diagnostics for potential fixed-point literals + llvm_unreachable("IsNumberChar will only support digits"); + return true; + } + + // Retrieve the number value to store into the token + Result.Kind = TokenKind::int_literal; + + llvm::APSInt X = llvm::APSInt(32, !Signed); + if (Literal.GetIntegerValue(X)) { + // Report that the value has overflowed + PP.getDiagnostics().Report(Result.TokLoc, + diag::err_hlsl_number_literal_overflow) + << (unsigned)Signed << NumSpelling; + return true; + } + + X = Negative ? -X : X; + Result.NumLiteral = APValue(X); + + AdvanceBuffer(NumSpelling.size()); + return false; +} + bool RootSignatureLexer::Lex(SmallVector<RootSignatureToken> &Tokens) { // Discard any leading whitespace AdvanceBuffer(Buffer.take_while(isspace).size()); @@ -41,6 +96,10 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { break; } + // Numeric constant + if (isdigit(C) || C == '-' || C == '+') + return LexNumber(Result); + // Unable to match on any token type PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_invalid_token); return true; diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index e5f88bbfa0ff6af..713bc5b1257f749 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -111,10 +111,39 @@ class ParseHLSLRootSignatureTest : public ::testing::Test { // Valid Lexing Tests +TEST_F(ParseHLSLRootSignatureTest, ValidLexNumbersTest) { + // This test will check that we can lex different number tokens + const llvm::StringLiteral Source = R"cc( + -42 42 +42 + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + // Test no diagnostics produced + Consumer->SetNoDiag(); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + + SmallVector<hlsl::RootSignatureToken> Tokens; + ASSERT_FALSE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Consumer->IsSatisfied()); + + SmallVector<hlsl::TokenKind> Expected = { + hlsl::TokenKind::int_literal, + hlsl::TokenKind::int_literal, + hlsl::TokenKind::int_literal, + }; + CheckTokens(Tokens, Expected); +} + TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { // This test will check that we can lex all defined tokens as defined in // HLSLRootSignatureTokenKinds.def, plus some additional integer variations const llvm::StringLiteral Source = R"cc( + 42 + (),|= )cc"; @@ -144,6 +173,46 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { // Invalid Lexing Tests +TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedNumberTest) { + // This test will check that the lexing fails due to an integer overflow + const llvm::StringLiteral Source = R"cc( + 4294967296 + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + // Test correct diagnostic produced + Consumer->SetExpected(diag::err_hlsl_number_literal_overflow); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + + SmallVector<hlsl::RootSignatureToken> Tokens; + ASSERT_TRUE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Consumer->IsSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, InvalidLexEmptyNumberTest) { + // This test will check that the lexing fails due to no integer being provided + const llvm::StringLiteral Source = R"cc( + - + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + // Test correct diagnostic produced + Consumer->SetExpected(diag::err_hlsl_invalid_number_literal); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + + SmallVector<hlsl::RootSignatureToken> Tokens; + ASSERT_TRUE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Consumer->IsSatisfied()); +} + TEST_F(ParseHLSLRootSignatureTest, InvalidLexIdentifierTest) { // This test will check that the lexing fails due to no valid token const llvm::StringLiteral Source = R"cc( >From dc0c7ba8da50f3b12395d1ff90f77c42073d21ff Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Fri, 24 Jan 2025 21:04:36 +0000 Subject: [PATCH 03/12] Add support for lexing registers --- .../include/clang/Basic/DiagnosticLexKinds.td | 1 + .../Parse/HLSLRootSignatureTokenKinds.def | 6 +++ clang/lib/Parse/ParseHLSLRootSignature.cpp | 47 +++++++++++++++++++ .../Parse/ParseHLSLRootSignatureTest.cpp | 23 +++++++++ 4 files changed, 77 insertions(+) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 81d314c838cedec..cbadd16df3f9db1 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -1025,6 +1025,7 @@ let CategoryName = "Root Signature Lexical Issue" in { def err_hlsl_number_literal_overflow : Error<"provided %select{unsigned integer|signed integer}0 literal '%1' that overflows the maximum of 32 bits">; def err_hlsl_invalid_token: Error<"unable to lex a valid Root Signature token">; + def err_hlsl_invalid_register_literal: Error<"expected unsigned integer literal as part of register">; } } diff --git a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def index 64c5fd14a2017f0..fc4dbfef7287981 100644 --- a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def +++ b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def @@ -25,6 +25,12 @@ TOK(invalid) TOK(int_literal) +// Register Tokens: +TOK(bReg) +TOK(tReg) +TOK(uReg) +TOK(sReg) + // Punctuators: PUNCTUATOR(l_paren, '(') PUNCTUATOR(r_paren, ')') diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 1cc75998ca07da3..1b9973beb44170f 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -100,6 +100,53 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { if (isdigit(C) || C == '-' || C == '+') return LexNumber(Result); + // All following tokens require at least one additional character + if (Buffer.size() <= 1) { + PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_invalid_token); + return true; + } + + // Peek at the next character to deteremine token type + char NextC = Buffer[1]; + + // Registers: [tsub][0-9+] + if ((C == 't' || C == 's' || C == 'u' || C == 'b') && isdigit(NextC)) { + AdvanceBuffer(); + + if (LexNumber(Result)) + return true; // Error parsing number which is already reported + + // Lex number could also parse a float so ensure it was an unsigned int + if (Result.Kind != TokenKind::int_literal || + Result.NumLiteral.getInt().isSigned()) { + // Return invalid number literal for register error + PP.getDiagnostics().Report(Result.TokLoc, + diag::err_hlsl_invalid_register_literal); + return true; + } + + // Convert character to the register type. + // This is done after LexNumber to override the TokenKind + switch (C) { + case 'b': + Result.Kind = TokenKind::bReg; + break; + case 't': + Result.Kind = TokenKind::tReg; + break; + case 'u': + Result.Kind = TokenKind::uReg; + break; + case 's': + Result.Kind = TokenKind::sReg; + break; + default: + llvm_unreachable("Switch for an expected token was not provided"); + return true; + } + return false; + } + // Unable to match on any token type PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_invalid_token); return true; diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index 713bc5b1257f749..47195099ed60dfa 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -144,6 +144,8 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { const llvm::StringLiteral Source = R"cc( 42 + b0 t43 u987 s234 + (),|= )cc"; @@ -213,6 +215,27 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexEmptyNumberTest) { ASSERT_TRUE(Consumer->IsSatisfied()); } +TEST_F(ParseHLSLRootSignatureTest, InvalidLexRegNumberTest) { + // This test will check that the lexing fails due to no integer being provided + const llvm::StringLiteral Source = R"cc( + b32.4 + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + // Test correct diagnostic produced + Consumer->SetExpected(diag::err_hlsl_invalid_register_literal); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + + SmallVector<hlsl::RootSignatureToken> Tokens; + ASSERT_TRUE(Lexer.Lex(Tokens)); + // FIXME(#120472): This should be TRUE once we can lex a floating + ASSERT_FALSE(Consumer->IsSatisfied()); +} + TEST_F(ParseHLSLRootSignatureTest, InvalidLexIdentifierTest) { // This test will check that the lexing fails due to no valid token const llvm::StringLiteral Source = R"cc( >From c5d3881c89cbed9f3dd7782ebad08e9a71f4bd64 Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Fri, 24 Jan 2025 21:40:23 +0000 Subject: [PATCH 04/12] Add lexing for example keyword and enum --- .../Parse/HLSLRootSignatureTokenKinds.def | 20 ++++++++++++++++ .../clang/Parse/ParseHLSLRootSignature.h | 1 + clang/lib/Parse/ParseHLSLRootSignature.cpp | 23 ++++++++++++++++--- .../Parse/ParseHLSLRootSignatureTest.cpp | 4 ++++ 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def index fc4dbfef7287981..d73c9adbb94e5c3 100644 --- a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def +++ b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def @@ -20,6 +20,17 @@ #ifndef PUNCTUATOR #define PUNCTUATOR(X,Y) TOK(pu_ ## X) #endif +#ifndef KEYWORD +#define KEYWORD(X) TOK(kw_ ## X) +#endif +#ifndef ENUM +#define ENUM(NAME, LIT) TOK(en_ ## NAME) +#endif + +// Defines the various types of enum +#ifndef DESCRIPTOR_RANGE_OFFSET_ENUM +#define DESCRIPTOR_RANGE_OFFSET_ENUM(NAME, LIT) ENUM(NAME, LIT) +#endif // General Tokens: TOK(invalid) @@ -38,5 +49,14 @@ PUNCTUATOR(comma, ',') PUNCTUATOR(or, '|') PUNCTUATOR(equal, '=') +// RootElement Keywords: +KEYWORD(DescriptorTable) + +// Descriptor Range Offset Enum: +DESCRIPTOR_RANGE_OFFSET_ENUM(DescriptorRangeOffsetAppend, "DESCRIPTOR_RANGE_OFFSET_APPEND") + +#undef DESCRIPTOR_RANGE_OFFSET_ENUM +#undef ENUM +#undef KEYWORD #undef PUNCTUATOR #undef TOK diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h index 3b2c61781b1da30..899608bd1527ea9 100644 --- a/clang/include/clang/Parse/ParseHLSLRootSignature.h +++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" namespace clang { namespace hlsl { diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 1b9973beb44170f..7ceb85a47a088e1 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -147,9 +147,26 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { return false; } - // Unable to match on any token type - PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_invalid_token); - return true; + // Keywords and Enums: + StringRef TokSpelling = + Buffer.take_while([](char C) { return isalnum(C) || C == '_'; }); + + // Define a large string switch statement for all the keywords and enums + auto Switch = llvm::StringSwitch<TokenKind>(TokSpelling); +#define KEYWORD(NAME) Switch.Case(#NAME, TokenKind::kw_##NAME); +#define ENUM(NAME, LIT) Switch.CaseLower(LIT, TokenKind::en_##NAME); +#include "clang/Parse/HLSLRootSignatureTokenKinds.def" + + // Then attempt to retreive a string from it + auto Kind = Switch.Default(TokenKind::invalid); + if (Kind == TokenKind::invalid) { + PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_invalid_token); + return true; + } + + Result.Kind = Kind; + AdvanceBuffer(TokSpelling.size()); + return false; } } // namespace hlsl diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index 47195099ed60dfa..d80ded10ba313cb 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -147,6 +147,10 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { b0 t43 u987 s234 (),|= + + DescriptorTable + + DESCRIPTOR_RANGE_OFFSET_APPEND )cc"; TrivialModuleLoader ModLoader; >From dc784ffa0da9b7e2d16397912f53c0408d3208c7 Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Fri, 24 Jan 2025 21:44:24 +0000 Subject: [PATCH 05/12] Add lexing for remaining DescriptorTable keywords and enums --- .../Parse/HLSLRootSignatureTokenKinds.def | 59 +++++++++++++++++++ .../Parse/ParseHLSLRootSignatureTest.cpp | 19 ++++++ 2 files changed, 78 insertions(+) diff --git a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def index d73c9adbb94e5c3..5e47af8d4b13644 100644 --- a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def +++ b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def @@ -31,6 +31,23 @@ #ifndef DESCRIPTOR_RANGE_OFFSET_ENUM #define DESCRIPTOR_RANGE_OFFSET_ENUM(NAME, LIT) ENUM(NAME, LIT) #endif +#ifndef ROOT_DESCRIPTOR_FLAG_ENUM +#define ROOT_DESCRIPTOR_FLAG_ENUM(NAME, LIT) ENUM(NAME, LIT) +#endif +// Note: ON denotes that the flag is unique from the above Root Descriptor +// Flags. This is required to avoid token kind enum conflicts. +#ifndef DESCRIPTOR_RANGE_FLAG_ENUM_OFF +#define DESCRIPTOR_RANGE_FLAG_ENUM_OFF(NAME, LIT) +#endif +#ifndef DESCRIPTOR_RANGE_FLAG_ENUM_ON +#define DESCRIPTOR_RANGE_FLAG_ENUM_ON(NAME, LIT) ENUM(NAME, LIT) +#endif +#ifndef DESCRIPTOR_RANGE_FLAG_ENUM +#define DESCRIPTOR_RANGE_FLAG_ENUM(NAME, LIT, ON) DESCRIPTOR_RANGE_FLAG_ENUM_##ON(NAME, LIT) +#endif +#ifndef SHADER_VISIBILITY_ENUM +#define SHADER_VISIBILITY_ENUM(NAME, LIT) ENUM(NAME, LIT) +#endif // General Tokens: TOK(invalid) @@ -52,9 +69,51 @@ PUNCTUATOR(equal, '=') // RootElement Keywords: KEYWORD(DescriptorTable) +// DescriptorTable Keywords: +KEYWORD(CBV) +KEYWORD(SRV) +KEYWORD(UAV) +KEYWORD(Sampler) + +// General Parameter Keywords: +KEYWORD(space) +KEYWORD(visibility) +KEYWORD(flags) + +// View Parameter Keywords: +KEYWORD(numDescriptors) +KEYWORD(offset) + // Descriptor Range Offset Enum: DESCRIPTOR_RANGE_OFFSET_ENUM(DescriptorRangeOffsetAppend, "DESCRIPTOR_RANGE_OFFSET_APPEND") +// Root Descriptor Flag Enums: +ROOT_DESCRIPTOR_FLAG_ENUM(DataVolatile, "DATA_VOLATILE") +ROOT_DESCRIPTOR_FLAG_ENUM(DataStaticWhileSetAtExecute, "DATA_STATIC_WHILE_SET_AT_EXECUTE") +ROOT_DESCRIPTOR_FLAG_ENUM(DataStatic, "DATA_STATIC") + +// Descriptor Range Flag Enums: +DESCRIPTOR_RANGE_FLAG_ENUM(DescriptorsVolatile, "DESCRIPTORS_VOLATILE", ON) +DESCRIPTOR_RANGE_FLAG_ENUM(DataVolatile, "DATA_VOLATILE", OFF) +DESCRIPTOR_RANGE_FLAG_ENUM(DataStaticWhileSetAtExecute, "DATA_STATIC_WHILE_SET_AT_EXECUTE", OFF) +DESCRIPTOR_RANGE_FLAG_ENUM(DataStatic, "DATA_STATIC", OFF) +DESCRIPTOR_RANGE_FLAG_ENUM(DescriptorsStaticKeepingBufferBoundsChecks, "DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS", ON) + +// Shader Visibiliy Enums: +SHADER_VISIBILITY_ENUM(All, "SHADER_VISIBILITY_ALL") +SHADER_VISIBILITY_ENUM(Vertex, "SHADER_VISIBILITY_VERTEX") +SHADER_VISIBILITY_ENUM(Hull, "SHADER_VISIBILITY_HULL") +SHADER_VISIBILITY_ENUM(Domain, "SHADER_VISIBILITY_DOMAIN") +SHADER_VISIBILITY_ENUM(Geometry, "SHADER_VISIBILITY_GEOMETRY") +SHADER_VISIBILITY_ENUM(Pixel, "SHADER_VISIBILITY_PIXEL") +SHADER_VISIBILITY_ENUM(Amplification, "SHADER_VISIBILITY_AMPLIFICATION") +SHADER_VISIBILITY_ENUM(Mesh, "SHADER_VISIBILITY_MESH") + +#undef SHADER_VISIBILITY_ENUM +#undef DESCRIPTOR_RANGE_FLAG_ENUM +#undef DESCRIPTOR_RANGE_FLAG_ENUM_OFF +#undef DESCRIPTOR_RANGE_FLAG_ENUM_ON +#undef ROOT_DESCRIPTOR_FLAG_ENUM #undef DESCRIPTOR_RANGE_OFFSET_ENUM #undef ENUM #undef KEYWORD diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index d80ded10ba313cb..57b61e43746a0bb 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -150,7 +150,26 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { DescriptorTable + CBV SRV UAV Sampler + space visibility flags + numDescriptors offset + DESCRIPTOR_RANGE_OFFSET_APPEND + + DATA_VOLATILE + DATA_STATIC_WHILE_SET_AT_EXECUTE + DATA_STATIC + DESCRIPTORS_VOLATILE + DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS + + shader_visibility_all + shader_visibility_vertex + shader_visibility_hull + shader_visibility_domain + shader_visibility_geometry + shader_visibility_pixel + shader_visibility_amplification + shader_visibility_mesh )cc"; TrivialModuleLoader ModLoader; >From 82c645d4049950bdcfb151b0093708338dd1e714 Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Tue, 4 Feb 2025 18:22:48 +0000 Subject: [PATCH 06/12] review: update uses of llvm_unreachable --- clang/lib/Parse/ParseHLSLRootSignature.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 7ceb85a47a088e1..da8bce9ea9ebbe8 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -34,12 +34,9 @@ bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) { if (Literal.hadError) return true; // Error has already been reported so just return - if (!Literal.isIntegerLiteral()) { - // Note: if IsNumberChar allows for hexidecimal we will need to turn this - // into a diagnostics for potential fixed-point literals - llvm_unreachable("IsNumberChar will only support digits"); - return true; - } + // Note: if IsNumberChar allows for hexidecimal we will need to turn this + // into a diagnostics for potential fixed-point literals + assert(Literal.isIntegerLiteral() && "IsNumberChar will only support digits"); // Retrieve the number value to store into the token Result.Kind = TokenKind::int_literal; @@ -142,7 +139,6 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { break; default: llvm_unreachable("Switch for an expected token was not provided"); - return true; } return false; } >From 1c95e6d852f668b5fbab05a4ebe48526b190a5d5 Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Tue, 4 Feb 2025 18:23:12 +0000 Subject: [PATCH 07/12] review: update target test triple with comment --- clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index 57b61e43746a0bb..1f263f9371d5109 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -68,7 +68,8 @@ class ParseHLSLRootSignatureTest : public ::testing::Test { Consumer(new ExpectedDiagConsumer()), Diags(DiagID, new DiagnosticOptions, Consumer), SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { - TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; + // This is an arbitrarily chosen target triple to create the target info. + TargetOpts->Triple = "dxil"; Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); } >From d648f4ccb8c74060e9e6d75b46f54ac0127e4302 Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Tue, 4 Feb 2025 19:04:21 +0000 Subject: [PATCH 08/12] review: dealing with signed positive integer - the previous implementation would not have thrown an overflow error as expected. Instead it would have been treated as a negative int - DXC treats all positively signed ints as an unsigned integer, so for compatibility, we will be doing the same. - add better unit tests to demonstrate expected functionality --- clang/lib/Parse/ParseHLSLRootSignature.cpp | 4 +++- .../Parse/ParseHLSLRootSignatureTest.cpp | 24 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index da8bce9ea9ebbe8..261f4fd08bebbbb 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -41,7 +41,9 @@ bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) { // Retrieve the number value to store into the token Result.Kind = TokenKind::int_literal; - llvm::APSInt X = llvm::APSInt(32, !Signed); + // NOTE: for compabibility with DXC, we will treat any integer with '+' as an + // unsigned integer + llvm::APSInt X = llvm::APSInt(32, !Negative); if (Literal.GetIntegerValue(X)) { // Report that the value has overflowed PP.getDiagnostics().Report(Result.TokLoc, diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index 1f263f9371d5109..b143695ff2826e7 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -115,7 +115,7 @@ class ParseHLSLRootSignatureTest : public ::testing::Test { TEST_F(ParseHLSLRootSignatureTest, ValidLexNumbersTest) { // This test will check that we can lex different number tokens const llvm::StringLiteral Source = R"cc( - -42 42 +42 + -42 42 +42 +2147483648 )cc"; TrivialModuleLoader ModLoader; @@ -135,8 +135,30 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexNumbersTest) { hlsl::TokenKind::int_literal, hlsl::TokenKind::int_literal, hlsl::TokenKind::int_literal, + hlsl::TokenKind::int_literal, }; CheckTokens(Tokens, Expected); + + // Sample negative int + hlsl::RootSignatureToken IntToken = Tokens[0]; + ASSERT_TRUE(IntToken.NumLiteral.getInt().isSigned()); + ASSERT_EQ(IntToken.NumLiteral.getInt().getExtValue(), -42); + + // Sample unsigned int + IntToken = Tokens[1]; + ASSERT_FALSE(IntToken.NumLiteral.getInt().isSigned()); + ASSERT_EQ(IntToken.NumLiteral.getInt().getExtValue(), 42); + + // Sample positive int that is treated as unsigned + IntToken = Tokens[2]; + ASSERT_FALSE(IntToken.NumLiteral.getInt().isSigned()); + ASSERT_EQ(IntToken.NumLiteral.getInt().getExtValue(), 42); + + // Sample positive int that would overflow the signed representation but + // is treated as an unsigned integer instead + IntToken = Tokens[3]; + ASSERT_FALSE(IntToken.NumLiteral.getInt().isSigned()); + ASSERT_EQ(IntToken.NumLiteral.getInt().getExtValue(), 2147483648); } TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { >From de2f62e26d1c8e1bc9c1fd2aebe15acfc46147ec Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Mon, 10 Feb 2025 20:29:26 +0000 Subject: [PATCH 09/12] review: update diagnostics message --- clang/include/clang/Basic/DiagnosticLexKinds.td | 5 ++--- clang/lib/Parse/ParseHLSLRootSignature.cpp | 7 ++++--- clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index cbadd16df3f9db1..7f788195562ed99 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -1020,10 +1020,9 @@ Error<"expected 'begin' or 'end'">; // HLSL Root Signature Lexing Errors let CategoryName = "Root Signature Lexical Issue" in { - def err_hlsl_invalid_number_literal: - Error<"expected number literal is not a supported number literal of unsigned integer or integer">; + def err_hlsl_expected_number_literal: Error<"expected numberic literal">; def err_hlsl_number_literal_overflow : - Error<"provided %select{unsigned integer|signed integer}0 literal '%1' that overflows the maximum of 32 bits">; + Error<"integer literal is too large to be represented in any %select{signed |}0 integer type">; def err_hlsl_invalid_token: Error<"unable to lex a valid Root Signature token">; def err_hlsl_invalid_register_literal: Error<"expected unsigned integer literal as part of register">; } diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 261f4fd08bebbbb..07ca70c5d404e2b 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -20,10 +20,11 @@ bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) { // Retrieve the possible number StringRef NumSpelling = Buffer.take_while(IsNumberChar); - // Catch this now as the Literal Parser will accept it as valid + // Catch when there is a '+' or '-' specified but no literal value after. + // This is invalid but the NumericLiteralParser will accept this as valid. if (NumSpelling.empty()) { PP.getDiagnostics().Report(Result.TokLoc, - diag::err_hlsl_invalid_number_literal); + diag::err_hlsl_expected_number_literal); return true; } @@ -48,7 +49,7 @@ bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) { // Report that the value has overflowed PP.getDiagnostics().Report(Result.TokLoc, diag::err_hlsl_number_literal_overflow) - << (unsigned)Signed << NumSpelling; + << (unsigned)!Signed << NumSpelling; return true; } diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index b143695ff2826e7..0e6408f59663fe4 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -252,7 +252,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexEmptyNumberTest) { auto TokLoc = SourceLocation(); // Test correct diagnostic produced - Consumer->SetExpected(diag::err_hlsl_invalid_number_literal); + Consumer->SetExpected(diag::err_hlsl_expected_number_literal); hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); >From f557d5a922f2d79103c96abbb15c880d91fbf038 Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Mon, 10 Feb 2025 20:30:30 +0000 Subject: [PATCH 10/12] review: update linked todo --- clang/lib/Parse/ParseHLSLRootSignature.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 07ca70c5d404e2b..900b3511e163958 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -6,7 +6,7 @@ namespace hlsl { // Lexer Definitions static bool IsNumberChar(char C) { - // TODO(#120472): extend for float support exponents + // TODO(#126565): extend for float support exponents return isdigit(C); // integer support } >From de364d896b24ea782b42e97fe76f71ebdbeaaa9e Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Mon, 10 Feb 2025 22:06:05 +0000 Subject: [PATCH 11/12] review: remove api for pre-allocating tokens - we want to prevent the pre-allocation of all the tokens and rather let the parser allocate the tokens as it goes along - this allows for a quicker cycle to the user if there is a parsing error as we don't need to lex all of the tokens before trying to parse - as such, this change provides a new api for interacting with the lexer through ConsumeToken and PeekNextToken as opposed to generated the entire list of lexed tokens --- .../include/clang/Basic/DiagnosticLexKinds.td | 1 + .../Parse/HLSLRootSignatureTokenKinds.def | 1 + .../clang/Parse/ParseHLSLRootSignature.h | 36 ++++-- clang/lib/Parse/ParseHLSLRootSignature.cpp | 55 +++++++--- .../Parse/ParseHLSLRootSignatureTest.cpp | 103 ++++++++++++++---- 5 files changed, 143 insertions(+), 53 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 7f788195562ed99..3de446fd6c771fc 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -1025,6 +1025,7 @@ let CategoryName = "Root Signature Lexical Issue" in { Error<"integer literal is too large to be represented in any %select{signed |}0 integer type">; def err_hlsl_invalid_token: Error<"unable to lex a valid Root Signature token">; def err_hlsl_invalid_register_literal: Error<"expected unsigned integer literal as part of register">; + def err_hlsl_rootsig_unexpected_eos : Error<"unexpected end to token stream">; } } diff --git a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def index 5e47af8d4b13644..8e01fcf1175001c 100644 --- a/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def +++ b/clang/include/clang/Parse/HLSLRootSignatureTokenKinds.def @@ -51,6 +51,7 @@ // General Tokens: TOK(invalid) +TOK(end_of_stream) TOK(int_literal) // Register Tokens: diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h index 899608bd1527ea9..31529b0c03cfabd 100644 --- a/clang/include/clang/Parse/ParseHLSLRootSignature.h +++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h @@ -39,7 +39,9 @@ struct RootSignatureToken { APValue NumLiteral = APValue(); // Constructors + RootSignatureToken() : TokLoc(SourceLocation()) {} RootSignatureToken(clang::SourceLocation TokLoc) : TokLoc(TokLoc) {} + RootSignatureToken(enum Kind Kind, clang::SourceLocation TokLoc) : Kind(Kind), TokLoc(TokLoc) {} }; using TokenKind = enum RootSignatureToken::Kind; @@ -49,28 +51,38 @@ class RootSignatureLexer { clang::Preprocessor &PP) : Buffer(Signature), SourceLoc(SourceLoc), PP(PP) {} - // Consumes the internal buffer as a list of tokens and will emplace them - // onto the given tokens. - // - // It will consume until it successfully reaches the end of the buffer, - // or, until the first error is encountered. The return value denotes if - // there was a failure. - bool Lex(SmallVector<RootSignatureToken> &Tokens); + /// Updates CurToken to the next token. Either it will take the previously + /// lexed NextToken, or it will lex a token. + /// + /// The return value denotes if there was a failure. + bool ConsumeToken(); + + /// Returns the token that comes after CurToken or std::nullopt if an + /// error is encountered during lexing of the next token. + std::optional<RootSignatureToken> PeekNextToken(); + + RootSignatureToken GetCurToken() { return CurToken; } + + /// Check if we have reached the end of input + bool EndOfBuffer() { + AdvanceBuffer(Buffer.take_while(isspace).size()); + return Buffer.empty(); + } private: // Internal buffer to iterate over StringRef Buffer; + // Current Token state + RootSignatureToken CurToken; + std::optional<RootSignatureToken> NextToken = std::nullopt; + // Passed down parameters from Sema clang::SourceLocation SourceLoc; clang::Preprocessor &PP; bool LexNumber(RootSignatureToken &Result); - - // Consumes the internal buffer for a single token. - // - // The return value denotes if there was a failure. - bool LexToken(RootSignatureToken &Token); + bool LexToken(RootSignatureToken &Result); // Advance the buffer by the specified number of characters. Updates the // SourceLocation appropriately. diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 900b3511e163958..88d892dd4308a48 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -60,27 +60,13 @@ bool RootSignatureLexer::LexNumber(RootSignatureToken &Result) { return false; } -bool RootSignatureLexer::Lex(SmallVector<RootSignatureToken> &Tokens) { +bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { // Discard any leading whitespace AdvanceBuffer(Buffer.take_while(isspace).size()); - while (!Buffer.empty()) { - // Record where this token is in the text for usage in parser diagnostics - RootSignatureToken Result(SourceLoc); - if (LexToken(Result)) - return true; - - // Successfully Lexed the token so we can store it - Tokens.push_back(Result); + // Record where this token is in the text for usage in parser diagnostics + Result = RootSignatureToken(SourceLoc); - // Discard any trailing whitespace - AdvanceBuffer(Buffer.take_while(isspace).size()); - } - - return false; -} - -bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { char C = Buffer.front(); // Punctuators @@ -168,5 +154,40 @@ bool RootSignatureLexer::LexToken(RootSignatureToken &Result) { return false; } +bool RootSignatureLexer::ConsumeToken() { + // If we previously peeked then just copy the value over + if (NextToken && NextToken->Kind != TokenKind::end_of_stream) { + CurToken = *NextToken; + NextToken = std::nullopt; + return false; + } + + // This will be implicity be true if NextToken->Kind == end_of_stream + if (EndOfBuffer()) { + // Report unexpected end of tokens error + PP.getDiagnostics().Report(SourceLoc, diag::err_hlsl_rootsig_unexpected_eos); + return true; + } + + return LexToken(CurToken); +} + +std::optional<RootSignatureToken> RootSignatureLexer::PeekNextToken() { + // Already peeked from the current token + if (NextToken.has_value()) + return NextToken; + + if (EndOfBuffer()) { + // We have reached the end of the stream, but only error on consume + return RootSignatureToken(TokenKind::end_of_stream, SourceLoc); + } + + RootSignatureToken Result; + if (LexToken(Result)) + return std::nullopt; + NextToken = Result; + return Result; +} + } // namespace hlsl } // namespace clang diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp index 0e6408f59663fe4..c5d2dda9b1c12ac 100644 --- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp +++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp @@ -91,12 +91,19 @@ class ParseHLSLRootSignatureTest : public ::testing::Test { return PP; } - void CheckTokens(SmallVector<hlsl::RootSignatureToken> &Computed, + void CheckTokens(hlsl::RootSignatureLexer &Lexer, + SmallVector<hlsl::RootSignatureToken> &Computed, SmallVector<hlsl::TokenKind> &Expected) { - ASSERT_EQ(Computed.size(), Expected.size()); for (unsigned I = 0, E = Expected.size(); I != E; ++I) { - ASSERT_EQ(Computed[I].Kind, Expected[I]); + if (Expected[I] == hlsl::TokenKind::invalid || + Expected[I] == hlsl::TokenKind::end_of_stream) + continue; + ASSERT_FALSE(Lexer.ConsumeToken()); + hlsl::RootSignatureToken Result = Lexer.GetCurToken(); + ASSERT_EQ(Result.Kind, Expected[I]); + Computed.push_back(Result); } + ASSERT_TRUE(Lexer.EndOfBuffer()); } FileSystemOptions FileMgrOpts; @@ -128,16 +135,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexNumbersTest) { hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); SmallVector<hlsl::RootSignatureToken> Tokens; - ASSERT_FALSE(Lexer.Lex(Tokens)); - ASSERT_TRUE(Consumer->IsSatisfied()); - SmallVector<hlsl::TokenKind> Expected = { hlsl::TokenKind::int_literal, hlsl::TokenKind::int_literal, hlsl::TokenKind::int_literal, hlsl::TokenKind::int_literal, }; - CheckTokens(Tokens, Expected); + CheckTokens(Lexer, Tokens, Expected); + ASSERT_TRUE(Consumer->IsSatisfied()); // Sample negative int hlsl::RootSignatureToken IntToken = Tokens[0]; @@ -204,23 +209,77 @@ TEST_F(ParseHLSLRootSignatureTest, ValidLexAllTokensTest) { hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); - SmallVector<hlsl::RootSignatureToken> Tokens = { - hlsl::RootSignatureToken( - SourceLocation()) // invalid token for completeness - }; - ASSERT_FALSE(Lexer.Lex(Tokens)); - ASSERT_TRUE(Consumer->IsSatisfied()); - + SmallVector<hlsl::RootSignatureToken> Tokens; SmallVector<hlsl::TokenKind> Expected = { #define TOK(NAME) hlsl::TokenKind::NAME, #include "clang/Parse/HLSLRootSignatureTokenKinds.def" }; - CheckTokens(Tokens, Expected); + CheckTokens(Lexer, Tokens, Expected); + ASSERT_TRUE(Consumer->IsSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, ValidLexPeekTest) { + // This test will check that we can lex all defined tokens as defined in + // HLSLRootSignatureTokenKinds.def, plus some additional integer variations + const llvm::StringLiteral Source = R"cc( + )1 + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + // Test no diagnostics produced + Consumer->SetNoDiag(); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + // Test basic peek + auto Res = Lexer.PeekNextToken(); + ASSERT_TRUE(Res.has_value()); + ASSERT_EQ(Res->Kind, hlsl::TokenKind::pu_r_paren); + + // Ensure it doesn't peek past one element + Res = Lexer.PeekNextToken(); + ASSERT_TRUE(Res.has_value()); + ASSERT_EQ(Res->Kind, hlsl::TokenKind::pu_r_paren); + + ASSERT_FALSE(Lexer.ConsumeToken()); + + // Invoke after reseting the NextToken + Res = Lexer.PeekNextToken(); + ASSERT_TRUE(Res.has_value()); + ASSERT_EQ(Res->Kind, hlsl::TokenKind::int_literal); + + // Ensure we can still consume the second token + ASSERT_FALSE(Lexer.ConsumeToken()); + + // Ensure no error raised when peeking past end of stream + Res = Lexer.PeekNextToken(); + ASSERT_TRUE(Res.has_value()); + ASSERT_EQ(Res->Kind, hlsl::TokenKind::end_of_stream); + + ASSERT_TRUE(Consumer->IsSatisfied()); } // Invalid Lexing Tests +TEST_F(ParseHLSLRootSignatureTest, InvalidParseUnexpectedEOSTest) { + const llvm::StringLiteral Source = R"cc()cc"; + + TrivialModuleLoader ModLoader; + auto PP = CreatePP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); + + // Test correct diagnostic produced + Consumer->SetExpected(diag::err_hlsl_rootsig_unexpected_eos); + ASSERT_TRUE(Lexer.ConsumeToken()); + + ASSERT_TRUE(Consumer->IsSatisfied()); +} + TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedNumberTest) { // This test will check that the lexing fails due to an integer overflow const llvm::StringLiteral Source = R"cc( @@ -236,8 +295,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedNumberTest) { hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); - SmallVector<hlsl::RootSignatureToken> Tokens; - ASSERT_TRUE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Lexer.ConsumeToken()); ASSERT_TRUE(Consumer->IsSatisfied()); } @@ -256,8 +314,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexEmptyNumberTest) { hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); - SmallVector<hlsl::RootSignatureToken> Tokens; - ASSERT_TRUE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Lexer.ConsumeToken()); ASSERT_TRUE(Consumer->IsSatisfied()); } @@ -276,9 +333,8 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexRegNumberTest) { hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); - SmallVector<hlsl::RootSignatureToken> Tokens; - ASSERT_TRUE(Lexer.Lex(Tokens)); - // FIXME(#120472): This should be TRUE once we can lex a floating + ASSERT_TRUE(Lexer.ConsumeToken()); + // FIXME(#126565): This should be TRUE once we can lex a float ASSERT_FALSE(Consumer->IsSatisfied()); } @@ -297,8 +353,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexIdentifierTest) { hlsl::RootSignatureLexer Lexer(Source, TokLoc, *PP); - SmallVector<hlsl::RootSignatureToken> Tokens; - ASSERT_TRUE(Lexer.Lex(Tokens)); + ASSERT_TRUE(Lexer.ConsumeToken()); ASSERT_TRUE(Consumer->IsSatisfied()); } >From 35c15661e7082ea090578547e2de4c29cd0ba37a Mon Sep 17 00:00:00 2001 From: Finn Plummer <canadienf...@gmail.com> Date: Tue, 11 Feb 2025 15:55:55 +0000 Subject: [PATCH 12/12] clang format --- clang/include/clang/Parse/ParseHLSLRootSignature.h | 3 ++- clang/lib/Parse/ParseHLSLRootSignature.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h index 31529b0c03cfabd..15898f8c24bbae5 100644 --- a/clang/include/clang/Parse/ParseHLSLRootSignature.h +++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h @@ -41,7 +41,8 @@ struct RootSignatureToken { // Constructors RootSignatureToken() : TokLoc(SourceLocation()) {} RootSignatureToken(clang::SourceLocation TokLoc) : TokLoc(TokLoc) {} - RootSignatureToken(enum Kind Kind, clang::SourceLocation TokLoc) : Kind(Kind), TokLoc(TokLoc) {} + RootSignatureToken(enum Kind Kind, clang::SourceLocation TokLoc) + : Kind(Kind), TokLoc(TokLoc) {} }; using TokenKind = enum RootSignatureToken::Kind; diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp index 88d892dd4308a48..84905674df0ac37 100644 --- a/clang/lib/Parse/ParseHLSLRootSignature.cpp +++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp @@ -165,7 +165,8 @@ bool RootSignatureLexer::ConsumeToken() { // This will be implicity be true if NextToken->Kind == end_of_stream if (EndOfBuffer()) { // Report unexpected end of tokens error - PP.getDiagnostics().Report(SourceLoc, diag::err_hlsl_rootsig_unexpected_eos); + PP.getDiagnostics().Report(SourceLoc, + diag::err_hlsl_rootsig_unexpected_eos); return true; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits