https://github.com/michael-jabbour-sonarsource updated https://github.com/llvm/llvm-project/pull/114240
>From 9eede4531ad27ce1d77df492076d5fbcd237fd3f Mon Sep 17 00:00:00 2001 From: Michael Jabbour <michael.jabb...@sonarsource.com> Date: Fri, 7 Mar 2025 09:32:54 +0100 Subject: [PATCH] [clang] Fix ASTWriter crash after merging named enums --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Sema/Sema.h | 8 +- clang/lib/Parse/ParseDecl.cpp | 2 +- clang/lib/Parse/ParseDeclCXX.cpp | 3 +- clang/lib/Sema/SemaDecl.cpp | 30 ++++--- clang/test/Modules/modules-merge-enum.m | 111 ++++++++++++++++++++++++ 6 files changed, 139 insertions(+), 16 deletions(-) create mode 100644 clang/test/Modules/modules-merge-enum.m diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 28856c27317f3..a25808e36bd51 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -248,6 +248,7 @@ Bug Fixes in This Version - Clang now outputs correct values when #embed data contains bytes with negative signed char values (#GH102798). +- Fixed a crash when merging named enumerations in modules (#GH114240). - Fixed rejects-valid problem when #embed appears in std::initializer_list or when it can affect template argument deduction (#GH122306). - Fix crash on code completion of function calls involving partial order of function templates diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 08621e633b55c..fdef57e84ee3d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4008,7 +4008,7 @@ class Sema final : public SemaBase { /// Perform ODR-like check for C/ObjC when merging tag types from modules. /// Differently from C++, actually parse the body and reject / error out /// in case of a structural mismatch. - bool ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody); + bool ActOnDuplicateDefinition(Scope *S, Decl *Prev, SkipBodyInfo &SkipBody); typedef void *SkippedDefinitionContext; @@ -4132,6 +4132,12 @@ class Sema final : public SemaBase { void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, LookupResult &OldDecls); + /// CleanupMergedEnum - We have just merged the decl 'New' by making another + /// definition visible. + /// This method performs any necessary cleanup on the parser state to discard + /// child nodes from newly parsed decl we are retiring. + void CleanupMergedEnum(Scope *S, Decl *New); + /// MergeFunctionDecl - We just parsed a function 'New' from /// declarator D which has the same name and scope as a previous /// declaration 'Old'. Figure out how to resolve this situation, diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 7ae136af47391..31cb4a24ad97e 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -5652,7 +5652,7 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagDecl; ParseEnumBody(StartLoc, D); if (SkipBody.CheckSameAsPrevious && - !Actions.ActOnDuplicateDefinition(TagDecl, SkipBody)) { + !Actions.ActOnDuplicateDefinition(getCurScope(), TagDecl, SkipBody)) { DS.SetTypeSpecError(); return; } diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 43db715ac6d70..e48a35078892d 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -2350,7 +2350,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, // Parse the definition body. ParseStructUnionBody(StartLoc, TagType, cast<RecordDecl>(D)); if (SkipBody.CheckSameAsPrevious && - !Actions.ActOnDuplicateDefinition(TagOrTempResult.get(), SkipBody)) { + !Actions.ActOnDuplicateDefinition(getCurScope(), + TagOrTempResult.get(), SkipBody)) { DS.SetTypeSpecError(); return; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 714210c3856d7..5716eb61d4ae8 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2564,18 +2564,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, // Make the old tag definition visible. makeMergedDefinitionVisible(Hidden); - // If this was an unscoped enumeration, yank all of its enumerators - // out of the scope. - if (isa<EnumDecl>(NewTag)) { - Scope *EnumScope = getNonFieldDeclScope(S); - for (auto *D : NewTag->decls()) { - auto *ED = cast<EnumConstantDecl>(D); - assert(EnumScope->isDeclScope(ED)); - EnumScope->RemoveDecl(ED); - IdResolver.RemoveDecl(ED); - ED->getLexicalDeclContext()->removeDecl(ED); - } - } + CleanupMergedEnum(S, NewTag); } } @@ -2652,6 +2641,19 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, notePreviousDefinition(Old, New->getLocation()); } +void Sema::CleanupMergedEnum(Scope *S, Decl *New) { + // If this was an unscoped enumeration, yank all of its enumerators + // out of the scope. + if (auto *ED = dyn_cast<EnumDecl>(New); ED && !ED->isScoped()) { + Scope *EnumScope = getNonFieldDeclScope(S); + for (auto *ECD : ED->enumerators()) { + assert(EnumScope->isDeclScope(ECD)); + EnumScope->RemoveDecl(ECD); + IdResolver.RemoveDecl(ECD); + } + } +} + /// DeclhasAttr - returns true if decl Declaration already has the target /// attribute. static bool DeclHasAttr(const Decl *D, const Attr *A) { @@ -18347,12 +18349,14 @@ void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) { AddPushedVisibilityAttribute(Tag); } -bool Sema::ActOnDuplicateDefinition(Decl *Prev, SkipBodyInfo &SkipBody) { +bool Sema::ActOnDuplicateDefinition(Scope *S, Decl *Prev, + SkipBodyInfo &SkipBody) { if (!hasStructuralCompatLayout(Prev, SkipBody.New)) return false; // Make the previous decl visible. makeMergedDefinitionVisible(SkipBody.Previous); + CleanupMergedEnum(S, SkipBody.New); return true; } diff --git a/clang/test/Modules/modules-merge-enum.m b/clang/test/Modules/modules-merge-enum.m new file mode 100644 index 0000000000000..f1010c1decc24 --- /dev/null +++ b/clang/test/Modules/modules-merge-enum.m @@ -0,0 +1,111 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + + +// Expect no crash +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modcache -fmodule-map-file=%t/module.modulemap %t/source.m + +// Add -ast-dump-all to check that the AST nodes are merged correctly. +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modcache -fmodule-map-file=%t/module.modulemap %t/source.m -ast-dump-all 2>&1 | FileCheck %s + + +//--- shared.h +// This header is shared between two modules, but it's not a module itself. +// The enums defined here are parsed in both modules, and merged while building ModB. + +typedef enum MyEnum1 { MyVal_A } MyEnum1; +// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum1 +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_A 'int' +// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum1 'enum MyEnum1' +// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported +// CHECK-NEXT: | `-EnumType 0x{{.*}} 'enum MyEnum1' imported +// CHECK-NEXT: | `-Enum 0x{{.*}} 'MyEnum1' + + +enum MyEnum2 { MyVal_B }; +// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum2 +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_B 'int' + + +typedef enum { MyVal_C } MyEnum3; +// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_C 'int' +// CHECK-NEXT: |-TypedefDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyEnum3 'enum MyEnum3':'MyEnum3' +// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported +// CHECK-NEXT: | `-EnumType 0x{{.*}} 'MyEnum3' imported +// CHECK-NEXT: | `-Enum 0x{{.*}} + +struct MyStruct { + enum MyEnum5 { MyVal_D } Field; +}; + +// CHECK: |-RecordDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> struct MyStruct definition +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 <undeserialized declarations> MyEnum5 +// CHECK-NEXT: | | |-also in ModB +// CHECK-NEXT: | | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 referenced MyVal_D 'int' +// CHECK-NEXT: | `-FieldDecl 0x{{.*}} imported in ModA.ModAFile1 hidden Field 'enum MyEnum5' + +// In this case, no merging happens on the EnumDecl in Objective-C, and ASTWriter writes both EnumConstantDecls when building ModB. +enum { MyVal_E }; +// CHECK: |-EnumDecl 0x{{.*}} imported in ModA.ModAFile1 hidden <undeserialized declarations> +// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModA.ModAFile1 hidden MyVal_E 'int' + + +// Redeclarations coming from ModB. +// CHECK: |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum1 'enum MyEnum1' +// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum1' sugar imported +// CHECK-NEXT: | `-EnumType 0x{{.*}} 'enum MyEnum1' imported +// CHECK-NEXT: | `-Enum 0x{{.*}} 'MyEnum1' + +// CHECK: |-EnumDecl 0x{{.*}} prev 0x{{.*}} imported in ModB <undeserialized declarations> +// CHECK-NEXT: | |-also in ModB +// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} imported in ModB MyVal_C 'int' +// CHECK-NEXT: |-TypedefDecl 0x{{.*}} prev 0x{{.*}} imported in ModB MyEnum3 'enum MyEnum3':'MyEnum3' +// CHECK-NEXT: | `-ElaboratedType 0x{{.*}} 'enum MyEnum3' sugar imported +// CHECK-NEXT: | `-EnumType 0x{{.*}} 'MyEnum3' imported +// CHECK-NEXT: | `-Enum 0x{{.*}} + +// CHECK: |-EnumDecl 0x{{.*}} imported in ModB <undeserialized declarations> +// CHECK-NEXT: | `-EnumConstantDecl 0x{{.*}} first 0x{{.*}} imported in ModB referenced MyVal_E 'int' + + + +//--- module.modulemap +module ModA { + module ModAFile1 { + header "ModAFile1.h" + export * + } + module ModAFile2 { + header "ModAFile2.h" + export * + } +} +// The goal of writing ModB is to test that ASTWriter can handle the merged AST nodes correctly. +// For example, a stale declaration in IdResolver can cause an assertion failure while writing the identifier table. +module ModB { + header "ModBFile.h" + export * +} + +//--- ModAFile1.h +#include "shared.h" + +//--- ModAFile2.h +// Including this file, triggers loading of the module ModA with nodes coming ModAFile1.h being hidden. + +//--- ModBFile.h +// ModBFile depends on ModAFile2.h only. +#include "ModAFile2.h" +// Including shared.h here causes Sema to merge the AST nodes from shared.h with the hidden ones from ModA. +#include "shared.h" + + +//--- source.m +#include "ModBFile.h" + +int main() { return MyVal_A + MyVal_B + MyVal_C + MyVal_D + MyVal_E; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits