https://github.com/sei-nreimer created https://github.com/llvm/llvm-project/pull/111705
AST output modifications primarily focused on JSON enhancements for the SEI Redemption project. Some of the key changes are:

1. Recursive Pointer Resolution
   - Modified the AST to accurately track multiple levels of pointer indirection during analysis
2. Function Pointer Identification
   - Improved function pointer type identification handling, enabling correct resolution and analysis of function pointer assignments and calls within the AST
3. QualType Enhancements
   - Updated QualType to better represent types with qualifiers such as `const`, `volatile`, and others
4. QualDetails Addition
   - Introduced a new structure, QualDetails, to encapsulate additional metadata about type qualifiers, e.g., `ptr`, `signed`, `float`, `struct`, `union`, `array`, `promotable`, `integer`, `func_ptr`
5. Return Type Information
   - Updated the AST to expose detailed return type information, similar to VarDecl
6. JSON Debloating
   - Reduced the size of JSON output by caching IDs and then using the `refId` key for referring back to the original ID
7. Added Missing Range and ID for CXXCtorInitializer

### Examples

```c
// recursive pointer resolution
int ** c;
```

data:image/s3,"s3://crabby-images/96edf/96edf64b966769af7823a74abee382025163e769" alt="image"

```c
// function pointer information
long (*foo)(int (*)(short));
```

data:image/s3,"s3://crabby-images/0b663/0b663afd11b12ddafb5adcaab564c7ea930600fe" alt="image"

```c
// return type information
int runFunctionTestA( char a );
```

data:image/s3,"s3://crabby-images/8ec62/8ec6248f07041920a15cdd9446dd526762275205" alt="image"

```c
// refId usage example.
int a; // (not in image) first time encountering an int int *b; // (not in image) first time encountering int *, but second time encountering int int **c; // first time encountering int **, but second time encountering int * and third time int ``` data:image/s3,"s3://crabby-images/35e6c/35e6c0f821eb9122999b5fae9461e56504854094" alt="image" >From 64ead42b291bd23bf47a37d6bcd6dea6b367e760 Mon Sep 17 00:00:00 2001 From: Nicholas Reimer <nrei...@sei.cmu.edu> Date: Tue, 8 Oct 2024 16:40:58 -0600 Subject: [PATCH] modified AST for SEI redemption project --- clang/include/clang/AST/ASTNodeTraverser.h | 59 +++++++++- clang/include/clang/AST/JSONNodeDumper.h | 45 +++++++- clang/include/clang/AST/TextNodeDumper.h | 22 ++++ clang/lib/AST/JSONNodeDumper.cpp | 124 ++++++++++++++++++++- clang/lib/AST/TextNodeDumper.cpp | 8 ++ clang/unittests/AST/ASTTraverserTest.cpp | 4 + 6 files changed, 252 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index a443a88bab1f2d..3299011771f332 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -11,6 +11,22 @@ // similar to RecursiveASTVisitor. // //===----------------------------------------------------------------------===// +// +// Modifications to this file by SEI staff are copyright Carnegie Mellon +// University and contributed under the Apache License v2.0 with LLVM +// Exceptions. +// +// SEI Contributions are made with funding and support from the Department of +// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University +// for the operation of the Software Engineering Institute, a federally funded +// research and development center. +// +// The view, opinions, and/or findings contained in this material are those of +// the author(s) and should not be construed as an official Government position, +// policy, or decision, unless designated by other documentation. 
+// DM24-0194 +// +//===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_ASTNODETRAVERSER_H #define LLVM_CLANG_AST_ASTNODETRAVERSER_H @@ -177,14 +193,34 @@ class ASTNodeTraverser if (!SQT.Quals.hasQualifiers()) return Visit(SQT.Ty); - getNodeDelegate().AddChild([=] { + // SEI: changed from default label to "qualTypeDetail" + getNodeDelegate().AddChild("qualTypeDetail", [this, T] { getNodeDelegate().Visit(T); Visit(T.split().Ty); }); + + // SEI function pointer support. this gets called whenever the three + // conditions are met: + // 1. the function pointer is not typedef'd + // 2. after Visit(VarDecl *) gets called + // 3. if VarDecl determines this is a function pointer + if (T->isFunctionPointerType()) { + // create as a child node to this type + getNodeDelegate().AddChild( + [=] { getNodeDelegate().Visit(T->getPointeeType()); }); + } + + // SEI: traverse PointerType information + if (T->isPointerType()) + Visit(T->getPointeeType()); } + // SEI: traverse ReturnType information + void VisitReturnType(QualType T) { getNodeDelegate().VisitReturnType(T); } + void Visit(const Type *T) { - getNodeDelegate().AddChild([=] { + // SEI: renamed this from default label + getNodeDelegate().AddChild("typeDetails", [this, T] { getNodeDelegate().Visit(T); if (!T) return; @@ -209,7 +245,8 @@ class ASTNodeTraverser } void Visit(const Attr *A) { - getNodeDelegate().AddChild([=] { + // SEI: renamed from default label + getNodeDelegate().AddChild("attrDetails", [this, A] { getNodeDelegate().Visit(A); ConstAttrVisitor<Derived>::Visit(A); }); @@ -410,8 +447,17 @@ class ASTNodeTraverser Visit(T->getSizeExpr()); } void VisitVectorType(const VectorType *T) { Visit(T->getElementType()); } - void VisitFunctionType(const FunctionType *T) { Visit(T->getReturnType()); } + void VisitFunctionType(const FunctionType *T) { + // SEI: add functionDetails, incl. 
return type + getNodeDelegate().AddChild("functionDetails", [this, T] { + getNodeDelegate().VisitFunctionType(T); + getNodeDelegate().VisitReturnType(T->getReturnType()); + }); + } + void VisitFunctionProtoType(const FunctionProtoType *T) { + + // SEI: visit the function type. this will force the return type info too. VisitFunctionType(T); for (const QualType &PT : T->getParamTypes()) Visit(PT); @@ -560,6 +606,11 @@ class ASTNodeTraverser Visit(TSI->getTypeLoc()); if (D->hasInit()) Visit(D->getInit()); + + // SEI: if this is a function pointer, then we need to get the + // FunctionProtoType and then make add'l visits. if the FP is typedef'd, + // then this behavior occurs for us outside of Visit(VarDecl *) + getNodeDelegate().Visit(D->getType()); } void VisitDecompositionDecl(const DecompositionDecl *D) { diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h index 9422c8fceccfbd..25cec3abcbae82 100644 --- a/clang/include/clang/AST/JSONNodeDumper.h +++ b/clang/include/clang/AST/JSONNodeDumper.h @@ -10,6 +10,22 @@ // a JSON. // //===----------------------------------------------------------------------===// +// +// Modifications to this file by SEI staff are copyright Carnegie Mellon +// University and contributed under the Apache License v2.0 with LLVM +// Exceptions. +// +// SEI Contributions are made with funding and support from the Department of +// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University +// for the operation of the Software Engineering Institute, a federally funded +// research and development center. +// +// The view, opinions, and/or findings contained in this material are those of +// the author(s) and should not be construed as an official Government position, +// policy, or decision, unless designated by other documentation. 
+// DM24-0194 +// +//===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_JSONNODEDUMPER_H #define LLVM_CLANG_AST_JSONNODEDUMPER_H @@ -26,6 +42,9 @@ #include "clang/AST/Type.h" #include "llvm/Support/JSON.h" +// SEI: added for caching addresses of certain visited nodes +#include <unordered_set> + namespace clang { class APValue; @@ -111,8 +130,8 @@ class NodeStreamer { // Dumps AST nodes in JSON format. There is no implied stability for the // content or format of the dump between major releases of Clang, other than it // being valid JSON output. Further, there is no requirement that the -// information dumped is a complete representation of the AST, only that the -// information presented is correct. +// information dumped be a complete representation of the AST, only that the +// information presented be correct. class JSONNodeDumper : public ConstAttrVisitor<JSONNodeDumper>, public comments::ConstCommentVisitor<JSONNodeDumper, void, @@ -132,6 +151,9 @@ class JSONNodeDumper StringRef LastLocFilename, LastLocPresumedFilename; unsigned LastLocLine, LastLocPresumedLine; + // SEI: caches addresses for QualType nodes that are duplicates + std::unordered_set<void *> AddressCache; + using InnerAttrVisitor = ConstAttrVisitor<JSONNodeDumper>; using InnerCommentVisitor = comments::ConstCommentVisitor<JSONNodeDumper, void, @@ -184,6 +206,18 @@ class JSONNodeDumper StringRef getCommentCommandName(unsigned CommandID) const; + /// SEI: simple cacher for addresses of nodes to reduce + /// bloat caused by SEI changes + /// Return True if it's already cached, otherwise false + bool cacheAddress(void *p) { + if (AddressCache.find(p) == AddressCache.end()) { + AddressCache.insert(p); + return false; + } + + return true; + } + public: JSONNodeDumper(raw_ostream &OS, const SourceManager &SrcMgr, ASTContext &Ctx, const PrintingPolicy &PrintPolicy, @@ -196,6 +230,13 @@ class JSONNodeDumper void Visit(const Stmt *Node); void Visit(const 
Type *T); void Visit(QualType T); + + // SEI: get specific details from the qual type + void VisitQualTypeDetails(QualType T); + + // SEI: traverse ReturnType information + void VisitReturnType(QualType T); + void Visit(const Decl *D); void Visit(TypeLoc TL); diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 9c320c8ae3e54c..89f0fb110b31a2 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -9,6 +9,22 @@ // This file implements AST dumping of components of individual AST nodes. // //===----------------------------------------------------------------------===// +// +// Modifications to this file by SEI staff are copyright Carnegie Mellon +// University and contributed under the Apache License v2.0 with LLVM +// Exceptions. +// +// SEI Contributions are made with funding and support from the Department of +// Defense under Contract No. FA8702-15-D-0002 with Carnegie Mellon University +// for the operation of the Software Engineering Institute, a federally funded +// research and development center. +// +// The view, opinions, and/or findings contained in this material are those of +// the author(s) and should not be construed as an official Government position, +// policy, or decision, unless designated by other documentation. +// DM24-0194 +// +//===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_TEXTNODEDUMPER_H #define LLVM_CLANG_AST_TEXTNODEDUMPER_H @@ -183,6 +199,12 @@ class TextNodeDumper void Visit(TypeLoc); + // SEI: added support for getting ReturnType information + void VisitReturnType(QualType T); + + // SEI: added support for more QT details. 
it's a passthrough for this class + void VisitQualTypeDetails(QualType T) {} + void Visit(const Decl *D); void Visit(const CXXCtorInitializer *Init); diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index ddbe2136a671f3..72b2c2b9b8aa80 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -71,6 +71,19 @@ void JSONNodeDumper::Visit(const Stmt *S) { } void JSONNodeDumper::Visit(const Type *T) { + // SEI: ensure FPTs are debloated. this can be expanded to ALL types, if + // desired + if (T && cacheAddress((void *)T)) { + // add this as a child to know that it's a Type + AddChild("typeDetails", + [=] { JOS.attribute("refId", createPointerRepresentation(T)); }); + + InnerTypeVisitor::Visit(T); + // SEI + VisitQualTypeDetails(T->getCanonicalTypeInternal()); + return; + } + JOS.attribute("id", createPointerRepresentation(T)); if (!T) @@ -87,13 +100,44 @@ void JSONNodeDumper::Visit(const Type *T) { T->containsUnexpandedParameterPack()); attributeOnlyIfTrue("isImported", T->isFromAST()); InnerTypeVisitor::Visit(T); + // SEI + VisitQualTypeDetails(T->getCanonicalTypeInternal()); } void JSONNodeDumper::Visit(QualType T) { - JOS.attribute("id", createPointerRepresentation(T.getAsOpaquePtr())); - JOS.attribute("kind", "QualType"); - JOS.attribute("type", createQualType(T)); - JOS.attribute("qualifiers", T.split().Quals.getAsString()); + + // SEI: used AddChild to prevent qualType from being part added to a list + // JOS.attributeArray("qualTypes", [=] { + + // SEI: force qualType into its own block, otherwise multiple Visits + // create a bunch of siblings, which is invalid JSON + JOS.attributeBegin("qualType"); + JOS.objectBegin(); + + // SEI: cache visited addresses and add only its refId + // instead of the kind, type, quals, but leave the qual type details + // because those can differ among IDs + if (cacheAddress(T.getAsOpaquePtr())) { + JOS.attribute("refId", 
createPointerRepresentation(T.getAsOpaquePtr())); + } else { + JOS.attribute("id", createPointerRepresentation(T.getAsOpaquePtr())); + JOS.attribute("kind", "QualType"); + JOS.attribute("type", createQualType(T)); + JOS.attribute("qualifiers", T.split().Quals.getAsString()); + } + + // SEI: get add'l info required for redemption analysis + // the qual type details differ even among cached references + VisitQualTypeDetails(T); + + // SEI: if this is a pointer type, then recursively call ourselves + // until it's not + if (T->isPointerType()) + Visit(T->getPointeeType()); + + JOS.objectEnd(); + JOS.attributeEnd(); + //} ); } void JSONNodeDumper::Visit(TypeLoc TL) { @@ -111,6 +155,64 @@ void JSONNodeDumper::Visit(TypeLoc TL) { [TL, this] { writeSourceRange(TL.getSourceRange()); }); } +void JSONNodeDumper::VisitQualTypeDetails(QualType T) { + // SEI: get more detailed info on type. this info is not transferrable + // with the refId, so this must be called on every type even if that type + // has been cached + JOS.attributeBegin("qualDetails"); + JOS.arrayBegin(); + + auto CT = T->getCanonicalTypeInternal(); + + if (CT->isStructureType()) + JOS.value("struct"); + + if (CT->isNullPtrType()) + JOS.value("null"); + if (CT->isUndeducedType()) + JOS.value("undeduced"); + + if (CT->isPointerType()) + JOS.value("ptr"); + if (CT->isVoidType()) + JOS.value("void"); + + if (CT->isSignedIntegerType()) + JOS.value("signed"); + if (CT->isUnsignedIntegerType()) + JOS.value("unsigned"); + if (CT->isIntegerType()) + JOS.value("integer"); + if (CT->isFloatingType()) + JOS.value("fpp"); + if (CT->isEnumeralType()) + JOS.value("enum"); + if (CT->isUnionType()) + JOS.value("union"); + if (CT->isFunctionPointerType()) + JOS.value("func_ptr"); + if (CT->isTypedefNameType()) + JOS.value("type_def"); + if (CT->isArrayType()) + JOS.value("array"); + + JOS.arrayEnd(); + JOS.attributeEnd(); +} + +// SEI: capture the return info in a nested JSON block +void JSONNodeDumper::VisitReturnType(QualType 
T) { + // using this function allows us to easily wrap just the returnType + // section into its own JSON block. if we do this in ASTNodeTraverser, + // then the TextNodeDumper works as expected but the JSONNodeDumper + // rolls all siblings into the returnType node with those siblings as child + // nodes + + JOS.attributeObject("returnTypeDetail", [=] { Visit(T); }); + + // Visit(T); +} + void JSONNodeDumper::Visit(const Decl *D) { JOS.attribute("id", createPointerRepresentation(D)); @@ -175,6 +277,14 @@ void JSONNodeDumper::Visit(const TemplateArgument &TA, SourceRange R, void JSONNodeDumper::Visit(const CXXCtorInitializer *Init) { JOS.attribute("kind", "CXXCtorInitializer"); + + // SEI: added id for + JOS.attribute("id", createPointerRepresentation(Init)); + + // SEI: added range for CXXCtorInitializers + JOS.attributeObject( + "range", [Init, this] { writeSourceRange(Init->getSourceRange()); }); + if (Init->isAnyMemberInitializer()) JOS.attribute("anyInit", createBareDeclRef(Init->getAnyMember())); else if (Init->isBaseInitializer()) @@ -958,6 +1068,10 @@ void JSONNodeDumper::VisitFieldDecl(const FieldDecl *FD) { attributeOnlyIfTrue("modulePrivate", FD->isModulePrivate()); attributeOnlyIfTrue("isBitfield", FD->isBitField()); attributeOnlyIfTrue("hasInClassInitializer", FD->hasInClassInitializer()); + + // SEI: had to add this in b/c FieldDecls do not seem to call + // Visit(QualType) + Visit(FD->getType()); } void JSONNodeDumper::VisitFunctionDecl(const FunctionDecl *FD) { @@ -1346,6 +1460,8 @@ void JSONNodeDumper::VisitDeclRefExpr(const DeclRefExpr *DRE) { case NOUR_Discarded: JOS.attribute("nonOdrUseReason", "discarded"); break; } attributeOnlyIfTrue("isImmediateEscalating", DRE->isImmediateEscalating()); + // SEI: this doesn't call VisitNamedDecl, so we force it + Visit(DRE->getType()); } void JSONNodeDumper::VisitSYCLUniqueStableNameExpr( diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 15b23d60c3ffab..7d87c4d1b5eeb8 
100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -241,6 +241,14 @@ void TextNodeDumper::Visit(QualType T) { OS << " " << T.split().Quals.getAsString(); } +void TextNodeDumper::VisitReturnType(QualType T) { + OS << "ReturnType"; + dumpPointer(T.getAsOpaquePtr()); + OS << " "; + dumpBareType(T, false); + OS << " " << T.split().Quals.getAsString(); +} + void TextNodeDumper::Visit(TypeLoc TL) { if (!TL) { ColorScope Color(OS, ShowColors, NullColor); diff --git a/clang/unittests/AST/ASTTraverserTest.cpp b/clang/unittests/AST/ASTTraverserTest.cpp index 8b6e3e90c0ea67..5f10b69862171a 100644 --- a/clang/unittests/AST/ASTTraverserTest.cpp +++ b/clang/unittests/AST/ASTTraverserTest.cpp @@ -89,6 +89,10 @@ class NodeTreePrinter : public TextTreeStructure { } } + // SEI: added for class completeness + void VisitFunctionType(const FunctionType *T) {} + void VisitReturnType(QualType T) {} + template <typename... T> void Visit(T...) {} }; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits