https://github.com/majidkamali1370 updated https://github.com/llvm/llvm-project/pull/149135
>From aab024f20b301aca4ab5299c88af2e32c72a3277 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Fri, 11 Jul 2025 23:16:27 +0100 Subject: [PATCH 01/16] Follow llvm and clang tutorial to create a sample tool --- clang-tools-extra/CMakeLists.txt | 1 + .../feature-extractor/CMakeLists.txt | 15 +++++ clang-tools-extra/feature-extractor/main.cpp | 55 +++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 clang-tools-extra/feature-extractor/CMakeLists.txt create mode 100644 clang-tools-extra/feature-extractor/main.cpp diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt index 6b6f2b1ca2276..11879a03eba74 100644 --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -28,6 +28,7 @@ add_subdirectory(clang-query) add_subdirectory(include-cleaner) add_subdirectory(pp-trace) add_subdirectory(tool-template) +add_subdirectory(feature-extractor) option(CLANG_TOOLS_EXTRA_INCLUDE_DOCS "Generate build targets for the Clang Extra Tools docs." ${LLVM_INCLUDE_DOCS}) diff --git a/clang-tools-extra/feature-extractor/CMakeLists.txt b/clang-tools-extra/feature-extractor/CMakeLists.txt new file mode 100644 index 0000000000000..6ae7b78a7ecad --- /dev/null +++ b/clang-tools-extra/feature-extractor/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LLVM_LINK_COMPONENTS support) + +add_clang_executable(feature-extractor + main.cpp +) + +target_link_libraries(feature-extractor + PRIVATE + clangAST + clangASTMatchers + clangBasic + clangFrontend + clangSerialization + clangTooling +) diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp new file mode 100644 index 0000000000000..6fec15a75e203 --- /dev/null +++ b/clang-tools-extra/feature-extractor/main.cpp @@ -0,0 +1,55 @@ +// Declares clang::SyntaxOnlyAction. 
+#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Tooling.h" +// Declares llvm::cl::extrahelp. +#include "llvm/Support/CommandLine.h" + +using namespace clang::tooling; +using namespace clang::ast_matchers; +using namespace llvm; + +// Apply a custom category to all command-line options so that they are the +// only ones displayed. +static llvm::cl::OptionCategory MyToolCategory("feature-extractor options"); + +// CommonOptionsParser declares HelpMessage with a description of the common +// command-line options related to the compilation database and input files. +// It's nice to have this help message in all tools. +static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); + +// A help message for this specific tool can be added afterwards. +static cl::extrahelp MoreHelp("\nMore help text...\n"); + +StatementMatcher LoopMatcher = + forStmt(hasLoopInit(declStmt(hasSingleDecl( + varDecl(hasInitializer(integerLiteral(equals(0)))))))) + .bind("forLoop"); + +class LoopPrinter : public MatchFinder::MatchCallback { +public: + virtual void run(const MatchFinder::MatchResult &Result) override { + if (const auto *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop")) + FS->dump(); + } +}; + +int main(int argc, const char **argv) { + auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory); + if (!ExpectedParser) { + // Fail gracefully for unsupported options. 
+ llvm::errs() << ExpectedParser.takeError(); + return 1; + } + CommonOptionsParser &OptionsParser = ExpectedParser.get(); + ClangTool Tool(OptionsParser.getCompilations(), + OptionsParser.getSourcePathList()); + + LoopPrinter Printer; + MatchFinder Finder; + Finder.addMatcher(LoopMatcher, &Printer); + + return Tool.run(newFrontendActionFactory(&Finder).get()); +} >From 318b398e3da0e8b931bea849b188a30c6d411791 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Sat, 12 Jul 2025 15:40:31 +0100 Subject: [PATCH 02/16] Add FeatureManager class for easy integration of extracted features --- .../feature-extractor/FeatureManager.h | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 clang-tools-extra/feature-extractor/FeatureManager.h diff --git a/clang-tools-extra/feature-extractor/FeatureManager.h b/clang-tools-extra/feature-extractor/FeatureManager.h new file mode 100644 index 0000000000000..5de1986630b7d --- /dev/null +++ b/clang-tools-extra/feature-extractor/FeatureManager.h @@ -0,0 +1,38 @@ +#pragma once + +#include <tuple> + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" + +using namespace clang::tooling; +using namespace clang::ast_matchers; +using namespace llvm; + +template <typename... 
Features> class FeatureManager { + std::tuple<Features...> features; + MatchFinder match_finder; + +public: + FeatureManager() { + ( + [&]() { + for (const auto &matcher : Features::Matchers) + match_finder.addMatcher(matcher, &std::get<Features>(features)); + }(), + ...); + } + + MatchFinder *get_match_finder() { return &match_finder; } + + ~FeatureManager() { + llvm::outs() << "\n"; + + ( + [&]() { + llvm::outs() << Features::get_title() << " : " + << std::get<Features>(features).get_result() << "\n"; + }(), + ...); + } +}; >From 8978a24b1b9a2fe0c2daa393b65b51dcce7b5528 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Sun, 13 Jul 2025 23:43:35 +0100 Subject: [PATCH 03/16] Add NaryTree to hold items in tree-like structure --- .../feature-extractor/NaryTree.h | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 clang-tools-extra/feature-extractor/NaryTree.h diff --git a/clang-tools-extra/feature-extractor/NaryTree.h b/clang-tools-extra/feature-extractor/NaryTree.h new file mode 100644 index 0000000000000..57109bfd2e8f7 --- /dev/null +++ b/clang-tools-extra/feature-extractor/NaryTree.h @@ -0,0 +1,102 @@ +#pragma once + +#include <memory> +#include <optional> +#include <vector> + +template <typename T> class NaryTree { + struct Node { + Node(const T &t) : value(t) {} + + std::vector<std::unique_ptr<Node>> children; + T value; + }; + + using Element = std::unique_ptr<Node>; + + Element root{nullptr}; + + template <typename Func> + void traverse_post_order_impl(Element *parent, Element &current, int depth, + bool is_leaf, Func &&f) { + for (auto &child : current->children) + traverse_post_order_impl(&current, child, depth + 1, + !child->children.size(), f); + + f(parent, current, depth, !current->children.size()); + } + + template <typename Func> + void traverse_pre_order_impl(Element *parent, Element &current, int depth, + bool is_leaf, Func &&f) { + f(parent, current, depth, !current->children.size()); + + for (auto &child : 
current->children) + traverse_pre_order_impl(&current, child, depth + 1, + !child->children.size(), f); + } + + const Element *find_node(const Element &current, const T &data) const { + if (current) { + if (current->value == data) + return &current; + else + for (const auto &child : current->children) + return find_node(child, data); + } + + return nullptr; + } + +public: + struct TraverseResult { + std::optional<T> parent; + T &self; + int depth; + bool is_leaf; + }; + + template <typename Func> void traverse_post_order(Func &&f) { + traverse_post_order_impl( + nullptr, root, 0, + [&f](Element *parent, Element &n, int depth, bool is_leaf) { + std::optional<T> opt; + + if (parent) + opt = (*parent)->value; + + f(TraverseResult{opt, n->value, depth, is_leaf}); + }); + } + + template <typename Func> void traverse_pre_order(Func &&f) { + traverse_pre_order_impl( + nullptr, root, 0, !root->children.size(), + [&f](Element *parent, Element &n, int depth, bool is_leaf) { + std::optional<T> opt; + + if (parent) + opt = (*parent)->value; + + f(TraverseResult{opt, n->value, depth, is_leaf}); + }); + } + + bool contains(const T &data) const { + return find_node(root, data) != nullptr; + } + + bool add_node(const T &parentData, const T &data) { + if (!root) { + root = std::make_unique<Node>(data); + return true; + } + + if (auto node = find_node(root, parentData)) { + (*node)->children.push_back(std::make_unique<Node>(data)); + return true; + } + + return false; + } +}; >From e3b3a8c2fb2bc3d58237c97fb3ddae5d288c615e Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Sun, 13 Jul 2025 23:47:13 +0100 Subject: [PATCH 04/16] Add NumLoops feature and use in FeatureManager --- .../feature-extractor/features/NumLoops.h | 42 +++++++++++++++++++ clang-tools-extra/feature-extractor/main.cpp | 26 ++++-------- 2 files changed, 50 insertions(+), 18 deletions(-) create mode 100644 clang-tools-extra/feature-extractor/features/NumLoops.h diff --git 
a/clang-tools-extra/feature-extractor/features/NumLoops.h b/clang-tools-extra/feature-extractor/features/NumLoops.h new file mode 100644 index 0000000000000..cad96c7775c23 --- /dev/null +++ b/clang-tools-extra/feature-extractor/features/NumLoops.h @@ -0,0 +1,42 @@ +#pragma once + +#include <array> +#include <cstddef> + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" + +#include "../utils.h" + +using namespace clang; +using namespace clang::tooling; +using namespace clang::ast_matchers; +using namespace llvm; + +class NumLoops : public MatchFinder::MatchCallback { +private: + std::size_t num_loops{0}; + +public: + static inline std::array Matchers = {forStmt().bind("forLoops"), + whileStmt().bind("whileLoops")}; + + virtual void run(const MatchFinder::MatchResult &result) override { + auto context = result.Context; + + const auto fs = result.Nodes.getNodeAs<ForStmt>("forLoops"); + const auto ws = result.Nodes.getNodeAs<WhileStmt>("whileLoops"); + + // We do not want to convert header files! + if ((!fs || + !context->getSourceManager().isWrittenInMainFile(fs->getForLoc())) && + (!ws || + !context->getSourceManager().isWrittenInMainFile(ws->getWhileLoc()))) + return; + + num_loops++; + } + + static const char *get_title() { return "num_loops"; } + std::size_t get_result() const { return num_loops; } +}; diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp index 6fec15a75e203..3b75cb6f7d419 100644 --- a/clang-tools-extra/feature-extractor/main.cpp +++ b/clang-tools-extra/feature-extractor/main.cpp @@ -7,6 +7,10 @@ // Declares llvm::cl::extrahelp. 
#include "llvm/Support/CommandLine.h" +#include "FeatureManager.h" + +#include "features/NumLoops.h" + using namespace clang::tooling; using namespace clang::ast_matchers; using namespace llvm; @@ -23,33 +27,19 @@ static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); // A help message for this specific tool can be added afterwards. static cl::extrahelp MoreHelp("\nMore help text...\n"); -StatementMatcher LoopMatcher = - forStmt(hasLoopInit(declStmt(hasSingleDecl( - varDecl(hasInitializer(integerLiteral(equals(0)))))))) - .bind("forLoop"); - -class LoopPrinter : public MatchFinder::MatchCallback { -public: - virtual void run(const MatchFinder::MatchResult &Result) override { - if (const auto *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop")) - FS->dump(); - } -}; - int main(int argc, const char **argv) { auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory); + if (!ExpectedParser) { // Fail gracefully for unsupported options. llvm::errs() << ExpectedParser.takeError(); return 1; } + CommonOptionsParser &OptionsParser = ExpectedParser.get(); ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList()); + FeatureManager<NumLoops> manager; - LoopPrinter Printer; - MatchFinder Finder; - Finder.addMatcher(LoopMatcher, &Printer); - - return Tool.run(newFrontendActionFactory(&Finder).get()); + return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get()); } >From 0093688c74a0efdfc5b29ab9ca46ee5829c13ebe Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Tue, 15 Jul 2025 14:51:08 +0100 Subject: [PATCH 05/16] Add various util functions --- clang-tools-extra/feature-extractor/utils.h | 80 +++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 clang-tools-extra/feature-extractor/utils.h diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h new file mode 100644 index 0000000000000..e30e702e3202c --- 
/dev/null +++ b/clang-tools-extra/feature-extractor/utils.h @@ -0,0 +1,80 @@ +#pragma once + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" + +#include <optional> + +using namespace clang; + +/// +/// Check if two ValueDecl pointers refer to the same variable in AST +/// +static inline bool are_same_variable(const ValueDecl *First, + const ValueDecl *Second) { + return First && Second && + First->getCanonicalDecl() == Second->getCanonicalDecl(); +} + +/// +/// Check if for statement is defined in the translation unit file (not headers) +/// +static inline bool is_in_main_file(ASTContext *context, const ForStmt *fs) { + return fs && context->getSourceManager().isWrittenInMainFile(fs->getForLoc()); +} + +/// +/// For a given for statement, tries to extract loop bound in the condition +/// +static inline std::optional<llvm::APInt> +get_for_condition_range_value(const ForStmt *fs) { + const Expr *cond = fs->getCond(); + + if (cond) { + if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(cond)) { + const Expr *RHS = BO->getRHS()->IgnoreParenImpCasts(); + + if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(RHS)) + return IL->getValue(); + } + } + + return std::nullopt; +} + +/// +/// For a given Stmt \s, tries to return the nearest ancestor of type +/// StatementType. Return nullptr in case no parent of given type was found. +/// +template <typename StatementType> +static inline const StatementType *get_parent_stmt(ASTContext *context, + const Stmt *s) { + auto parents = context->getParents(*s); + + if (parents.empty()) + return nullptr; + + for (auto &p : parents) + if (const StatementType *parent_stmt = p.get<StatementType>()) + return parent_stmt; + else + return get_parent_stmt<StatementType>(context, p.get<Stmt>()); + + return nullptr; +} + +/// +/// Run a callable on all parents of type StatementType of \s recursively goes +/// up. 
+/// +template <typename StatementType, typename Func> +static inline void run_on_all_parents_of_type(ASTContext *context, + const Stmt *s, Func &&f) { + auto parent = get_parent_stmt<StatementType>(context, s); + + while (parent) { + f(context, parent); + parent = get_parent_stmt<StatementType>(context, + dyn_cast<StatementType>(parent)); + } +} >From eed01f169c10fd5d8cec8d0a47a58cfa82018ff2 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Tue, 15 Jul 2025 14:51:43 +0100 Subject: [PATCH 06/16] Add get_parent API to NaryTree --- clang-tools-extra/feature-extractor/NaryTree.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/clang-tools-extra/feature-extractor/NaryTree.h b/clang-tools-extra/feature-extractor/NaryTree.h index 57109bfd2e8f7..5acc9421e3bf8 100644 --- a/clang-tools-extra/feature-extractor/NaryTree.h +++ b/clang-tools-extra/feature-extractor/NaryTree.h @@ -86,6 +86,19 @@ template <typename T> class NaryTree { return find_node(root, data) != nullptr; } + std::optional<T> get_parent(const T &data) { + std::optional<T> result; + + traverse_pre_order_impl( + nullptr, root, 0, !root->children.size(), + [&result, &data](Element *parent, Element &n, int depth, bool is_leaf) { + if (parent && n->value == data) + result = (*parent)->value; + }); + + return result; + } + bool add_node(const T &parentData, const T &data) { if (!root) { root = std::make_unique<Node>(data); >From 2675b130fc81ff1c4b671e87d9816617897ab783 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Tue, 15 Jul 2025 14:54:37 +0100 Subject: [PATCH 07/16] Add LoopsData class to contain for loops AST nodes --- .../feature-extractor/LoopsData.h | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 clang-tools-extra/feature-extractor/LoopsData.h diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h new file mode 100644 index 0000000000000..58b704732fe0a 
--- /dev/null +++ b/clang-tools-extra/feature-extractor/LoopsData.h @@ -0,0 +1,42 @@ +#pragma once + +#include <cstdint> +#include <unordered_map> +#include <vector> + +#include <clang/AST/Stmt.h> + +#include "NaryTree.h" + +using namespace clang; + +class LoopsData { +public: + using TreeType = NaryTree<const Stmt *>; + + void add_for(clang::ASTContext *context, const Stmt *parent, + const Stmt *self) { + ids[self] = self->getID(*context); + + if (parent == nullptr) { + loops.push_back({}); + loops.back().add_node(nullptr, self); + } else { + auto ntree = std::find_if(loops.begin(), loops.end(), + [&parent](const NaryTree<const Stmt *> &tree) { + return tree.contains(parent); + }); + + if (ntree != loops.end()) { + ntree->add_node(parent, self); + } + } + } + + auto &get_ids() { return ids; } + auto &get_loops() { return loops; } + +private: + std::unordered_map<const Stmt *, std::int64_t> ids; + std::vector<TreeType> loops; +}; >From 07af8dd146fa516bb503929f78a820425d9c71c6 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Tue, 15 Jul 2025 19:05:06 +0100 Subject: [PATCH 08/16] Add OmpRegions feature --- .../feature-extractor/features/OmpRegions.h | 67 +++++++++++++++++++ clang-tools-extra/feature-extractor/main.cpp | 3 +- 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 clang-tools-extra/feature-extractor/features/OmpRegions.h diff --git a/clang-tools-extra/feature-extractor/features/OmpRegions.h b/clang-tools-extra/feature-extractor/features/OmpRegions.h new file mode 100644 index 0000000000000..666b49bd8482c --- /dev/null +++ b/clang-tools-extra/feature-extractor/features/OmpRegions.h @@ -0,0 +1,67 @@ +#pragma once + +#include <array> +#include <cstddef> +#include <numeric> +#include <unordered_map> + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" + +using namespace clang; +using namespace clang::ast_matchers; + +class OmpRegions : public MatchFinder::MatchCallback { + 
std::unordered_map<std::string, unsigned> regions_count; + +public: + static inline std::array Matchers = { + ompExecutableDirective().bind("ompRegion")}; + + virtual void run(const MatchFinder::MatchResult &result) override { + if (const auto *omp_directive = + result.Nodes.getNodeAs<OMPExecutableDirective>("ompRegion")) { + std::string omp_type; + + using namespace llvm; + if (isa<OMPParallelDirective>(omp_directive)) + omp_type = "parallel"; + else if (isa<OMPForDirective>(omp_directive)) + omp_type = "for"; + else if (isa<OMPParallelForDirective>(omp_directive)) + omp_type = "parallel for"; + else if (isa<OMPSingleDirective>(omp_directive)) + omp_type = "single"; + else if (isa<OMPMasterDirective>(omp_directive)) + omp_type = "master"; + else if (isa<OMPCriticalDirective>(omp_directive)) + omp_type = "critical"; + else if (isa<OMPTaskDirective>(omp_directive)) + omp_type = "task"; + else if (isa<OMPSectionDirective>(omp_directive)) + omp_type = "section"; + else if (isa<OMPSectionsDirective>(omp_directive)) + omp_type = "sections"; + else if (isa<OMPBarrierDirective>(omp_directive)) + omp_type = "barrier"; + else + omp_type = "other"; + + regions_count[omp_type]++; + } + } + + static const char *get_title() { return "opm_regions"; } + std::size_t get_result() { +#ifndef NDEBUG + llvm::outs() << "\n"; + for (const auto &pair : regions_count) + llvm::outs() << "OMP region [" << pair.first << "]: " << pair.second + << "\n"; +#endif + + return std::accumulate( + regions_count.cbegin(), regions_count.cend(), std::size_t{0}, + [](std::size_t acc, const auto &p) { return acc + p.second; }); + } +}; diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp index 3b75cb6f7d419..4109a46ed3f1e 100644 --- a/clang-tools-extra/feature-extractor/main.cpp +++ b/clang-tools-extra/feature-extractor/main.cpp @@ -10,6 +10,7 @@ #include "FeatureManager.h" #include "features/NumLoops.h" +#include "features/OmpRegions.h" using namespace 
clang::tooling; using namespace clang::ast_matchers; @@ -39,7 +40,7 @@ int main(int argc, const char **argv) { CommonOptionsParser &OptionsParser = ExpectedParser.get(); ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList()); - FeatureManager<NumLoops> manager; + FeatureManager<NumLoops, OmpRegions> manager; return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get()); } >From 91e806aae264598f7b8e6bda8588f8b38b78e783 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Tue, 15 Jul 2025 23:24:36 +0100 Subject: [PATCH 09/16] Bugfix get_parent_stamt in utils --- clang-tools-extra/feature-extractor/utils.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h index e30e702e3202c..ceece1b3fb96b 100644 --- a/clang-tools-extra/feature-extractor/utils.h +++ b/clang-tools-extra/feature-extractor/utils.h @@ -54,11 +54,12 @@ static inline const StatementType *get_parent_stmt(ASTContext *context, if (parents.empty()) return nullptr; - for (auto &p : parents) - if (const StatementType *parent_stmt = p.get<StatementType>()) - return parent_stmt; - else - return get_parent_stmt<StatementType>(context, p.get<Stmt>()); + const auto p = parents[0]; + + if (const StatementType *parent_stmt = p.get<StatementType>()) + return parent_stmt; + else if (const auto pStmt = p.get<Stmt>()) + return get_parent_stmt<StatementType>(context, pStmt); return nullptr; } >From d4894ce223238bb38dcc2cfb756d78afc03afe85 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Tue, 15 Jul 2025 23:58:08 +0100 Subject: [PATCH 10/16] Add function to evaluate more complex for loop conditions --- clang-tools-extra/feature-extractor/utils.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h index 
ceece1b3fb96b..6ef00c065a3e7 100644 --- a/clang-tools-extra/feature-extractor/utils.h +++ b/clang-tools-extra/feature-extractor/utils.h @@ -25,6 +25,7 @@ static inline bool is_in_main_file(ASTContext *context, const ForStmt *fs) { /// /// For a given for statement, tries to extract loop bound in the condition +/// This function extracts literal conditions from for loop condition /// static inline std::optional<llvm::APInt> get_for_condition_range_value(const ForStmt *fs) { @@ -42,6 +43,26 @@ get_for_condition_range_value(const ForStmt *fs) { return std::nullopt; } +/// +/// For a given for statement, tries to extract loop bound in the condition +/// This function evaluates macro conditions from for loop condition +/// +static inline std::optional<llvm::APSInt> +get_for_condition_range_value(ASTContext *context, const ForStmt *fs) { + + if (const Expr *cond = fs->getCond(); cond) { + if (const BinaryOperator *binOp = dyn_cast<BinaryOperator>(cond)) { + const Expr *rhs = binOp->getRHS(); + clang::Expr::EvalResult eval; + if (rhs->EvaluateAsInt(eval, *context)) { + return eval.Val.getInt(); + } + } + } + + return std::nullopt; +} + /// /// For a given Stmt \s, tries to return the nearest ancestor of type /// StatementType. Return nullptr in case no parent of given type was found. 
>From 0e3d022638e5fd87b1ebeb73e92b70ac66ef01c1 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Wed, 16 Jul 2025 01:00:54 +0100 Subject: [PATCH 11/16] Improve LoopsData to accept more data other than Stmt pointer --- .../feature-extractor/LoopsData.h | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h index 58b704732fe0a..aae45298e2e89 100644 --- a/clang-tools-extra/feature-extractor/LoopsData.h +++ b/clang-tools-extra/feature-extractor/LoopsData.h @@ -12,20 +12,29 @@ using namespace clang; class LoopsData { public: - using TreeType = NaryTree<const Stmt *>; + struct MetaData { + const Stmt *for_stmt; + + MetaData(const Stmt *fs) : for_stmt(fs) {} + + friend bool operator==(const MetaData &lhs, const MetaData &rhs) { + return lhs.for_stmt == rhs.for_stmt; + } + }; + + using TreeType = NaryTree<MetaData>; void add_for(clang::ASTContext *context, const Stmt *parent, - const Stmt *self) { - ids[self] = self->getID(*context); + const MetaData &self) { + ids[self.for_stmt] = self.for_stmt->getID(*context); if (parent == nullptr) { loops.push_back({}); loops.back().add_node(nullptr, self); } else { - auto ntree = std::find_if(loops.begin(), loops.end(), - [&parent](const NaryTree<const Stmt *> &tree) { - return tree.contains(parent); - }); + auto ntree = std::find_if( + loops.begin(), loops.end(), + [&parent](const TreeType &tree) { return tree.contains(parent); }); if (ntree != loops.end()) { ntree->add_node(parent, self); >From 1c47aa72ee4159ec484c02b2b043a4fa7da56a29 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Wed, 16 Jul 2025 17:15:41 +0100 Subject: [PATCH 12/16] Add function to get total repetition count of for loop considering its parents --- clang-tools-extra/feature-extractor/utils.h | 36 +++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git 
a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h index 6ef00c065a3e7..63386167be2f9 100644 --- a/clang-tools-extra/feature-extractor/utils.h +++ b/clang-tools-extra/feature-extractor/utils.h @@ -63,6 +63,21 @@ get_for_condition_range_value(ASTContext *context, const ForStmt *fs) { return std::nullopt; } +/// +/// For a given for statement, tries to extract loop bound in the condition. Use +/// this function instead of two previous ones. This one internally uses the +/// others +/// +static inline std::optional<llvm::APInt> +maybe_get_for_bound(ASTContext *context, const ForStmt *fs) { + if (const auto method1 = get_for_condition_range_value(fs)) + return llvm::APInt(64, method1.value().getSExtValue()); + else if (const auto method2 = get_for_condition_range_value(context, fs)) + return method2.value(); + else + return std::nullopt; +} + /// /// For a given Stmt \s, tries to return the nearest ancestor of type /// StatementType. Return nullptr in case no parent of given type was found. @@ -100,3 +115,24 @@ static inline void run_on_all_parents_of_type(ASTContext *context, dyn_cast<StatementType>(parent)); } } + +/// +/// Get repetition of each for loop, considering parent for loops. 
For example, +/// for the following two nested for loops, result for the first for is 10, and +/// the result of nested one is 200 +/// +/// for(int i = 0; i < 10; i++) +/// for(int j = 0; j < 20; j++) +/// {} +/// +llvm::APInt get_total_for_repetition_count(ASTContext *context, + const ForStmt *fs) { + auto bounds = maybe_get_for_bound(context, fs).value_or(llvm::APInt(64, 1)); + + run_on_all_parents_of_type<ForStmt>( + context, fs, [&bounds](auto ctx, auto fss) { + bounds *= maybe_get_for_bound(ctx, fss).value_or(llvm::APInt(64, 1)); + }); + + return bounds; +} >From b5c75d4dedd367ce00ec5ed8aa05cbedeb09c62a Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Wed, 16 Jul 2025 17:22:29 +0100 Subject: [PATCH 13/16] Move utils functions to namespace --- clang-tools-extra/feature-extractor/utils.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h index 63386167be2f9..8237fb159fd68 100644 --- a/clang-tools-extra/feature-extractor/utils.h +++ b/clang-tools-extra/feature-extractor/utils.h @@ -5,6 +5,8 @@ #include <optional> +namespace Utils { + using namespace clang; /// @@ -136,3 +138,5 @@ llvm::APInt get_total_for_repetition_count(ASTContext *context, return bounds; } + +} // namespace Utils >From a48cf44371ce07b4cb0b21f778aedb91113ca6cd Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Wed, 16 Jul 2025 17:23:37 +0100 Subject: [PATCH 14/16] Add loop range to LoopsData meta data --- clang-tools-extra/feature-extractor/LoopsData.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h index aae45298e2e89..d8992b44b3589 100644 --- a/clang-tools-extra/feature-extractor/LoopsData.h +++ b/clang-tools-extra/feature-extractor/LoopsData.h @@ -14,8 +14,11 @@ class LoopsData { public: struct MetaData { const Stmt *for_stmt; + llvm::APInt 
loop_range; MetaData(const Stmt *fs) : for_stmt(fs) {} + MetaData(const Stmt *fs, const llvm::APInt &rng) + : for_stmt(fs), loop_range(rng) {} friend bool operator==(const MetaData &lhs, const MetaData &rhs) { return lhs.for_stmt == rhs.for_stmt; >From 692a014c3956ad4eee0cd73ab5253c65af668cb1 Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Fri, 18 Jul 2025 19:02:44 +0100 Subject: [PATCH 15/16] Add AST visitors for float and int operations --- .../visitors/FloatOpCounter.h | 75 +++++++++++++++++++ .../visitors/IntegerOpCounter.h | 60 +++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h create mode 100644 clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h diff --git a/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h b/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h new file mode 100644 index 0000000000000..4daaaa4de78f4 --- /dev/null +++ b/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h @@ -0,0 +1,75 @@ +#pragma once + +#include <cstddef> + +#include "clang/AST/RecursiveASTVisitor.h" + +class FloatOpCounter : public clang::RecursiveASTVisitor<FloatOpCounter> { +public: + explicit FloatOpCounter() : count(0) {} + + bool VisitBinaryOperator(clang::BinaryOperator *bo) { + using namespace clang; + + if (bo->getLHS()->getType()->isFloatingType() && + bo->getRHS()->getType()->isFloatingType()) { + ++count; + } + + return true; + } + + bool VisitCompoundAssignOperator(clang::CompoundAssignOperator *cao) { + if (cao->getLHS()->getType()->isFloatingType() && + cao->getRHS()->getType()->isFloatingType()) { + ++count; + } + + return true; + } + + bool VisitUnaryOperator(clang::UnaryOperator *uo) { + using namespace clang; + + if (uo->getSubExpr()->getType()->isFloatingType()) { + switch (uo->getOpcode()) { + case UO_PreInc: + case UO_PreDec: + case UO_PostInc: + case UO_PostDec: + case UO_Plus: + case UO_Minus: + 
++count; + break; + default: + break; + } + } + + return true; + } + + bool VisitFloatingLiteral(clang::FloatingLiteral * /* fl */) { + ++count; + return true; + } + + bool VisitImplicitCastExpr(clang::ImplicitCastExpr *ice) { + if (ice->getType()->isFloatingType() && + !ice->getSubExpr()->getType()->isFloatingType()) { + ++count; + } + + return true; + } + + void traverse(clang::Stmt *S) { + count = 0; + TraverseStmt(S); + } + + std::size_t get_count() const { return count; } + +private: + std::size_t count; +}; diff --git a/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h b/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h new file mode 100644 index 0000000000000..a6f4a268b052f --- /dev/null +++ b/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h @@ -0,0 +1,60 @@ +#pragma once + +#include <cstddef> + +#include "clang/AST/RecursiveASTVisitor.h" + +class IntegerOpCounter : public clang::RecursiveASTVisitor<IntegerOpCounter> { +public: + explicit IntegerOpCounter() : count(0) {} + + bool VisitBinaryOperator(clang::BinaryOperator *bo) { + if (bo->getLHS()->getType()->isIntegerType() && + bo->getRHS()->getType()->isIntegerType()) { + ++count; + } + + return true; + } + + bool VisitCompoundAssignOperator(clang::CompoundAssignOperator *cao) { + if (cao->getLHS()->getType()->isIntegerType() && + cao->getRHS()->getType()->isIntegerType()) { + ++count; + } + + return true; + } + + bool VisitUnaryOperator(clang::UnaryOperator *uo) { + using namespace clang; + + if (uo->getSubExpr()->getType()->isIntegerType()) { + switch (uo->getOpcode()) { + case UO_PreInc: + case UO_PostInc: + case UO_PreDec: + case UO_PostDec: + case UO_Plus: + case UO_Minus: + case UO_Not: + ++count; + break; + default: + break; + } + } + + return true; + } + + void traverse(clang::Stmt *s) { + count = 0; + TraverseStmt(s); + } + + std::size_t get_count() const { return count; } + +private: + std::size_t count; +}; >From 12e8f99bf8444a7e779ff14344972ba6cab8cfd7 
Mon Sep 17 00:00:00 2001 From: Majid Kamali <majidkamali1...@gmail.com> Date: Fri, 18 Jul 2025 19:31:21 +0100 Subject: [PATCH 16/16] Add basic LoopsRange feature --- .../feature-extractor/LoopsData.h | 8 +- .../feature-extractor/features/LoopsRange.h | 79 +++++++++++++++++++ clang-tools-extra/feature-extractor/main.cpp | 3 +- 3 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 clang-tools-extra/feature-extractor/features/LoopsRange.h diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h index d8992b44b3589..2c6ddd913cd28 100644 --- a/clang-tools-extra/feature-extractor/LoopsData.h +++ b/clang-tools-extra/feature-extractor/LoopsData.h @@ -15,10 +15,14 @@ class LoopsData { struct MetaData { const Stmt *for_stmt; llvm::APInt loop_range; + std::size_t float_ops; + std::size_t int_ops; MetaData(const Stmt *fs) : for_stmt(fs) {} - MetaData(const Stmt *fs, const llvm::APInt &rng) - : for_stmt(fs), loop_range(rng) {} + MetaData(const Stmt *fs, const llvm::APInt &rng, std::size_t float_ops, + std::size_t int_ops) + : for_stmt(fs), loop_range(rng), float_ops(float_ops), + int_ops(int_ops) {} friend bool operator==(const MetaData &lhs, const MetaData &rhs) { return lhs.for_stmt == rhs.for_stmt; diff --git a/clang-tools-extra/feature-extractor/features/LoopsRange.h b/clang-tools-extra/feature-extractor/features/LoopsRange.h new file mode 100644 index 0000000000000..c64ab074f6922 --- /dev/null +++ b/clang-tools-extra/feature-extractor/features/LoopsRange.h @@ -0,0 +1,79 @@ +#pragma once + +#include <array> + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" + +#include "../LoopsData.h" +#include "../utils.h" +#include "../visitors/FloatOpCounter.h" +#include "../visitors/IntegerOpCounter.h" + +using namespace clang; +using namespace clang::ast_matchers; +using namespace llvm; + +class LoopsRange : public MatchFinder::MatchCallback { + LoopsData loops_data; + 
+public: + static inline std::array Matchers = { + forStmt(unless(hasAncestor(forStmt()))).bind("topLevelFor"), + forStmt(hasAncestor(forStmt())).bind("nestedFor"), + // forStmt(hasParent(compoundStmt(hasParent(forStmt())))).bind("nestedFor"), + }; + + virtual void run(const MatchFinder::MatchResult &result) override { + static constexpr auto GatherData = + [](const MatchFinder::MatchResult &result, LoopsData &loops_data, + const clang::ForStmt *parent_for, const clang::ForStmt *fs) { + // llvm::outs() << "Nested for loop at "; + // fs->getForLoc().print(llvm::outs(), *result.SourceManager); + // llvm::outs() << "\n"; + + FloatOpCounter fCounter; + IntegerOpCounter iCounter; + + fCounter.traverse(const_cast<Stmt *>(fs->getBody())); + iCounter.traverse(const_cast<Stmt *>(fs->getBody())); + + loops_data.add_for( + result.Context, parent_for, + LoopsData::MetaData{ + fs, Utils::get_total_for_repetition_count(result.Context, fs), + fCounter.get_count(), iCounter.get_count()}); + }; + + if (const ForStmt *fs = result.Nodes.getNodeAs<ForStmt>("topLevelFor"); + Utils::is_in_main_file(result.Context, fs)) { + GatherData(result, loops_data, nullptr, fs); + } + + if (const ForStmt *fs = result.Nodes.getNodeAs<ForStmt>("nestedFor"); + Utils::is_in_main_file(result.Context, fs)) { + if (auto parent_for = + Utils::get_parent_stmt<ForStmt>(result.Context, fs)) { + GatherData(result, loops_data, parent_for, fs); + } + } + } + + static const char *get_title() { return "loops_range"; } + std::size_t get_result() { + llvm::outs() << "\n"; + + for (auto &loop : loops_data.get_loops()) { + loop.traverse_pre_order( + [&loop](const LoopsData::TreeType::TraverseResult &result) mutable { + const auto &[optParentStmt, selfMetaData, depth, isLeaf] = result; + + llvm::outs() << std::string(depth * 2, ' ') << "for " + << (isLeaf ? 
"(leaf) " : "") << selfMetaData.loop_range + << " " << selfMetaData.float_ops << " " + << selfMetaData.int_ops << "\n"; + }); + } + return loops_data.get_ids().size(); + } +}; diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp index 4109a46ed3f1e..67abb131865f8 100644 --- a/clang-tools-extra/feature-extractor/main.cpp +++ b/clang-tools-extra/feature-extractor/main.cpp @@ -9,6 +9,7 @@ #include "FeatureManager.h" +#include "features/LoopsRange.h" #include "features/NumLoops.h" #include "features/OmpRegions.h" @@ -40,7 +41,7 @@ int main(int argc, const char **argv) { CommonOptionsParser &OptionsParser = ExpectedParser.get(); ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList()); - FeatureManager<NumLoops, OmpRegions> manager; + FeatureManager<NumLoops, OmpRegions, LoopsRange> manager; return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get()); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits