https://github.com/majidkamali1370 updated 
https://github.com/llvm/llvm-project/pull/149135

>From aab024f20b301aca4ab5299c88af2e32c72a3277 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Fri, 11 Jul 2025 23:16:27 +0100
Subject: [PATCH 01/16] Follow llvm and clang tutorial to create a sample tool

---
 clang-tools-extra/CMakeLists.txt              |  1 +
 .../feature-extractor/CMakeLists.txt          | 15 +++++
 clang-tools-extra/feature-extractor/main.cpp  | 55 +++++++++++++++++++
 3 files changed, 71 insertions(+)
 create mode 100644 clang-tools-extra/feature-extractor/CMakeLists.txt
 create mode 100644 clang-tools-extra/feature-extractor/main.cpp

diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt
index 6b6f2b1ca2276..11879a03eba74 100644
--- a/clang-tools-extra/CMakeLists.txt
+++ b/clang-tools-extra/CMakeLists.txt
@@ -28,6 +28,7 @@ add_subdirectory(clang-query)
 add_subdirectory(include-cleaner)
 add_subdirectory(pp-trace)
 add_subdirectory(tool-template)
+add_subdirectory(feature-extractor)
 
 option(CLANG_TOOLS_EXTRA_INCLUDE_DOCS "Generate build targets for the Clang Extra Tools docs."
   ${LLVM_INCLUDE_DOCS})
diff --git a/clang-tools-extra/feature-extractor/CMakeLists.txt b/clang-tools-extra/feature-extractor/CMakeLists.txt
new file mode 100644
index 0000000000000..6ae7b78a7ecad
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_executable(feature-extractor
+  main.cpp
+)
+
+clang_target_link_libraries(feature-extractor # honours CLANG_LINK_CLANG_DYLIB
+  PRIVATE
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangFrontend
+  clangSerialization
+  clangTooling
+)
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
new file mode 100644
index 0000000000000..6fec15a75e203
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -0,0 +1,55 @@
+// Declares clang::SyntaxOnlyAction.
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+// Declares llvm::cl::extrahelp.
+#include "llvm/Support/CommandLine.h"
+
+using namespace clang::tooling;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+// Apply a custom category to all command-line options so that they are the
+// only ones displayed.
+static llvm::cl::OptionCategory MyToolCategory("feature-extractor options");
+
+// CommonOptionsParser declares HelpMessage with a description of the common
+// command-line options related to the compilation database and input files.
+// It's nice to have this help message in all tools.
+static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
+
+// A help message for this specific tool can be added afterwards.
+static cl::extrahelp MoreHelp("\nMore help text...\n");
+
+StatementMatcher LoopMatcher =
+    forStmt(hasLoopInit(declStmt(hasSingleDecl(
+                varDecl(hasInitializer(integerLiteral(equals(0))))))))
+        .bind("forLoop");
+
+class LoopPrinter : public MatchFinder::MatchCallback {
+public:
+  virtual void run(const MatchFinder::MatchResult &Result) override {
+    if (const auto *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
+      FS->dump();
+  }
+};
+
+int main(int argc, const char **argv) {
+  auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
+  if (!ExpectedParser) {
+    // Fail gracefully for unsupported options.
+    llvm::errs() << ExpectedParser.takeError();
+    return 1;
+  }
+  CommonOptionsParser &OptionsParser = ExpectedParser.get();
+  ClangTool Tool(OptionsParser.getCompilations(),
+                 OptionsParser.getSourcePathList());
+
+  LoopPrinter Printer;
+  MatchFinder Finder;
+  Finder.addMatcher(LoopMatcher, &Printer);
+
+  return Tool.run(newFrontendActionFactory(&Finder).get());
+}

>From 318b398e3da0e8b931bea849b188a30c6d411791 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Sat, 12 Jul 2025 15:40:31 +0100
Subject: [PATCH 02/16] Add FeatureManager class for easy integration of
 extracted features

---
 .../feature-extractor/FeatureManager.h        | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 clang-tools-extra/feature-extractor/FeatureManager.h

diff --git a/clang-tools-extra/feature-extractor/FeatureManager.h b/clang-tools-extra/feature-extractor/FeatureManager.h
new file mode 100644
index 0000000000000..5de1986630b7d
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/FeatureManager.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <tuple>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+using namespace clang::tooling;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+template <typename... Features> class FeatureManager {
+  std::tuple<Features...> features; // one callback object per feature type
+  MatchFinder match_finder;         // finder all features are registered on
+
+public:
+  FeatureManager() { // hook every feature's Matchers up to its own callback
+    ( // comma fold: the lambda below is invoked once per type in Features
+        [&]() {
+          for (const auto &matcher : Features::Matchers)
+            match_finder.addMatcher(matcher, &std::get<Features>(features));
+        }(),
+        ...);
+  }
+
+  MatchFinder *get_match_finder() { return &match_finder; } // non-owning
+
+  ~FeatureManager() { // prints "<title> : <result>" for each feature
+    llvm::outs() << "\n";
+
+    ( // comma fold again, in the declaration order of Features
+        [&]() {
+          llvm::outs() << Features::get_title() << " : "
+                       << std::get<Features>(features).get_result() << "\n";
+        }(),
+        ...);
+  }
+};

>From 8978a24b1b9a2fe0c2daa393b65b51dcce7b5528 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Sun, 13 Jul 2025 23:43:35 +0100
Subject: [PATCH 03/16] Add NaryTree to hold items in tree-like structure

---
 .../feature-extractor/NaryTree.h              | 102 ++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 clang-tools-extra/feature-extractor/NaryTree.h

diff --git a/clang-tools-extra/feature-extractor/NaryTree.h b/clang-tools-extra/feature-extractor/NaryTree.h
new file mode 100644
index 0000000000000..57109bfd2e8f7
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/NaryTree.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+
+template <typename T> class NaryTree {
+  struct Node {
+    Node(const T &t) : value(t) {}
+
+    std::vector<std::unique_ptr<Node>> children;
+    T value;
+  };
+
+  using Element = std::unique_ptr<Node>;
+
+  Element root{nullptr};
+
+  template <typename Func>
+  void traverse_post_order_impl(Element *parent, Element &current, int depth,
+                                bool is_leaf, Func &&f) {
+    for (auto &child : current->children)
+      traverse_post_order_impl(&current, child, depth + 1,
+                               !child->children.size(), f);
+
+    f(parent, current, depth, !current->children.size());
+  }
+
+  template <typename Func>
+  void traverse_pre_order_impl(Element *parent, Element &current, int depth,
+                               bool is_leaf, Func &&f) {
+    f(parent, current, depth, !current->children.size());
+
+    for (auto &child : current->children)
+      traverse_pre_order_impl(&current, child, depth + 1,
+                              !child->children.size(), f);
+  }
+
+  // Depth-first lookup of the node holding `data`; nullptr when not found.
+  const Element *find_node(const Element &current, const T &data) const {
+    if (!current)
+      return nullptr;
+    if (current->value == data)
+      return &current;
+    for (const auto &child : current->children)
+      if (const Element *hit = find_node(child, data))
+        return hit; // stop only when a subtree matched, else try siblings
+    return nullptr;
+  }
+
+public:
+  struct TraverseResult {
+    std::optional<T> parent;
+    T &self;
+    int depth;
+    bool is_leaf;
+  };
+
+  /// Visit every node children-first, passing a TraverseResult to \p f.
+  /// An empty tree is a no-op.
+  template <typename Func> void traverse_post_order(Func &&f) {
+    if (!root)
+      return;
+    traverse_post_order_impl(
+        nullptr, root, 0, !root->children.size(),
+        [&f](Element *parent, Element &n, int depth, bool is_leaf) {
+          auto up = parent ? std::optional<T>((*parent)->value) : std::nullopt;
+          f(TraverseResult{up, n->value, depth, is_leaf});
+        });
+  }
+
+  /// Visit every node parent-first, passing a TraverseResult to \p f.
+  /// An empty tree is a no-op.
+  template <typename Func> void traverse_pre_order(Func &&f) {
+    if (!root)
+      return;
+    traverse_pre_order_impl(
+        nullptr, root, 0, !root->children.size(),
+        [&f](Element *parent, Element &n, int depth, bool is_leaf) {
+          auto up = parent ? std::optional<T>((*parent)->value) : std::nullopt;
+          f(TraverseResult{up, n->value, depth, is_leaf});
+        });
+  }
+
+  bool contains(const T &data) const {
+    return find_node(root, data) != nullptr;
+  }
+
+  bool add_node(const T &parentData, const T &data) {
+    if (!root) {
+      root = std::make_unique<Node>(data);
+      return true;
+    }
+
+    if (auto node = find_node(root, parentData)) {
+      (*node)->children.push_back(std::make_unique<Node>(data));
+      return true;
+    }
+
+    return false;
+  }
+};

>From e3b3a8c2fb2bc3d58237c97fb3ddae5d288c615e Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Sun, 13 Jul 2025 23:47:13 +0100
Subject: [PATCH 04/16] Add NumLoops feature and use in FeatureManager

---
 .../feature-extractor/features/NumLoops.h     | 42 +++++++++++++++++++
 clang-tools-extra/feature-extractor/main.cpp  | 26 ++++--------
 2 files changed, 50 insertions(+), 18 deletions(-)
 create mode 100644 clang-tools-extra/feature-extractor/features/NumLoops.h

diff --git a/clang-tools-extra/feature-extractor/features/NumLoops.h b/clang-tools-extra/feature-extractor/features/NumLoops.h
new file mode 100644
index 0000000000000..cad96c7775c23
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/features/NumLoops.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <array>
+#include <cstddef>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+#include "../utils.h"
+
+using namespace clang;
+using namespace clang::tooling;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+/// Match callback that counts `for` and `while` statements written in the
+/// main file of the translation unit.
+class NumLoops : public MatchFinder::MatchCallback {
+  std::size_t num_loops{0};
+
+public:
+  static inline std::array Matchers = {forStmt().bind("forLoops"),
+                                       whileStmt().bind("whileLoops")};
+
+  virtual void run(const MatchFinder::MatchResult &result) override {
+    const auto &sm = result.Context->getSourceManager();
+    const auto *for_loop = result.Nodes.getNodeAs<ForStmt>("forLoops");
+    const auto *while_loop = result.Nodes.getNodeAs<WhileStmt>("whileLoops");
+
+    // Loops that are not written in the main file are left out of the count.
+    const bool for_in_main =
+        for_loop && sm.isWrittenInMainFile(for_loop->getForLoc());
+    const bool while_in_main =
+        while_loop && sm.isWrittenInMainFile(while_loop->getWhileLoc());
+
+    if (for_in_main || while_in_main)
+      ++num_loops;
+  }
+
+  static const char *get_title() { return "num_loops"; }
+  std::size_t get_result() const { return num_loops; }
+};
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
index 6fec15a75e203..3b75cb6f7d419 100644
--- a/clang-tools-extra/feature-extractor/main.cpp
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -7,6 +7,10 @@
 // Declares llvm::cl::extrahelp.
 #include "llvm/Support/CommandLine.h"
 
+#include "FeatureManager.h"
+
+#include "features/NumLoops.h"
+
 using namespace clang::tooling;
 using namespace clang::ast_matchers;
 using namespace llvm;
@@ -23,33 +27,19 @@ static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
 // A help message for this specific tool can be added afterwards.
 static cl::extrahelp MoreHelp("\nMore help text...\n");
 
-StatementMatcher LoopMatcher =
-    forStmt(hasLoopInit(declStmt(hasSingleDecl(
-                varDecl(hasInitializer(integerLiteral(equals(0))))))))
-        .bind("forLoop");
-
-class LoopPrinter : public MatchFinder::MatchCallback {
-public:
-  virtual void run(const MatchFinder::MatchResult &Result) override {
-    if (const auto *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
-      FS->dump();
-  }
-};
-
 int main(int argc, const char **argv) {
   auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory);
+
   if (!ExpectedParser) {
     // Fail gracefully for unsupported options.
     llvm::errs() << ExpectedParser.takeError();
     return 1;
   }
+
   CommonOptionsParser &OptionsParser = ExpectedParser.get();
   ClangTool Tool(OptionsParser.getCompilations(),
                  OptionsParser.getSourcePathList());
+  FeatureManager<NumLoops> manager;
 
-  LoopPrinter Printer;
-  MatchFinder Finder;
-  Finder.addMatcher(LoopMatcher, &Printer);
-
-  return Tool.run(newFrontendActionFactory(&Finder).get());
+  return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get());
 }

>From 0093688c74a0efdfc5b29ab9ca46ee5829c13ebe Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Tue, 15 Jul 2025 14:51:08 +0100
Subject: [PATCH 05/16] Add various util functions

---
 clang-tools-extra/feature-extractor/utils.h | 80 +++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 clang-tools-extra/feature-extractor/utils.h

diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
new file mode 100644
index 0000000000000..e30e702e3202c
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+
+#include <optional>
+
+using namespace clang;
+
+///
+/// Check if two ValueDecl pointers refer to the same variable in AST
+///
+static inline bool are_same_variable(const ValueDecl *First,
+                                     const ValueDecl *Second) {
+  return First && Second && // a null pointer on either side never matches
+         First->getCanonicalDecl() == Second->getCanonicalDecl();
+}
+
+///
+/// Check if for statement is defined in the translation unit file (not headers)
+///
+static inline bool is_in_main_file(ASTContext *context, const ForStmt *fs) {
+  return fs && context->getSourceManager().isWrittenInMainFile(fs->getForLoc());
+}
+
+///
+/// For a given for statement, tries to extract loop bound in the condition
+///
+static inline std::optional<llvm::APInt>
+get_for_condition_range_value(const ForStmt *fs) {
+  // Only conditions shaped like `<lhs> <op> <integer literal>` yield a value;
+  // the operator itself is not inspected.
+  const Expr *cond = fs->getCond();
+  const auto *bin_op = cond ? dyn_cast<BinaryOperator>(cond) : nullptr;
+  if (!bin_op)
+    return std::nullopt;
+
+  const Expr *bound = bin_op->getRHS()->IgnoreParenImpCasts();
+  if (const auto *literal = dyn_cast<IntegerLiteral>(bound))
+    return literal->getValue();
+
+  return std::nullopt;
+}
+
+///
+/// For a given Stmt \s, tries to return the nearest ancestor of type
+/// StatementType. Return nullptr in case no parent of given type was found.
+///
+template <typename StatementType>
+static inline const StatementType *get_parent_stmt(ASTContext *context,
+                                                   const Stmt *s) {
+  auto parents = context->getParents(*s);
+
+  if (parents.empty())
+    return nullptr;
+
+  for (auto &p : parents)
+    if (const StatementType *parent_stmt = p.get<StatementType>())
+      return parent_stmt;
+    else
+      return get_parent_stmt<StatementType>(context, p.get<Stmt>());
+
+  return nullptr;
+}
+
+///
+/// Run a callable on all parents of type StatementType of \s recursively goes
+/// up.
+///
+template <typename StatementType, typename Func>
+static inline void run_on_all_parents_of_type(ASTContext *context,
+                                              const Stmt *s, Func &&f) {
+  auto parent = get_parent_stmt<StatementType>(context, s); // nearest match
+
+  while (parent) { // ends once no further enclosing match is reported
+    f(context, parent); // callback receives the context and the ancestor
+    parent = get_parent_stmt<StatementType>(context,
+                                            dyn_cast<StatementType>(parent));
+  }
+}

>From eed01f169c10fd5d8cec8d0a47a58cfa82018ff2 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Tue, 15 Jul 2025 14:51:43 +0100
Subject: [PATCH 06/16] Add get_parent API to NaryTree

---
 clang-tools-extra/feature-extractor/NaryTree.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/clang-tools-extra/feature-extractor/NaryTree.h b/clang-tools-extra/feature-extractor/NaryTree.h
index 57109bfd2e8f7..5acc9421e3bf8 100644
--- a/clang-tools-extra/feature-extractor/NaryTree.h
+++ b/clang-tools-extra/feature-extractor/NaryTree.h
@@ -86,6 +86,19 @@ template <typename T> class NaryTree {
     return find_node(root, data) != nullptr;
   }
 
+  /// Value stored in the parent of the node holding \p data, if any.
+  std::optional<T> get_parent(const T &data) {
+    std::optional<T> result;
+    if (root) // an empty tree has no parents to report
+      traverse_pre_order_impl(
+          nullptr, root, 0, !root->children.size(),
+          [&result, &data](Element *parent, Element &n, int, bool) {
+            if (parent && n->value == data)
+              result = (*parent)->value;
+          });
+    return result;
+  }
+
   bool add_node(const T &parentData, const T &data) {
     if (!root) {
       root = std::make_unique<Node>(data);

>From 2675b130fc81ff1c4b671e87d9816617897ab783 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Tue, 15 Jul 2025 14:54:37 +0100
Subject: [PATCH 07/16] Add LoopsData class to contain for loops AST nodes

---
 .../feature-extractor/LoopsData.h             | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 clang-tools-extra/feature-extractor/LoopsData.h

diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
new file mode 100644
index 0000000000000..58b704732fe0a
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <cstdint>
+#include <unordered_map>
+#include <vector>
+
+#include <clang/AST/Stmt.h>
+
+#include "NaryTree.h"
+
+using namespace clang;
+
+class LoopsData {
+public:
+  using TreeType = NaryTree<const Stmt *>;
+
+  void add_for(clang::ASTContext *context, const Stmt *parent,
+               const Stmt *self) {
+    ids[self] = self->getID(*context);
+
+    if (parent == nullptr) {
+      loops.push_back({});
+      loops.back().add_node(nullptr, self);
+    } else {
+      auto ntree = std::find_if(loops.begin(), loops.end(),
+                                [&parent](const NaryTree<const Stmt *> &tree) {
+                                  return tree.contains(parent);
+                                });
+
+      if (ntree != loops.end()) {
+        ntree->add_node(parent, self);
+      }
+    }
+  }
+
+  auto &get_ids() { return ids; }
+  auto &get_loops() { return loops; }
+
+private:
+  std::unordered_map<const Stmt *, std::int64_t> ids;
+  std::vector<TreeType> loops;
+};

>From 07af8dd146fa516bb503929f78a820425d9c71c6 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Tue, 15 Jul 2025 19:05:06 +0100
Subject: [PATCH 08/16] Add OmpRegions feature

---
 .../feature-extractor/features/OmpRegions.h   | 67 +++++++++++++++++++
 clang-tools-extra/feature-extractor/main.cpp  |  3 +-
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 clang-tools-extra/feature-extractor/features/OmpRegions.h

diff --git a/clang-tools-extra/feature-extractor/features/OmpRegions.h b/clang-tools-extra/feature-extractor/features/OmpRegions.h
new file mode 100644
index 0000000000000..666b49bd8482c
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/features/OmpRegions.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <numeric>
+#include <unordered_map>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+using namespace clang;
+using namespace clang::ast_matchers;
+
+/// Match callback that counts OpenMP executable directives per directive kind.
+class OmpRegions : public MatchFinder::MatchCallback {
+  std::unordered_map<std::string, unsigned> regions_count;
+
+public:
+  static inline std::array Matchers = {
+      ompExecutableDirective().bind("ompRegion")};
+
+  virtual void run(const MatchFinder::MatchResult &result) override {
+    if (const auto *omp_directive =
+            result.Nodes.getNodeAs<OMPExecutableDirective>("ompRegion")) {
+      std::string omp_type; // bucket name, chosen from the concrete AST class
+      using namespace llvm;
+      if (isa<OMPParallelDirective>(omp_directive))
+        omp_type = "parallel";
+      else if (isa<OMPForDirective>(omp_directive))
+        omp_type = "for";
+      else if (isa<OMPParallelForDirective>(omp_directive))
+        omp_type = "parallel for";
+      else if (isa<OMPSingleDirective>(omp_directive))
+        omp_type = "single";
+      else if (isa<OMPMasterDirective>(omp_directive))
+        omp_type = "master";
+      else if (isa<OMPCriticalDirective>(omp_directive))
+        omp_type = "critical";
+      else if (isa<OMPTaskDirective>(omp_directive))
+        omp_type = "task";
+      else if (isa<OMPSectionDirective>(omp_directive))
+        omp_type = "section";
+      else if (isa<OMPSectionsDirective>(omp_directive))
+        omp_type = "sections";
+      else if (isa<OMPBarrierDirective>(omp_directive))
+        omp_type = "barrier";
+      else
+        omp_type = "other"; // every directive not listed above
+
+      regions_count[omp_type]++;
+    }
+  }
+
+  static const char *get_title() { return "omp_regions"; }
+  std::size_t get_result() { // total over all kinds
+#ifndef NDEBUG
+    llvm::outs() << "\n"; // assert-enabled builds also print the breakdown
+    for (const auto &pair : regions_count)
+      llvm::outs() << "OMP region [" << pair.first << "]: " << pair.second
+                   << "\n";
+#endif
+
+    return std::accumulate(
+        regions_count.cbegin(), regions_count.cend(), std::size_t{0},
+        [](std::size_t acc, const auto &p) { return acc + p.second; });
+  }
+};
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
index 3b75cb6f7d419..4109a46ed3f1e 100644
--- a/clang-tools-extra/feature-extractor/main.cpp
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -10,6 +10,7 @@
 #include "FeatureManager.h"
 
 #include "features/NumLoops.h"
+#include "features/OmpRegions.h"
 
 using namespace clang::tooling;
 using namespace clang::ast_matchers;
@@ -39,7 +40,7 @@ int main(int argc, const char **argv) {
   CommonOptionsParser &OptionsParser = ExpectedParser.get();
   ClangTool Tool(OptionsParser.getCompilations(),
                  OptionsParser.getSourcePathList());
-  FeatureManager<NumLoops> manager;
+  FeatureManager<NumLoops, OmpRegions> manager;
 
   return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get());
 }

>From 91e806aae264598f7b8e6bda8588f8b38b78e783 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Tue, 15 Jul 2025 23:24:36 +0100
Subject: [PATCH 09/16] Bugfix get_parent_stmt in utils

---
 clang-tools-extra/feature-extractor/utils.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index e30e702e3202c..ceece1b3fb96b 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -54,11 +54,12 @@ static inline const StatementType *get_parent_stmt(ASTContext *context,
   if (parents.empty())
     return nullptr;
 
-  for (auto &p : parents)
-    if (const StatementType *parent_stmt = p.get<StatementType>())
-      return parent_stmt;
-    else
-      return get_parent_stmt<StatementType>(context, p.get<Stmt>());
+  const auto p = parents[0];
+
+  if (const StatementType *parent_stmt = p.get<StatementType>())
+    return parent_stmt;
+  else if (const auto pStmt = p.get<Stmt>())
+    return get_parent_stmt<StatementType>(context, pStmt);
 
   return nullptr;
 }

>From d4894ce223238bb38dcc2cfb756d78afc03afe85 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Tue, 15 Jul 2025 23:58:08 +0100
Subject: [PATCH 10/16] Add function to evaluate more complex for loop
 conditions

---
 clang-tools-extra/feature-extractor/utils.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index ceece1b3fb96b..6ef00c065a3e7 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -25,6 +25,7 @@ static inline bool is_in_main_file(ASTContext *context, const ForStmt *fs) {
 
 ///
 /// For a given for statement, tries to extract loop bound in the condition
+/// This function extracts literal conditions from for loop condition
 ///
 static inline std::optional<llvm::APInt>
 get_for_condition_range_value(const ForStmt *fs) {
@@ -42,6 +43,26 @@ get_for_condition_range_value(const ForStmt *fs) {
   return std::nullopt;
 }
 
+///
+/// For a given for statement, tries to extract loop bound in the condition
+/// This function evaluates macro conditions from for loop condition
+///
+static inline std::optional<llvm::APSInt>
+get_for_condition_range_value(ASTContext *context, const ForStmt *fs) {
+  // Looks at the right-hand side of a binary condition and returns it when
+  // Clang can evaluate it as an integer.
+  const Expr *cond = fs->getCond();
+  const auto *bin_op = cond ? dyn_cast<BinaryOperator>(cond) : nullptr;
+  if (!bin_op)
+    return std::nullopt;
+
+  clang::Expr::EvalResult folded;
+  if (bin_op->getRHS()->EvaluateAsInt(folded, *context))
+    return folded.Val.getInt();
+
+  return std::nullopt;
+}
+
 ///
 /// For a given Stmt \s, tries to return the nearest ancestor of type
 /// StatementType. Return nullptr in case no parent of given type was found.

>From 0e3d022638e5fd87b1ebeb73e92b70ac66ef01c1 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Wed, 16 Jul 2025 01:00:54 +0100
Subject: [PATCH 11/16] Improve LoopsData to accept more data other than Stmt
 pointer

---
 .../feature-extractor/LoopsData.h             | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
index 58b704732fe0a..aae45298e2e89 100644
--- a/clang-tools-extra/feature-extractor/LoopsData.h
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -12,20 +12,29 @@ using namespace clang;
 
 class LoopsData {
 public:
-  using TreeType = NaryTree<const Stmt *>;
+  struct MetaData {
+    const Stmt *for_stmt;
+
+    MetaData(const Stmt *fs) : for_stmt(fs) {}
+
+    friend bool operator==(const MetaData &lhs, const MetaData &rhs) {
+      return lhs.for_stmt == rhs.for_stmt;
+    }
+  };
+
+  using TreeType = NaryTree<MetaData>;
 
   void add_for(clang::ASTContext *context, const Stmt *parent,
-               const Stmt *self) {
-    ids[self] = self->getID(*context);
+               const MetaData &self) {
+    ids[self.for_stmt] = self.for_stmt->getID(*context);
 
     if (parent == nullptr) {
       loops.push_back({});
       loops.back().add_node(nullptr, self);
     } else {
-      auto ntree = std::find_if(loops.begin(), loops.end(),
-                                [&parent](const NaryTree<const Stmt *> &tree) {
-                                  return tree.contains(parent);
-                                });
+      auto ntree = std::find_if(
+          loops.begin(), loops.end(),
+          [&parent](const TreeType &tree) { return tree.contains(parent); });
 
       if (ntree != loops.end()) {
         ntree->add_node(parent, self);

>From 1c47aa72ee4159ec484c02b2b043a4fa7da56a29 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Wed, 16 Jul 2025 17:15:41 +0100
Subject: [PATCH 12/16] Add function to get total repetition count of for loop
 considering its parents

---
 clang-tools-extra/feature-extractor/utils.h | 36 +++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index 6ef00c065a3e7..63386167be2f9 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -63,6 +63,21 @@ get_for_condition_range_value(ASTContext *context, const ForStmt *fs) {
   return std::nullopt;
 }
 
+///
+/// For a given for statement, tries to extract loop bound in the condition. Use
+/// this function instead of two previous ones. This one internally uses the
+/// others
+///
+static inline std::optional<llvm::APInt>
+maybe_get_for_bound(ASTContext *context, const ForStmt *fs) {
+  // Normalise both sources to 64 bits: APInt arithmetic needs equal widths.
+  if (const auto method1 = get_for_condition_range_value(fs))
+    return llvm::APInt(64, method1->getSExtValue());
+  if (const auto method2 = get_for_condition_range_value(context, fs))
+    return method2->extOrTrunc(64); // sign-aware resize of the APSInt
+  return std::nullopt;
+}
+
 ///
 /// For a given Stmt \s, tries to return the nearest ancestor of type
 /// StatementType. Return nullptr in case no parent of given type was found.
@@ -100,3 +115,24 @@ static inline void run_on_all_parents_of_type(ASTContext *context,
                                             dyn_cast<StatementType>(parent));
   }
 }
+
+///
+/// Get repetition of each for loop, considering parent for loops. For example,
+/// for the following two nested for loops, result for the first for is 10, and
+/// the result of nested one is 200
+///
+/// for(int i = 0; i < 10; i++)
+///   for(int j = 0; j < 20; j++)
+///   {}
+///
+static inline llvm::APInt
+get_total_for_repetition_count(ASTContext *context, const ForStmt *fs) {
+  auto bounds = maybe_get_for_bound(context, fs).value_or(llvm::APInt(64, 1));
+
+  run_on_all_parents_of_type<ForStmt>(
+      context, fs, [&bounds](auto ctx, auto fss) {
+        bounds *= maybe_get_for_bound(ctx, fss).value_or(llvm::APInt(64, 1));
+      }); // a loop whose bound is unknown contributes a factor of 1
+
+  return bounds;
+}

>From b5c75d4dedd367ce00ec5ed8aa05cbedeb09c62a Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Wed, 16 Jul 2025 17:22:29 +0100
Subject: [PATCH 13/16] Move utils functions to namespace

---
 clang-tools-extra/feature-extractor/utils.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang-tools-extra/feature-extractor/utils.h b/clang-tools-extra/feature-extractor/utils.h
index 63386167be2f9..8237fb159fd68 100644
--- a/clang-tools-extra/feature-extractor/utils.h
+++ b/clang-tools-extra/feature-extractor/utils.h
@@ -5,6 +5,8 @@
 
 #include <optional>
 
+namespace Utils {
+
 using namespace clang;
 
 ///
@@ -136,3 +138,5 @@ llvm::APInt get_total_for_repetition_count(ASTContext *context,
 
   return bounds;
 }
+
+} // namespace Utils

>From a48cf44371ce07b4cb0b21f778aedb91113ca6cd Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Wed, 16 Jul 2025 17:23:37 +0100
Subject: [PATCH 14/16] Add loop range to LoopsData meta data

---
 clang-tools-extra/feature-extractor/LoopsData.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
index aae45298e2e89..d8992b44b3589 100644
--- a/clang-tools-extra/feature-extractor/LoopsData.h
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -14,8 +14,11 @@ class LoopsData {
 public:
   struct MetaData {
     const Stmt *for_stmt;
+    llvm::APInt loop_range;
 
     MetaData(const Stmt *fs) : for_stmt(fs) {}
+    MetaData(const Stmt *fs, const llvm::APInt &rng)
+        : for_stmt(fs), loop_range(rng) {}
 
     friend bool operator==(const MetaData &lhs, const MetaData &rhs) {
       return lhs.for_stmt == rhs.for_stmt;

>From 692a014c3956ad4eee0cd73ab5253c65af668cb1 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Fri, 18 Jul 2025 19:02:44 +0100
Subject: [PATCH 15/16] Add AST visitors for float and int operations

---
 .../visitors/FloatOpCounter.h                 | 75 +++++++++++++++++++
 .../visitors/IntegerOpCounter.h               | 60 +++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h
 create mode 100644 clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h

diff --git a/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h b/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h
new file mode 100644
index 0000000000000..4daaaa4de78f4
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/visitors/FloatOpCounter.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <cstddef>
+
+#include "clang/AST/RecursiveASTVisitor.h"
+
+/// AST visitor that tallies floating-point work in a statement: binary
+/// operators and compound assignments with two floating operands, ++/--/unary
+/// +/- on a floating operand, floating literals, and implicit casts to float.
+class FloatOpCounter : public clang::RecursiveASTVisitor<FloatOpCounter> {
+public:
+  explicit FloatOpCounter() : count(0) {}
+
+  /// Counts a binary operator with two floating operands. Compound assignments
+  /// are skipped: CompoundAssignOperator derives from BinaryOperator, so the
+  /// visitor calls this hook for them in addition to the dedicated one below.
+  bool VisitBinaryOperator(clang::BinaryOperator *bo) {
+    if (!llvm::isa<clang::CompoundAssignOperator>(bo) &&
+        bo->getLHS()->getType()->isFloatingType() &&
+        bo->getRHS()->getType()->isFloatingType()) {
+      ++count;
+    }
+    return true;
+  }
+  /// Counts a compound assignment (e.g. `+=`) with two floating operands.
+  bool VisitCompoundAssignOperator(clang::CompoundAssignOperator *cao) {
+    if (cao->getLHS()->getType()->isFloatingType() &&
+        cao->getRHS()->getType()->isFloatingType()) {
+      ++count;
+    }
+    return true;
+  }
+  /// Counts ++, --, unary plus and unary minus applied to a floating operand.
+  bool VisitUnaryOperator(clang::UnaryOperator *uo) {
+    using namespace clang;
+    if (uo->getSubExpr()->getType()->isFloatingType()) {
+      switch (uo->getOpcode()) {
+      case UO_PreInc:
+      case UO_PreDec:
+      case UO_PostInc:
+      case UO_PostDec:
+      case UO_Plus:
+      case UO_Minus:
+        ++count;
+        break;
+      default:
+        break;
+      }
+    }
+    return true;
+  }
+  /// Counts every floating-point literal.
+  bool VisitFloatingLiteral(clang::FloatingLiteral * /* fl */) {
+    ++count;
+    return true;
+  }
+  /// Counts an implicit cast whose result is floating but whose operand is not.
+  bool VisitImplicitCastExpr(clang::ImplicitCastExpr *ice) {
+    if (ice->getType()->isFloatingType() &&
+        !ice->getSubExpr()->getType()->isFloatingType()) {
+      ++count;
+    }
+    return true;
+  }
+  /// Resets the tally to zero, then traverses \p S.
+  void traverse(clang::Stmt *S) {
+    count = 0;
+    TraverseStmt(S);
+  }
+  /// Returns the current tally.
+  std::size_t get_count() const { return count; }
+
+private:
+  std::size_t count; // running tally; zeroed by the constructor and traverse()
+};
diff --git a/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h b/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h
new file mode 100644
index 0000000000000..a6f4a268b052f
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/visitors/IntegerOpCounter.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <cstddef>
+
+#include "clang/AST/RecursiveASTVisitor.h"
+
+/// AST visitor that tallies integer binary, compound-assignment and unary ops.
+class IntegerOpCounter : public clang::RecursiveASTVisitor<IntegerOpCounter> {
+public:
+  explicit IntegerOpCounter() : count(0) {}
+
+  /// Counts a binary operator with two integer operands; compound assignments
+  /// are left to VisitCompoundAssignOperator(), which the visitor also calls.
+  bool VisitBinaryOperator(clang::BinaryOperator *bo) {
+    if (!llvm::isa<clang::CompoundAssignOperator>(bo) &&
+        bo->getLHS()->getType()->isIntegerType() &&
+        bo->getRHS()->getType()->isIntegerType()) {
+      ++count;
+    }
+    return true;
+  }
+  /// Counts a compound assignment (e.g. `+=`) with two integer operands.
+  bool VisitCompoundAssignOperator(clang::CompoundAssignOperator *cao) {
+    if (cao->getLHS()->getType()->isIntegerType() &&
+        cao->getRHS()->getType()->isIntegerType()) {
+      ++count;
+    }
+    return true;
+  }
+  /// Counts ++, --, unary plus, unary minus and `~` on an integer operand.
+  bool VisitUnaryOperator(clang::UnaryOperator *uo) {
+    using namespace clang;
+    if (uo->getSubExpr()->getType()->isIntegerType()) {
+      switch (uo->getOpcode()) {
+      case UO_PreInc:
+      case UO_PostInc:
+      case UO_PreDec:
+      case UO_PostDec:
+      case UO_Plus:
+      case UO_Minus:
+      case UO_Not:
+        ++count;
+        break;
+      default:
+        break;
+      }
+    }
+    return true;
+  }
+  /// Resets the tally to zero, then traverses \p s.
+  void traverse(clang::Stmt *s) {
+    count = 0;
+    TraverseStmt(s);
+  }
+  /// Returns the current tally.
+  std::size_t get_count() const { return count; }
+
+private:
+  std::size_t count; // running tally; zeroed by the constructor and traverse()
+};

>From 12e8f99bf8444a7e779ff14344972ba6cab8cfd7 Mon Sep 17 00:00:00 2001
From: Majid Kamali <majidkamali1...@gmail.com>
Date: Fri, 18 Jul 2025 19:31:21 +0100
Subject: [PATCH 16/16] Add basic LoopsRange feature

---
 .../feature-extractor/LoopsData.h             |  8 +-
 .../feature-extractor/features/LoopsRange.h   | 79 +++++++++++++++++++
 clang-tools-extra/feature-extractor/main.cpp  |  3 +-
 3 files changed, 87 insertions(+), 3 deletions(-)
 create mode 100644 clang-tools-extra/feature-extractor/features/LoopsRange.h

diff --git a/clang-tools-extra/feature-extractor/LoopsData.h b/clang-tools-extra/feature-extractor/LoopsData.h
index d8992b44b3589..2c6ddd913cd28 100644
--- a/clang-tools-extra/feature-extractor/LoopsData.h
+++ b/clang-tools-extra/feature-extractor/LoopsData.h
@@ -15,10 +15,14 @@ class LoopsData {
   struct MetaData {
     const Stmt *for_stmt;
     llvm::APInt loop_range;
+    std::size_t float_ops = 0; // stays 0 when built via MetaData(const Stmt *)
+    std::size_t int_ops = 0;   // stays 0 when built via MetaData(const Stmt *)
 
     MetaData(const Stmt *fs) : for_stmt(fs) {}
-    MetaData(const Stmt *fs, const llvm::APInt &rng)
-        : for_stmt(fs), loop_range(rng) {}
+    MetaData(const Stmt *fs, const llvm::APInt &rng, std::size_t float_ops,
+             std::size_t int_ops)
+        : for_stmt(fs), loop_range(rng), float_ops(float_ops),
+          int_ops(int_ops) {}
 
     friend bool operator==(const MetaData &lhs, const MetaData &rhs) {
       return lhs.for_stmt == rhs.for_stmt;
diff --git a/clang-tools-extra/feature-extractor/features/LoopsRange.h b/clang-tools-extra/feature-extractor/features/LoopsRange.h
new file mode 100644
index 0000000000000..c64ab074f6922
--- /dev/null
+++ b/clang-tools-extra/feature-extractor/features/LoopsRange.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <array>
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+
+#include "../LoopsData.h"
+#include "../utils.h"
+#include "../visitors/FloatOpCounter.h"
+#include "../visitors/IntegerOpCounter.h"
+
+using namespace clang;
+using namespace clang::ast_matchers;
+using namespace llvm;
+
+/// Match callback that stores each matched `for` statement in LoopsData along
+/// with Utils::get_total_for_repetition_count() and float/int op counts.
+class LoopsRange : public MatchFinder::MatchCallback {
+  LoopsData loops_data;
+
+public:
+  /// One matcher for loops without a `for` ancestor, one for loops with one.
+  static inline std::array Matchers = {
+      forStmt(unless(hasAncestor(forStmt()))).bind("topLevelFor"),
+      forStmt(hasAncestor(forStmt())).bind("nestedFor"),
+      // forStmt(hasParent(compoundStmt(hasParent(forStmt())))).bind("nestedFor"),
+  };
+
+  /// Handles one match: counts operations in the loop body and adds the loop to
+  /// loops_data, passing its parent `for` (nullptr for a top-level loop).
+  void run(const MatchFinder::MatchResult &result) override {
+    static constexpr auto GatherData =
+        [](const MatchFinder::MatchResult &result, LoopsData &loops_data,
+           const clang::ForStmt *parent_for, const clang::ForStmt *fs) {
+          FloatOpCounter fCounter;
+          IntegerOpCounter iCounter;
+          fCounter.traverse(const_cast<Stmt *>(fs->getBody()));
+          iCounter.traverse(const_cast<Stmt *>(fs->getBody()));
+          loops_data.add_for(
+              result.Context, parent_for,
+              LoopsData::MetaData{
+                  fs, Utils::get_total_for_repetition_count(result.Context, fs),
+                  fCounter.get_count(), iCounter.get_count()});
+        };
+
+    // Only one of the two names is bound per match; the other yields nullptr.
+    if (const ForStmt *fs = result.Nodes.getNodeAs<ForStmt>("topLevelFor");
+        fs && Utils::is_in_main_file(result.Context, fs)) {
+      GatherData(result, loops_data, nullptr, fs);
+    }
+
+    if (const ForStmt *fs = result.Nodes.getNodeAs<ForStmt>("nestedFor");
+        fs && Utils::is_in_main_file(result.Context, fs)) {
+      if (auto parent_for =
+              Utils::get_parent_stmt<ForStmt>(result.Context, fs)) {
+        GatherData(result, loops_data, parent_for, fs);
+      }
+    }
+  }
+
+  static const char *get_title() { return "loops_range"; }
+  /// Prints one line per traverse_pre_order() result to llvm::outs(), indented
+  /// two spaces per depth level, and returns loops_data.get_ids().size().
+  std::size_t get_result() {
+    llvm::outs() << "\n";
+    for (auto &loop : loops_data.get_loops()) {
+      loop.traverse_pre_order(
+          [](const LoopsData::TreeType::TraverseResult &result) {
+            const auto &[optParentStmt, selfMetaData, depth, isLeaf] = result;
+            llvm::outs() << std::string(depth * 2, ' ') << "for "
+                         << (isLeaf ? "(leaf) " : "") << selfMetaData.loop_range
+                         << " " << selfMetaData.float_ops << " "
+                         << selfMetaData.int_ops << "\n";
+          });
+    }
+    return loops_data.get_ids().size();
+  }
+};
diff --git a/clang-tools-extra/feature-extractor/main.cpp b/clang-tools-extra/feature-extractor/main.cpp
index 4109a46ed3f1e..67abb131865f8 100644
--- a/clang-tools-extra/feature-extractor/main.cpp
+++ b/clang-tools-extra/feature-extractor/main.cpp
@@ -9,6 +9,7 @@
 
 #include "FeatureManager.h"
 
+#include "features/LoopsRange.h"
 #include "features/NumLoops.h"
 #include "features/OmpRegions.h"
 
@@ -40,7 +41,7 @@ int main(int argc, const char **argv) {
   CommonOptionsParser &OptionsParser = ExpectedParser.get();
   ClangTool Tool(OptionsParser.getCompilations(),
                  OptionsParser.getSourcePathList());
-  FeatureManager<NumLoops, OmpRegions> manager;
+  FeatureManager<NumLoops, OmpRegions, LoopsRange> manager;
 
   return Tool.run(newFrontendActionFactory(manager.get_match_finder()).get());
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to