NoQ created this revision.
NoQ added reviewers: aaron.ballman, gribozavr2, xazax.hun, jkorous, t-rasmud, 
ziqingluo-90, malavikasamak.
Herald added subscribers: steakhal, martong, rnkovacs.
Herald added a project: All.
NoQ requested review of this revision.

This patch introduces a hierarchy of `Gadget` classes, which are going to be 
the core concept in our fixit machine.

A //gadget// is a rigid code pattern that constitutes an operation that the 
fixit machine "understands" well enough to work with. A gadget may be "unsafe" 
(i.e., constitute a raw buffer access) or "safe" (constitute a use of a pointer 
or array variable that isn't unsafe per se, but may need fixing if the 
participating variable changes type to a safe container/view).

By "rigid" I mean something like "matchable with a sequence of nested 
if-dyncast-statements or an ASTMatcher without `forEachDescendant()` or other 
non-trivial traversal, except maybe `ignoringParenImpCasts()` and such. The 
point is to make it very clear what the semantics of that code is.

We'll be able to make decisions about how to fix the code depending on the set 
of gadgets that we've enumerated. Basically, in order to fix a type of a 
variable, each use of that variable has to participate in a gadget, and each 
such gadget has to consent to fixing itself according to the variable's new 
type. More on that in follow-up patches!


Repository:
  rC Clang

https://reviews.llvm.org/D137348

Files:
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
  clang/lib/Analysis/UnsafeBufferUsage.cpp

Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===================================================================
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -13,11 +13,144 @@
 using namespace clang;
 using namespace ast_matchers;
 
-namespace {
-// TODO: Better abstractions over gadgets.
-using GadgetList = std::vector<const Stmt *>;
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+  return anyOf(hasType(pointerType()),
+               hasType(autoType(hasDeducedType(
+                   hasUnqualifiedDesugaredType(pointerType())))));
 }
 
+namespace {
+/// Gadget is an individual operation in the code that may be of interest to
+/// this analysis. Each (non-abstract) subclass corresponds to a specific
+/// rigid AST structure that constitutes an operation on a pointer-type object.
+/// Discovery of a gadget in the code corresponds to claiming that we understand
+/// what this part of code is doing well enough to potentially improve it.
+/// Gadgets can be unsafe (immediately deserving a warning) or safe (not
+/// deserving a warning per se, but affecting our decision-making process
+/// nonetheless).
+class Gadget {
+public:
+  enum class Kind {
+#define GADGET(x) x,
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+#undef GADGETS
+  };
+
+  /// Determine if a kind is a safe kind. Slower than calling isSafe().
+  static bool isSafeKind(Kind K) {
+    switch (K) {
+#define UNSAFE_GADGET(x)                                                       \
+    case Kind::x:
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+#undef UNSAFE_GADGET
+      return false;
+
+#define SAFE_GADGET(x)                                                         \
+    case Kind::x:
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+#undef SAFE_GADGET
+      return true;
+    }
+    llvm_unreachable("Invalid gadget kind!");
+  }
+
+  /// Common type of ASTMatchers used for discovering gadgets.
+  /// Useful for implementing the static matcher() methods
+  /// that are expected from all non-abstract subclasses.
+  using Matcher = decltype(stmt());
+
+  Gadget(Kind K) : K(K) {}
+
+  Kind getKind() const { return K; }
+
+  virtual bool isSafe() const = 0;
+  virtual const Stmt *getBaseStmt() const = 0;
+
+  virtual ~Gadget() {}
+
+private:
+  Kind K;
+};
+
+using GadgetList = std::vector<std::unique_ptr<Gadget>>;
+
+/// Unsafe gadgets correspond to unsafe code patterns that warrants
+/// an immediate warning.
+class UnsafeGadget : public Gadget {
+public:
+  UnsafeGadget(Kind K) : Gadget(K) {
+    assert(classof(this) && "Invalid unsafe gadget kind!");
+  }
+
+  static bool classof(const Gadget *G) { return !isSafeKind(G->getKind()); }
+  bool isSafe() const override { return false; }
+};
+
+/// Safe gadgets correspond to code patterns that aren't unsafe but need to be
+/// properly recognized in order to emit correct warnings and fixes over unsafe
+/// gadgets. For example, if a raw pointer-type variable is replaced by
+/// a safe C++ container, every use of such variable may need to be
+/// carefully considered and possibly updated.
+class SafeGadget : public Gadget {
+public:
+  SafeGadget(Kind K) : Gadget(K) {
+    assert(classof(this) && "Invalid safe gadget kind!");
+  }
+
+  static bool classof(const Gadget *G) { return isSafeKind(G->getKind()); }
+  bool isSafe() const override { return true; }
+};
+
+/// An increment of a pointer-type value is unsafe as it may run the pointer
+/// out of bounds.
+class IncrementGadget : public UnsafeGadget {
+  const UnaryOperator *Op;
+
+public:
+  IncrementGadget(const MatchFinder::MatchResult &Result)
+      : UnsafeGadget(Kind::Increment),
+        Op(Result.Nodes.getNodeAs<UnaryOperator>("op")) {}
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::Increment;
+  }
+
+  static Matcher matcher() {
+    return stmt(unaryOperator(
+      hasOperatorName("++"),
+      hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
+    ).bind("op"));
+  }
+
+  const Stmt *getBaseStmt() const override { return Op; }
+};
+
+/// A decrement of a pointer-type value is unsafe as it may run the pointer
+/// out of bounds.
+class DecrementGadget : public UnsafeGadget {
+  const UnaryOperator *Op;
+
+public:
+  DecrementGadget(const MatchFinder::MatchResult &Result)
+      : UnsafeGadget(Kind::Decrement),
+        Op(Result.Nodes.getNodeAs<UnaryOperator>("op")) {}
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::Decrement;
+  }
+
+  static Matcher matcher() {
+    return stmt(unaryOperator(
+      hasOperatorName("--"),
+      hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
+    ).bind("op"));
+  }
+
+  const Stmt *getBaseStmt() const override { return Op; }
+};
+} // namespace
+
 // Scan the function and return a list of gadgets found with provided kits.
 static GadgetList findGadgets(const Decl *D) {
 
@@ -28,40 +161,45 @@
     GadgetFinderCallback(GadgetList &Output) : Output(Output) {}
 
     void run(const MatchFinder::MatchResult &Result) override {
-      Output.push_back(Result.Nodes.getNodeAs<Stmt>("root_node"));
+      // Figure out which matcher we've found, and call the appropriate
+      // subclass constructor.
+      // FIXME: Can we do this more logarithmically?
+#define GADGET(x)                                                              \
+      if (Result.Nodes.getNodeAs<Stmt>(#x)) {                                  \
+        Output.push_back(std::make_unique<x ## Gadget>(Result));               \
+        return;                                                                \
+      }
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+#undef GADGET
     }
   };
 
   GadgetList G;
   MatchFinder M;
-
-  auto IncrementMatcher = unaryOperator(
-    hasOperatorName("++"),
-    hasUnaryOperand(hasType(pointerType()))
-  );
-  auto DecrementMatcher = unaryOperator(
-    hasOperatorName("--"),
-    hasUnaryOperand(hasType(pointerType()))
-  );
-
   GadgetFinderCallback CB(G);
 
+  // clang-format off
   M.addMatcher(
-      stmt(forEachDescendant(
-        stmt(
-          anyOf(
-            IncrementMatcher,
-            DecrementMatcher
-            /* Fill me in! */
-          )
-          // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
-          // here, to make sure that the statement actually belongs to the
-          // function and not to a nested function. However, forCallable uses
-          // ParentMap which can't be used before the AST is fully constructed.
-          // The original problem doesn't sound like it needs ParentMap though,
-          // maybe there's a more direct solution?
-        ).bind("root_node")
-      )), &CB);
+    stmt(forEachDescendant(
+      stmt(anyOf(
+        // Add Gadget::matcher() for every gadget in the registry.
+#define GADGET(x)                                                              \
+        x ## Gadget::matcher().bind(#x),
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+#undef GADGET
+        // FIXME: Is there a better way to avoid hanging comma?
+        unless(stmt())
+      ))
+      // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
+      // here, to make sure that the statement actually belongs to the
+      // function and not to a nested function. However, forCallable uses
+      // ParentMap which can't be used before the AST is fully constructed.
+      // The original problem doesn't sound like it needs ParentMap though,
+      // maybe there's a more direct solution?
+    )),
+    &CB
+  );
+  // clang-format on
 
   M.match(*D->getBody(), D->getASTContext());
 
@@ -72,8 +210,8 @@
                                    UnsafeBufferUsageHandler &Handler) {
   assert(D && D->getBody());
 
-  GadgetList G = findGadgets(D);
-  for (const Stmt *S : G) {
-    Handler.handleUnsafeOperation(S);
+  GadgetList Gadgets = findGadgets(D);
+  for (const auto &G : Gadgets) {
+    Handler.handleUnsafeOperation(G->getBaseStmt());
   }
 }
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
===================================================================
--- /dev/null
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -0,0 +1,26 @@
+//=- UnsafeBufferUsageGadgets.def - List of ways to use a buffer --*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GADGET
+#define GADGET(name)
+#endif
+
+#ifndef UNSAFE_GADGET
+#define UNSAFE_GADGET(name) GADGET(name)
+#endif
+
+#ifndef SAFE_GADGET
+#define SAFE_GADGET(name) GADGET(name)
+#endif
+
+UNSAFE_GADGET(Increment)
+UNSAFE_GADGET(Decrement)
+
+#undef SAFE_GADGET
+#undef UNSAFE_GADGET
+#undef GADGET
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D137348: -Wunsafe-... Artem Dergachev via Phabricator via cfe-commits

Reply via email to