https://github.com/martinboehme updated 
https://github.com/llvm/llvm-project/pull/92401

>From e902cf2e18208a118f90c341d3e375d2c20cdc59 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboe...@google.com>
Date: Thu, 16 May 2024 13:45:57 +0000
Subject: [PATCH 1/3] [clang][dataflow] Make `CNFFormula` externally
 accessible.

This component can be useful when creating implementations of `Solver`, as some
SAT solvers require the input to be in 3-CNF.

As part of making `CNFFormula` externally accessible, I have moved some member
variables out of it that aren't really part of the representation of a 3-CNF
formula and thus live better elsewhere:

*  `WatchedHead` and `NextWatched` have been moved to
   `WatchedLiteralsSolverImpl`, as they're part of the specific algorithm used
   by that SAT solver.

*  `Atomics` has become an output parameter of `buildCNF()` because it has to do
   with the relationship between a `CNFFormula` and the set of `Formula`s it is
   derived from rather than being an integral part of the representation of a
   3-CNF formula.

I have also made all member variables private and added appropriate accessors.
---
 clang/docs/tools/clang-formatted-files.txt    |   3 +
 .../clang/Analysis/FlowSensitive/CNFFormula.h | 180 +++++++
 .../FlowSensitive/WatchedLiteralsSolver.h     |   1 -
 .../lib/Analysis/FlowSensitive/CMakeLists.txt |   1 +
 .../lib/Analysis/FlowSensitive/CNFFormula.cpp | 305 ++++++++++++
 .../FlowSensitive/WatchedLiteralsSolver.cpp   | 468 ++----------------
 .../clang/lib/Analysis/FlowSensitive/BUILD.gn |   1 +
 7 files changed, 542 insertions(+), 417 deletions(-)
 create mode 100644 clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
 create mode 100644 clang/lib/Analysis/FlowSensitive/CNFFormula.cpp

diff --git a/clang/docs/tools/clang-formatted-files.txt 
b/clang/docs/tools/clang-formatted-files.txt
index eaeadf2656b0b..2b55c014791e1 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -124,6 +124,7 @@ 
clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h
 clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
 clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h
 clang/include/clang/Analysis/FlowSensitive/ASTOps.h
+clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
 clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
 clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
 clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -626,6 +627,7 @@ clang/tools/libclang/CXCursor.h
 clang/tools/scan-build-py/tests/functional/src/include/clean-one.h
 clang/unittests/Analysis/CFGBuildResult.h
 clang/unittests/Analysis/MacroExpansionContextTest.cpp
+clang/unittests/Analysis/FlowSensitive/CNFFormula.cpp
 clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
 clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
 clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp
@@ -637,6 +639,7 @@ clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
 clang/unittests/Analysis/FlowSensitive/TestingSupport.h
 clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp
 clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
 clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolverTest.cpp
 clang/unittests/AST/ASTImporterFixtures.cpp
 clang/unittests/AST/ASTImporterFixtures.h
diff --git a/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h 
b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
new file mode 100644
index 0000000000000..90b0c591b5df5
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
@@ -0,0 +1,180 @@
+//===- CNFFormula.h ---------------------------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  A representation of a boolean formula in 3-CNF.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H
+
+#include <cstdint>
+#include <vector>
+
+#include "clang/Analysis/FlowSensitive/Formula.h"
+
+namespace clang {
+namespace dataflow {
+
+/// Boolean variables are represented as positive integers.
+using Variable = uint32_t;
+
+/// A null boolean variable is used as a placeholder in various data structures
+/// and algorithms.
+constexpr Variable NullVar = 0;
+
+/// Literals are represented as positive integers. Specifically, for a boolean
+/// variable `V` that is represented as the positive integer `I`, the positive
+/// literal `V` is represented as the integer `2*I` and the negative literal
+/// `!V` is represented as the integer `2*I+1`.
+using Literal = uint32_t;
+
+/// A null literal is used as a placeholder in various data structures and
+/// algorithms.
+constexpr Literal NullLit = 0;
+
+/// Clause identifiers are represented as positive integers.
+using ClauseID = uint32_t;
+
+/// A null clause identifier is used as a placeholder in various data 
structures
+/// and algorithms.
+constexpr ClauseID NullClause = 0;
+
+/// Returns the positive literal `V`.
+inline constexpr Literal posLit(Variable V) { return 2 * V; }
+
+/// Returns whether `L` is a positive literal.
+inline constexpr bool isPosLit(Literal L) { return 0 == (L & 1); }
+
+/// Returns whether `L` is a negative literal.
+inline constexpr bool isNegLit(Literal L) { return 1 == (L & 1); }
+
+/// Returns the negative literal `!V`.
+inline constexpr Literal negLit(Variable V) { return 2 * V + 1; }
+
+/// Returns the negated literal `!L`.
+inline constexpr Literal notLit(Literal L) { return L ^ 1; }
+
+/// Returns the variable of `L`.
+inline constexpr Variable var(Literal L) { return L >> 1; }
+
+/// A boolean formula in 3-CNF (conjunctive normal form with at most 3 literals
+/// per clause).
+class CNFFormula {
+  /// `LargestVar` is equal to the largest positive integer that represents a
+  /// variable in the formula.
+  const Variable LargestVar;
+
+  /// Literals of all clauses in the formula.
+  ///
+  /// The element at index 0 stands for the literal in the null clause. It is
+  /// set to 0 and isn't used. Literals of clauses in the formula start from 
the
+  /// element at index 1.
+  ///
+  /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of
+  /// `Clauses` will be `[0, L1, L2, L2, L3, L4]`.
+  std::vector<Literal> Clauses;
+
+  /// Start indices of clauses of the formula in `Clauses`.
+  ///
+  /// The element at index 0 stands for the start index of the null clause. It
+  /// is set to 0 and isn't used. Start indices of clauses in the formula start
+  /// from the element at index 1.
+  ///
+  /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of
+  /// `ClauseStarts` will be `[0, 1, 3]`. Note that the literals of the first
+  /// clause always start at index 1. The start index for the literals of the
+  /// second clause depends on the size of the first clause and so on.
+  std::vector<size_t> ClauseStarts;
+
+  /// Indicates that we already know the formula is unsatisfiable.
+  /// During construction, we catch simple cases of conflicting unit-clauses.
+  bool KnownContradictory;
+
+public:
+  explicit CNFFormula(Variable LargestVar);
+
+  /// Adds the `L1 v ... v Ln` clause to the formula.
+  /// Requirements:
+  ///
+  ///  `Li` must not be `NullLit`.
+  ///
+  ///  All literals in the input that are not `NullLit` must be distinct.
+  void addClause(ArrayRef<Literal> lits);
+
+  /// Returns whether the formula is known to be contradictory.
+  /// This is the case if any of the clauses is empty.
+  bool knownContradictory() const { return KnownContradictory; }
+
+  /// Returns the largest variable in the formula.
+  Variable largestVar() const { return LargestVar; }
+
+  /// Returns the number of clauses in the formula.
+  /// Valid clause IDs are in the range [1, `numClauses()`].
+  ClauseID numClauses() const { return ClauseStarts.size() - 1; }
+
+  /// Returns the number of literals in clause `C`.
+  size_t clauseSize(ClauseID C) const {
+    return C == ClauseStarts.size() - 1 ? Clauses.size() - ClauseStarts[C]
+                                        : ClauseStarts[C + 1] - 
ClauseStarts[C];
+  }
+
+  /// Returns the literals of clause `C`.
+  /// If `knownContradictory()` is false, each clause has at least one literal.
+  llvm::ArrayRef<Literal> clauseLiterals(ClauseID C) const {
+    size_t S = clauseSize(C);
+    if (S == 0)
+      return llvm::ArrayRef<Literal>();
+    return llvm::ArrayRef<Literal>(&Clauses[ClauseStarts[C]], S);
+  }
+
+  /// An iterator over all literals of all clauses in the formula.
+  /// The iterator allows mutation of the literal through the `*` operator.
+  /// This is to support solvers that mutate the formula during solving.
+  class Iterator {
+    friend class CNFFormula;
+    CNFFormula *CNF;
+    size_t Idx;
+    Iterator(CNFFormula *CNF, size_t Idx) : CNF(CNF), Idx(Idx) {}
+
+  public:
+    Iterator(const Iterator &) = default;
+    Iterator &operator=(const Iterator &) = default;
+
+    Iterator &operator++() {
+      ++Idx;
+      assert(Idx < CNF->Clauses.size() && "Iterator out of bounds");
+      return *this;
+    }
+
+    Iterator next() const {
+      Iterator I = *this;
+      ++I;
+      return I;
+    }
+
+    Literal &operator*() const { return CNF->Clauses[Idx]; }
+  };
+  friend class Iterator;
+
+  /// Returns an iterator to the first literal of clause `C`.
+  Iterator startOfClause(ClauseID C) { return Iterator(this, ClauseStarts[C]); 
}
+};
+
+/// Converts the conjunction of `Vals` into a formula in conjunctive normal
+/// form where each clause has at least one and at most three literals.
+/// `Atomics` is populated with a mapping from `Variables` to the corresponding
+/// `Atom`s for atomic booleans in the input formulas.
+CNFFormula
+buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
+         llvm::DenseMap<Variable, Atom> &Atomics);
+
+} // namespace dataflow
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H
diff --git a/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h 
b/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h
index b5cd7aa10fd7d..c948aa21cec3b 100644
--- a/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h
+++ b/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h
@@ -17,7 +17,6 @@
 #include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Solver.h"
 #include "llvm/ADT/ArrayRef.h"
-#include <limits>
 
 namespace clang {
 namespace dataflow {
diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt 
b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
index 6631fe27f3d90..f89d4e57e5819 100644
--- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
+++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
@@ -2,6 +2,7 @@ add_clang_library(clangAnalysisFlowSensitive
   AdornedCFG.cpp
   Arena.cpp
   ASTOps.cpp
+  CNFFormula.cpp
   DataflowAnalysisContext.cpp
   DataflowEnvironment.cpp
   Formula.cpp
diff --git a/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp 
b/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp
new file mode 100644
index 0000000000000..521b81fe78aa0
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp
@@ -0,0 +1,305 @@
+//===- CNFFormula.cpp -------------------------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  A representation of a boolean formula in 3-CNF.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/CNFFormula.h"
+#include "llvm/ADT/DenseSet.h"
+
+#include <queue>
+
+namespace clang {
+namespace dataflow {
+
+namespace {
+
+/// Applies simplifications while building up a BooleanFormula.
+/// We keep track of unit clauses, which tell us variables that must be
+/// true/false in any model that satisfies the overall formula.
+/// Such variables can be dropped from subsequently-added clauses, which
+/// may in turn yield more unit clauses or even a contradiction.
+/// The total added complexity of this preprocessing is O(N) where we
+/// for every clause, we do a lookup for each unit clauses.
+/// The lookup is O(1) on average. This method won't catch all
+/// contradictory formulas, more passes can in principle catch
+/// more cases but we leave all these and the general case to the
+/// proper SAT solver.
+struct CNFFormulaBuilder {
+  // Formula should outlive CNFFormulaBuilder.
+  explicit CNFFormulaBuilder(CNFFormula &CNF)
+      : Formula(CNF) {}
+
+  /// Adds the `L1 v ... v Ln` clause to the formula. Applies
+  /// simplifications, based on single-literal clauses.
+  ///
+  /// Requirements:
+  ///
+  ///  `Li` must not be `NullLit`.
+  ///
+  ///  All literals must be distinct.
+  void addClause(ArrayRef<Literal> Literals) {
+    // We generate clauses with up to 3 literals in this file.
+    assert(!Literals.empty() && Literals.size() <= 3);
+    // Contains literals of the simplified clause.
+    llvm::SmallVector<Literal> Simplified;
+    for (auto L : Literals) {
+      assert(L != NullLit &&
+             llvm::all_of(Simplified,
+                          [L](Literal S) { return  S != L; }));
+      auto X = var(L);
+      if (trueVars.contains(X)) { // X must be true
+        if (isPosLit(L))
+          return; // Omit clause `(... v X v ...)`, it is `true`.
+        else
+          continue; // Omit `!X` from `(... v !X v ...)`.
+      }
+      if (falseVars.contains(X)) { // X must be false
+        if (isNegLit(L))
+          return; // Omit clause `(... v !X v ...)`, it is `true`.
+        else
+          continue; // Omit `X` from `(... v X v ...)`.
+      }
+      Simplified.push_back(L);
+    }
+    if (Simplified.empty()) {
+      // Simplification made the clause empty, which is equivalent to `false`.
+      // We already know that this formula is unsatisfiable.
+      Formula.addClause(Simplified);
+      return;
+    }
+    if (Simplified.size() == 1) {
+      // We have new unit clause.
+      const Literal lit = Simplified.front();
+      const Variable v = var(lit);
+      if (isPosLit(lit))
+        trueVars.insert(v);
+      else
+        falseVars.insert(v);
+    }
+    Formula.addClause(Simplified);
+  }
+
+  /// Returns true if we observed a contradiction while adding clauses.
+  /// In this case then the formula is already known to be unsatisfiable.
+  bool isKnownContradictory() { return Formula.knownContradictory(); }
+
+private:
+  CNFFormula &Formula;
+  llvm::DenseSet<Variable> trueVars;
+  llvm::DenseSet<Variable> falseVars;
+};
+
+} // namespace
+
+CNFFormula::CNFFormula(Variable LargestVar)
+    : LargestVar(LargestVar), KnownContradictory(false) {
+  Clauses.push_back(0);
+  ClauseStarts.push_back(0);
+}
+
+void CNFFormula::addClause(ArrayRef<Literal> lits) {
+  assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
+
+  if (lits.empty())
+    KnownContradictory = true;
+
+  const size_t S = Clauses.size();
+  ClauseStarts.push_back(S);
+  Clauses.insert(Clauses.end(), lits.begin(), lits.end());
+}
+
+CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
+                    llvm::DenseMap<Variable, Atom> &Atomics) {
+  // The general strategy of the algorithm implemented below is to map each
+  // of the sub-values in `Vals` to a unique variable and use these variables 
in
+  // the resulting CNF expression to avoid exponential blow up. The number of
+  // literals in the resulting formula is guaranteed to be linear in the number
+  // of sub-formulas in `Vals`.
+
+  // Map each sub-formula in `Vals` to a unique variable.
+  llvm::DenseMap<const Formula *, Variable> FormulaToVar;
+  // Store variable identifiers and Atom of atomic booleans.
+  Variable NextVar = 1;
+  {
+    std::queue<const Formula *> UnprocessedFormulas;
+    for (const Formula *F : Formulas)
+      UnprocessedFormulas.push(F);
+    while (!UnprocessedFormulas.empty()) {
+      Variable Var = NextVar;
+      const Formula *F = UnprocessedFormulas.front();
+      UnprocessedFormulas.pop();
+
+      if (!FormulaToVar.try_emplace(F, Var).second)
+        continue;
+      ++NextVar;
+
+      for (const Formula *Op : F->operands())
+        UnprocessedFormulas.push(Op);
+      if (F->kind() == Formula::AtomRef)
+        Atomics[Var] = F->getAtom();
+    }
+  }
+
+  auto GetVar = [&FormulaToVar](const Formula *F) {
+    auto ValIt = FormulaToVar.find(F);
+    assert(ValIt != FormulaToVar.end());
+    return ValIt->second;
+  };
+
+  CNFFormula CNF(NextVar - 1);
+  std::vector<bool> ProcessedSubVals(NextVar, false);
+  CNFFormulaBuilder builder(CNF);
+
+  // Add a conjunct for each variable that represents a top-level conjunction
+  // value in `Vals`.
+  for (const Formula *F : Formulas)
+    builder.addClause(posLit(GetVar(F)));
+
+  // Add conjuncts that represent the mapping between newly-created variables
+  // and their corresponding sub-formulas.
+  std::queue<const Formula *> UnprocessedFormulas;
+  for (const Formula *F : Formulas)
+    UnprocessedFormulas.push(F);
+  while (!UnprocessedFormulas.empty()) {
+    const Formula *F = UnprocessedFormulas.front();
+    UnprocessedFormulas.pop();
+    const Variable Var = GetVar(F);
+
+    if (ProcessedSubVals[Var])
+      continue;
+    ProcessedSubVals[Var] = true;
+
+    switch (F->kind()) {
+    case Formula::AtomRef:
+      break;
+    case Formula::Literal:
+      CNF.addClause(F->literal() ? posLit(Var) : negLit(Var));
+      break;
+    case Formula::And: {
+      const Variable LHS = GetVar(F->operands()[0]);
+      const Variable RHS = GetVar(F->operands()[1]);
+
+      if (LHS == RHS) {
+        // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
+        // already in conjunctive normal form. Below we add each of the
+        // conjuncts of the latter expression to the result.
+        builder.addClause({negLit(Var), posLit(LHS)});
+        builder.addClause({posLit(Var), negLit(LHS)});
+      } else {
+        // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v
+        // !B)` which is already in conjunctive normal form. Below we add each
+        // of the conjuncts of the latter expression to the result.
+        builder.addClause({negLit(Var), posLit(LHS)});
+        builder.addClause({negLit(Var), posLit(RHS)});
+        builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
+      }
+      break;
+    }
+    case Formula::Or: {
+      const Variable LHS = GetVar(F->operands()[0]);
+      const Variable RHS = GetVar(F->operands()[1]);
+
+      if (LHS == RHS) {
+        // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
+        // already in conjunctive normal form. Below we add each of the
+        // conjuncts of the latter expression to the result.
+        builder.addClause({negLit(Var), posLit(LHS)});
+        builder.addClause({posLit(Var), negLit(LHS)});
+      } else {
+        // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
+        // !B)` which is already in conjunctive normal form. Below we add each
+        // of the conjuncts of the latter expression to the result.
+        builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)});
+        builder.addClause({posLit(Var), negLit(LHS)});
+        builder.addClause({posLit(Var), negLit(RHS)});
+      }
+      break;
+    }
+    case Formula::Not: {
+      const Variable Operand = GetVar(F->operands()[0]);
+
+      // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
+      // already in conjunctive normal form. Below we add each of the
+      // conjuncts of the latter expression to the result.
+      builder.addClause({negLit(Var), negLit(Operand)});
+      builder.addClause({posLit(Var), posLit(Operand)});
+      break;
+    }
+    case Formula::Implies: {
+      const Variable LHS = GetVar(F->operands()[0]);
+      const Variable RHS = GetVar(F->operands()[1]);
+
+      // `X <=> (A => B)` is equivalent to
+      // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
+      // conjunctive normal form. Below we add each of the conjuncts of
+      // the latter expression to the result.
+      builder.addClause({posLit(Var), posLit(LHS)});
+      builder.addClause({posLit(Var), negLit(RHS)});
+      builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
+      break;
+    }
+    case Formula::Equal: {
+      const Variable LHS = GetVar(F->operands()[0]);
+      const Variable RHS = GetVar(F->operands()[1]);
+
+      if (LHS == RHS) {
+        // `X <=> (A <=> A)` is equivalent to `X` which is already in
+        // conjunctive normal form. Below we add each of the conjuncts of the
+        // latter expression to the result.
+        builder.addClause(posLit(Var));
+
+        // No need to visit the sub-values of `Val`.
+        continue;
+      }
+      // `X <=> (A <=> B)` is equivalent to
+      // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
+      // is already in conjunctive normal form. Below we add each of the
+      // conjuncts of the latter expression to the result.
+      builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)});
+      builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
+      builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)});
+      builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
+      break;
+    }
+    }
+    if (builder.isKnownContradictory()) {
+      return CNF;
+    }
+    for (const Formula *Child : F->operands())
+      UnprocessedFormulas.push(Child);
+  }
+
+  // Unit clauses that were added later were not
+  // considered for the simplification of earlier clauses. Do a final
+  // pass to find more opportunities for simplification.
+  CNFFormula FinalCNF(NextVar - 1);
+  CNFFormulaBuilder FinalBuilder(FinalCNF);
+
+  // Collect unit clauses.
+  for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
+    if (CNF.clauseSize(C) == 1) {
+      FinalBuilder.addClause(CNF.clauseLiterals(C)[0]);
+    }
+  }
+
+  // Add all clauses that were added previously, preserving the order.
+  for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
+    FinalBuilder.addClause(CNF.clauseLiterals(C));
+    if (FinalBuilder.isKnownContradictory()) {
+      break;
+    }
+  }
+  // It is possible there were new unit clauses again, but
+  // we stop here and leave the rest to the solver algorithm.
+  return FinalCNF;
+}
+
+} // namespace dataflow
+} // namespace clang
diff --git a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp 
b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
index 3ef3637535324..fcc748a39c1fa 100644
--- a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
+++ b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
@@ -12,18 +12,15 @@
 
//===----------------------------------------------------------------------===//
 
 #include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <queue>
 #include <vector>
 
+#include "clang/Analysis/FlowSensitive/CNFFormula.h"
 #include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Solver.h"
 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
 
 
@@ -44,73 +41,16 @@ namespace dataflow {
 //
 // [1] https://en.wikipedia.org/wiki/DPLL_algorithm
 
-/// Boolean variables are represented as positive integers.
-using Variable = uint32_t;
+namespace {
 
-/// A null boolean variable is used as a placeholder in various data structures
-/// and algorithms.
-static constexpr Variable NullVar = 0;
-
-/// Literals are represented as positive integers. Specifically, for a boolean
-/// variable `V` that is represented as the positive integer `I`, the positive
-/// literal `V` is represented as the integer `2*I` and the negative literal
-/// `!V` is represented as the integer `2*I+1`.
-using Literal = uint32_t;
-
-/// A null literal is used as a placeholder in various data structures and
-/// algorithms.
-[[maybe_unused]] static constexpr Literal NullLit = 0;
-
-/// Returns the positive literal `V`.
-static constexpr Literal posLit(Variable V) { return 2 * V; }
-
-static constexpr bool isPosLit(Literal L) { return 0 == (L & 1); }
-
-static constexpr bool isNegLit(Literal L) { return 1 == (L & 1); }
-
-/// Returns the negative literal `!V`.
-static constexpr Literal negLit(Variable V) { return 2 * V + 1; }
-
-/// Returns the negated literal `!L`.
-static constexpr Literal notLit(Literal L) { return L ^ 1; }
-
-/// Returns the variable of `L`.
-static constexpr Variable var(Literal L) { return L >> 1; }
-
-/// Clause identifiers are represented as positive integers.
-using ClauseID = uint32_t;
-
-/// A null clause identifier is used as a placeholder in various data 
structures
-/// and algorithms.
-static constexpr ClauseID NullClause = 0;
-
-/// A boolean formula in conjunctive normal form.
-struct CNFFormula {
-  /// `LargestVar` is equal to the largest positive integer that represents a
-  /// variable in the formula.
-  const Variable LargestVar;
-
-  /// Literals of all clauses in the formula.
-  ///
-  /// The element at index 0 stands for the literal in the null clause. It is
-  /// set to 0 and isn't used. Literals of clauses in the formula start from 
the
-  /// element at index 1.
-  ///
-  /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of
-  /// `Clauses` will be `[0, L1, L2, L2, L3, L4]`.
-  std::vector<Literal> Clauses;
+class WatchedLiteralsSolverImpl {
+  /// Stores the variable identifier and Atom for atomic booleans in the
+  /// formula.
+  llvm::DenseMap<Variable, Atom> Atomics;
 
-  /// Start indices of clauses of the formula in `Clauses`.
-  ///
-  /// The element at index 0 stands for the start index of the null clause. It
-  /// is set to 0 and isn't used. Start indices of clauses in the formula start
-  /// from the element at index 1.
-  ///
-  /// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of
-  /// `ClauseStarts` will be `[0, 1, 3]`. Note that the literals of the first
-  /// clause always start at index 1. The start index for the literals of the
-  /// second clause depends on the size of the first clause and so on.
-  std::vector<size_t> ClauseStarts;
+  /// A boolean formula in conjunctive normal form that the solver will attempt
+  /// to prove satisfiable. The formula will be modified in the process.
+  CNFFormula CNF;
 
   /// Maps literals (indices of the vector) to clause identifiers (elements of
   /// the vector) that watch the respective literals.
@@ -127,328 +67,6 @@ struct CNFFormula {
   /// clauses in the formula start from the element at index 1.
   std::vector<ClauseID> NextWatched;
 
-  /// Stores the variable identifier and Atom for atomic booleans in the
-  /// formula.
-  llvm::DenseMap<Variable, Atom> Atomics;
-
-  /// Indicates that we already know the formula is unsatisfiable.
-  /// During construction, we catch simple cases of conflicting unit-clauses.
-  bool KnownContradictory;
-
-  explicit CNFFormula(Variable LargestVar,
-                      llvm::DenseMap<Variable, Atom> Atomics)
-      : LargestVar(LargestVar), Atomics(std::move(Atomics)),
-        KnownContradictory(false) {
-    Clauses.push_back(0);
-    ClauseStarts.push_back(0);
-    NextWatched.push_back(0);
-    const size_t NumLiterals = 2 * LargestVar + 1;
-    WatchedHead.resize(NumLiterals + 1, 0);
-  }
-
-  /// Adds the `L1 v ... v Ln` clause to the formula.
-  /// Requirements:
-  ///
-  ///  `Li` must not be `NullLit`.
-  ///
-  ///  All literals in the input that are not `NullLit` must be distinct.
-  void addClause(ArrayRef<Literal> lits) {
-    assert(!lits.empty());
-    assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
-
-    const ClauseID C = ClauseStarts.size();
-    const size_t S = Clauses.size();
-    ClauseStarts.push_back(S);
-    Clauses.insert(Clauses.end(), lits.begin(), lits.end());
-
-    // Designate the first literal as the "watched" literal of the clause.
-    NextWatched.push_back(WatchedHead[lits.front()]);
-    WatchedHead[lits.front()] = C;
-  }
-
-  /// Returns the number of literals in clause `C`.
-  size_t clauseSize(ClauseID C) const {
-    return C == ClauseStarts.size() - 1 ? Clauses.size() - ClauseStarts[C]
-                                        : ClauseStarts[C + 1] - 
ClauseStarts[C];
-  }
-
-  /// Returns the literals of clause `C`.
-  llvm::ArrayRef<Literal> clauseLiterals(ClauseID C) const {
-    return llvm::ArrayRef<Literal>(&Clauses[ClauseStarts[C]], clauseSize(C));
-  }
-};
-
-/// Applies simplifications while building up a BooleanFormula.
-/// We keep track of unit clauses, which tell us variables that must be
-/// true/false in any model that satisfies the overall formula.
-/// Such variables can be dropped from subsequently-added clauses, which
-/// may in turn yield more unit clauses or even a contradiction.
-/// The total added complexity of this preprocessing is O(N) where we
-/// for every clause, we do a lookup for each unit clauses.
-/// The lookup is O(1) on average. This method won't catch all
-/// contradictory formulas, more passes can in principle catch
-/// more cases but we leave all these and the general case to the
-/// proper SAT solver.
-struct CNFFormulaBuilder {
-  // Formula should outlive CNFFormulaBuilder.
-  explicit CNFFormulaBuilder(CNFFormula &CNF)
-      : Formula(CNF) {}
-
-  /// Adds the `L1 v ... v Ln` clause to the formula. Applies
-  /// simplifications, based on single-literal clauses.
-  ///
-  /// Requirements:
-  ///
-  ///  `Li` must not be `NullLit`.
-  ///
-  ///  All literals must be distinct.
-  void addClause(ArrayRef<Literal> Literals) {
-    // We generate clauses with up to 3 literals in this file.
-    assert(!Literals.empty() && Literals.size() <= 3);
-    // Contains literals of the simplified clause.
-    llvm::SmallVector<Literal> Simplified;
-    for (auto L : Literals) {
-      assert(L != NullLit &&
-             llvm::all_of(Simplified,
-                          [L](Literal S) { return  S != L; }));
-      auto X = var(L);
-      if (trueVars.contains(X)) { // X must be true
-        if (isPosLit(L))
-          return; // Omit clause `(... v X v ...)`, it is `true`.
-        else
-          continue; // Omit `!X` from `(... v !X v ...)`.
-      }
-      if (falseVars.contains(X)) { // X must be false
-        if (isNegLit(L))
-          return; // Omit clause `(... v !X v ...)`, it is `true`.
-        else
-          continue; // Omit `X` from `(... v X v ...)`.
-      }
-      Simplified.push_back(L);
-    }
-    if (Simplified.empty()) {
-      // Simplification made the clause empty, which is equivalent to `false`.
-      // We already know that this formula is unsatisfiable.
-      Formula.KnownContradictory = true;
-      // We can add any of the input literals to get an unsatisfiable formula.
-      Formula.addClause(Literals[0]);
-      return;
-    }
-    if (Simplified.size() == 1) {
-      // We have new unit clause.
-      const Literal lit = Simplified.front();
-      const Variable v = var(lit);
-      if (isPosLit(lit))
-        trueVars.insert(v);
-      else
-        falseVars.insert(v);
-    }
-    Formula.addClause(Simplified);
-  }
-
-  /// Returns true if we observed a contradiction while adding clauses.
-  /// In this case then the formula is already known to be unsatisfiable.
-  bool isKnownContradictory() { return Formula.KnownContradictory; }
-
-private:
-  CNFFormula &Formula;
-  llvm::DenseSet<Variable> trueVars;
-  llvm::DenseSet<Variable> falseVars;
-};
-
-/// Converts the conjunction of `Vals` into a formula in conjunctive normal
-/// form where each clause has at least one and at most three literals.
-CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
-  // The general strategy of the algorithm implemented below is to map each
-  // of the sub-values in `Vals` to a unique variable and use these variables 
in
-  // the resulting CNF expression to avoid exponential blow up. The number of
-  // literals in the resulting formula is guaranteed to be linear in the number
-  // of sub-formulas in `Vals`.
-
-  // Map each sub-formula in `Vals` to a unique variable.
-  llvm::DenseMap<const Formula *, Variable> SubValsToVar;
-  // Store variable identifiers and Atom of atomic booleans.
-  llvm::DenseMap<Variable, Atom> Atomics;
-  Variable NextVar = 1;
-  {
-    std::queue<const Formula *> UnprocessedSubVals;
-    for (const Formula *Val : Vals)
-      UnprocessedSubVals.push(Val);
-    while (!UnprocessedSubVals.empty()) {
-      Variable Var = NextVar;
-      const Formula *Val = UnprocessedSubVals.front();
-      UnprocessedSubVals.pop();
-
-      if (!SubValsToVar.try_emplace(Val, Var).second)
-        continue;
-      ++NextVar;
-
-      for (const Formula *F : Val->operands())
-        UnprocessedSubVals.push(F);
-      if (Val->kind() == Formula::AtomRef)
-        Atomics[Var] = Val->getAtom();
-    }
-  }
-
-  auto GetVar = [&SubValsToVar](const Formula *Val) {
-    auto ValIt = SubValsToVar.find(Val);
-    assert(ValIt != SubValsToVar.end());
-    return ValIt->second;
-  };
-
-  CNFFormula CNF(NextVar - 1, std::move(Atomics));
-  std::vector<bool> ProcessedSubVals(NextVar, false);
-  CNFFormulaBuilder builder(CNF);
-
-  // Add a conjunct for each variable that represents a top-level conjunction
-  // value in `Vals`.
-  for (const Formula *Val : Vals)
-    builder.addClause(posLit(GetVar(Val)));
-
-  // Add conjuncts that represent the mapping between newly-created variables
-  // and their corresponding sub-formulas.
-  std::queue<const Formula *> UnprocessedSubVals;
-  for (const Formula *Val : Vals)
-    UnprocessedSubVals.push(Val);
-  while (!UnprocessedSubVals.empty()) {
-    const Formula *Val = UnprocessedSubVals.front();
-    UnprocessedSubVals.pop();
-    const Variable Var = GetVar(Val);
-
-    if (ProcessedSubVals[Var])
-      continue;
-    ProcessedSubVals[Var] = true;
-
-    switch (Val->kind()) {
-    case Formula::AtomRef:
-      break;
-    case Formula::Literal:
-      CNF.addClause(Val->literal() ? posLit(Var) : negLit(Var));
-      break;
-    case Formula::And: {
-      const Variable LHS = GetVar(Val->operands()[0]);
-      const Variable RHS = GetVar(Val->operands()[1]);
-
-      if (LHS == RHS) {
-        // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
-        // already in conjunctive normal form. Below we add each of the
-        // conjuncts of the latter expression to the result.
-        builder.addClause({negLit(Var), posLit(LHS)});
-        builder.addClause({posLit(Var), negLit(LHS)});
-      } else {
-        // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v
-        // !B)` which is already in conjunctive normal form. Below we add each
-        // of the conjuncts of the latter expression to the result.
-        builder.addClause({negLit(Var), posLit(LHS)});
-        builder.addClause({negLit(Var), posLit(RHS)});
-        builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
-      }
-      break;
-    }
-    case Formula::Or: {
-      const Variable LHS = GetVar(Val->operands()[0]);
-      const Variable RHS = GetVar(Val->operands()[1]);
-
-      if (LHS == RHS) {
-        // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
-        // already in conjunctive normal form. Below we add each of the
-        // conjuncts of the latter expression to the result.
-        builder.addClause({negLit(Var), posLit(LHS)});
-        builder.addClause({posLit(Var), negLit(LHS)});
-      } else {
-        // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
-        // !B)` which is already in conjunctive normal form. Below we add each
-        // of the conjuncts of the latter expression to the result.
-        builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)});
-        builder.addClause({posLit(Var), negLit(LHS)});
-        builder.addClause({posLit(Var), negLit(RHS)});
-      }
-      break;
-    }
-    case Formula::Not: {
-      const Variable Operand = GetVar(Val->operands()[0]);
-
-      // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
-      // already in conjunctive normal form. Below we add each of the
-      // conjuncts of the latter expression to the result.
-      builder.addClause({negLit(Var), negLit(Operand)});
-      builder.addClause({posLit(Var), posLit(Operand)});
-      break;
-    }
-    case Formula::Implies: {
-      const Variable LHS = GetVar(Val->operands()[0]);
-      const Variable RHS = GetVar(Val->operands()[1]);
-
-      // `X <=> (A => B)` is equivalent to
-      // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
-      // conjunctive normal form. Below we add each of the conjuncts of
-      // the latter expression to the result.
-      builder.addClause({posLit(Var), posLit(LHS)});
-      builder.addClause({posLit(Var), negLit(RHS)});
-      builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
-      break;
-    }
-    case Formula::Equal: {
-      const Variable LHS = GetVar(Val->operands()[0]);
-      const Variable RHS = GetVar(Val->operands()[1]);
-
-      if (LHS == RHS) {
-        // `X <=> (A <=> A)` is equivalent to `X` which is already in
-        // conjunctive normal form. Below we add each of the conjuncts of the
-        // latter expression to the result.
-        builder.addClause(posLit(Var));
-
-        // No need to visit the sub-values of `Val`.
-        continue;
-      }
-      // `X <=> (A <=> B)` is equivalent to
-      // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
-      // is already in conjunctive normal form. Below we add each of the
-      // conjuncts of the latter expression to the result.
-      builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)});
-      builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
-      builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)});
-      builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
-      break;
-    }
-    }
-    if (builder.isKnownContradictory()) {
-      return CNF;
-    }
-    for (const Formula *Child : Val->operands())
-      UnprocessedSubVals.push(Child);
-  }
-
-  // Unit clauses that were added later were not
-  // considered for the simplification of earlier clauses. Do a final
-  // pass to find more opportunities for simplification.
-  CNFFormula FinalCNF(NextVar - 1, std::move(CNF.Atomics));
-  CNFFormulaBuilder FinalBuilder(FinalCNF);
-
-  // Collect unit clauses.
-  for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
-    if (CNF.clauseSize(C) == 1) {
-      FinalBuilder.addClause(CNF.clauseLiterals(C)[0]);
-    }
-  }
-
-  // Add all clauses that were added previously, preserving the order.
-  for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
-    FinalBuilder.addClause(CNF.clauseLiterals(C));
-    if (FinalBuilder.isKnownContradictory()) {
-      break;
-    }
-  }
-  // It is possible there were new unit clauses again, but
-  // we stop here and leave the rest to the solver algorithm.
-  return FinalCNF;
-}
-
-class WatchedLiteralsSolverImpl {
-  /// A boolean formula in conjunctive normal form that the solver will attempt
-  /// to prove satisfiable. The formula will be modified in the process.
-  CNFFormula CNF;
-
   /// The search for a satisfying assignment of the variables in `Formula` will
   /// proceed in levels, starting from 1 and going up to `Formula.LargestVar`
   /// (inclusive). The current level is stored in `Level`. At each level the
@@ -501,20 +119,37 @@ class WatchedLiteralsSolverImpl {
 public:
   explicit WatchedLiteralsSolverImpl(
       const llvm::ArrayRef<const Formula *> &Vals)
-      : CNF(buildCNF(Vals)), LevelVars(CNF.LargestVar + 1),
-        LevelStates(CNF.LargestVar + 1) {
+      // `Atomics` needs to be initialized first so that we can use it as an
+      // output argument of `buildCNF()`.
+      : Atomics(), CNF(buildCNF(Vals, Atomics)),
+        LevelVars(CNF.largestVar() + 1), LevelStates(CNF.largestVar() + 1) {
     assert(!Vals.empty());
 
+    // Skip initialization if the formula is known to be contradictory.
+    if (CNF.knownContradictory())
+      return;
+
+    // Initialize `NextWatched` and `WatchedHead`.
+    NextWatched.push_back(0);
+    const size_t NumLiterals = 2 * CNF.largestVar() + 1;
+    WatchedHead.resize(NumLiterals + 1, 0);
+    for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
+      // Designate the first literal as the "watched" literal of the clause.
+      Literal FirstLit = CNF.clauseLiterals(C).front();
+      NextWatched.push_back(WatchedHead[FirstLit]);
+      WatchedHead[FirstLit] = C;
+    }
+
     // Initialize the state at the root level to a decision so that in
     // `reverseForcedMoves` we don't have to check that `Level >= 0` on each
     // iteration.
     LevelStates[0] = State::Decision;
 
     // Initialize all variables as unassigned.
-    VarAssignments.resize(CNF.LargestVar + 1, Assignment::Unassigned);
+    VarAssignments.resize(CNF.largestVar() + 1, Assignment::Unassigned);
 
     // Initialize the active variables.
-    for (Variable Var = CNF.LargestVar; Var != NullVar; --Var) {
+    for (Variable Var = CNF.largestVar(); Var != NullVar; --Var) {
       if (isWatched(posLit(Var)) || isWatched(negLit(Var)))
         ActiveVars.push_back(Var);
     }
@@ -523,7 +158,7 @@ class WatchedLiteralsSolverImpl {
   // Returns the `Result` and the number of iterations "remaining" from
   // `MaxIterations` (that is, `MaxIterations` - iterations in this call).
   std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && 
{
-    if (CNF.KnownContradictory) {
+    if (CNF.knownContradictory()) {
       // Short-cut the solving process. We already found out at CNF
       // construction time that the formula is unsatisfiable.
       return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations);
@@ -625,7 +260,7 @@ class WatchedLiteralsSolverImpl {
   /// Returns a satisfying truth assignment to the atoms in the boolean 
formula.
   llvm::DenseMap<Atom, Solver::Result::Assignment> buildSolution() {
     llvm::DenseMap<Atom, Solver::Result::Assignment> Solution;
-    for (auto &Atomic : CNF.Atomics) {
+    for (auto &Atomic : Atomics) {
       // A variable may have a definite true/false assignment, or it may be
       // unassigned indicating its truth value does not affect the result of
       // the formula. Unassigned variables are assigned to true as a default.
@@ -661,24 +296,25 @@ class WatchedLiteralsSolverImpl {
     const Literal FalseLit = VarAssignments[Var] == Assignment::AssignedTrue
                                  ? negLit(Var)
                                  : posLit(Var);
-    ClauseID FalseLitWatcher = CNF.WatchedHead[FalseLit];
-    CNF.WatchedHead[FalseLit] = NullClause;
+    ClauseID FalseLitWatcher = WatchedHead[FalseLit];
+    WatchedHead[FalseLit] = NullClause;
     while (FalseLitWatcher != NullClause) {
-      const ClauseID NextFalseLitWatcher = CNF.NextWatched[FalseLitWatcher];
+      const ClauseID NextFalseLitWatcher = NextWatched[FalseLitWatcher];
 
       // Pick the first non-false literal as the new watched literal.
-      const size_t FalseLitWatcherStart = CNF.ClauseStarts[FalseLitWatcher];
-      size_t NewWatchedLitIdx = FalseLitWatcherStart + 1;
-      while (isCurrentlyFalse(CNF.Clauses[NewWatchedLitIdx]))
-        ++NewWatchedLitIdx;
-      const Literal NewWatchedLit = CNF.Clauses[NewWatchedLitIdx];
+      const CNFFormula::Iterator FalseLitWatcherStart =
+          CNF.startOfClause(FalseLitWatcher);
+      CNFFormula::Iterator NewWatchedLitIter = FalseLitWatcherStart.next();
+      while (isCurrentlyFalse(*NewWatchedLitIter))
+        ++NewWatchedLitIter;
+      const Literal NewWatchedLit = *NewWatchedLitIter;
       const Variable NewWatchedLitVar = var(NewWatchedLit);
 
       // Swap the old watched literal for the new one in `FalseLitWatcher` to
       // maintain the invariant that the watched literal is at the beginning of
       // the clause.
-      CNF.Clauses[NewWatchedLitIdx] = FalseLit;
-      CNF.Clauses[FalseLitWatcherStart] = NewWatchedLit;
+      *NewWatchedLitIter = FalseLit;
+      *FalseLitWatcherStart = NewWatchedLit;
 
       // If the new watched literal isn't watched by any other clause and its
       // variable isn't assigned we need to add it to the active variables.
@@ -686,8 +322,8 @@ class WatchedLiteralsSolverImpl {
           VarAssignments[NewWatchedLitVar] == Assignment::Unassigned)
         ActiveVars.push_back(NewWatchedLitVar);
 
-      CNF.NextWatched[FalseLitWatcher] = CNF.WatchedHead[NewWatchedLit];
-      CNF.WatchedHead[NewWatchedLit] = FalseLitWatcher;
+      NextWatched[FalseLitWatcher] = WatchedHead[NewWatchedLit];
+      WatchedHead[NewWatchedLit] = FalseLitWatcher;
 
       // Go to the next clause that watches `FalseLit`.
       FalseLitWatcher = NextFalseLitWatcher;
@@ -697,8 +333,8 @@ class WatchedLiteralsSolverImpl {
   /// Returns true if and only if one of the clauses that watch `Lit` is a unit
   /// clause.
   bool watchedByUnitClause(Literal Lit) const {
-    for (ClauseID LitWatcher = CNF.WatchedHead[Lit]; LitWatcher != NullClause;
-         LitWatcher = CNF.NextWatched[LitWatcher]) {
+    for (ClauseID LitWatcher = WatchedHead[Lit]; LitWatcher != NullClause;
+         LitWatcher = NextWatched[LitWatcher]) {
       llvm::ArrayRef<Literal> Clause = CNF.clauseLiterals(LitWatcher);
 
       // Assert the invariant that the watched literal is always the first one
@@ -728,9 +364,7 @@ class WatchedLiteralsSolverImpl {
   }
 
   /// Returns true if and only if `Lit` is watched by a clause in `Formula`.
-  bool isWatched(Literal Lit) const {
-    return CNF.WatchedHead[Lit] != NullClause;
-  }
+  bool isWatched(Literal Lit) const { return WatchedHead[Lit] != NullClause; }
 
   /// Returns an assignment for an unassigned variable.
   Assignment decideAssignment(Variable Var) const {
@@ -742,8 +376,8 @@ class WatchedLiteralsSolverImpl {
   /// Returns a set of all watched literals.
   llvm::DenseSet<Literal> watchedLiterals() const {
     llvm::DenseSet<Literal> WatchedLiterals;
-    for (Literal Lit = 2; Lit < CNF.WatchedHead.size(); Lit++) {
-      if (CNF.WatchedHead[Lit] == NullClause)
+    for (Literal Lit = 2; Lit < WatchedHead.size(); Lit++) {
+      if (WatchedHead[Lit] == NullClause)
         continue;
       WatchedLiterals.insert(Lit);
     }
@@ -783,6 +417,8 @@ class WatchedLiteralsSolverImpl {
   }
 };
 
+} // namespace
+
 Solver::Result
 WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) {
   if (Vals.empty())
diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn 
b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn
index 22433459a7878..393596186c0c6 100644
--- a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn
@@ -26,6 +26,7 @@ static_library("FlowSensitive") {
     "ASTOps.cpp",
     "AdornedCFG.cpp",
     "Arena.cpp",
+    "CNFFormula.cpp",
     "DataflowAnalysisContext.cpp",
     "DataflowEnvironment.cpp",
     "DebugSupport.cpp",

>From 9ac306ee6a7d2e30769443e7c879b7f068dfe6cd Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboe...@google.com>
Date: Tue, 21 May 2024 06:43:23 +0000
Subject: [PATCH 2/3] fixup! [clang][dataflow] Make `CNFFormula` externally
 accessible.

---
 clang/include/clang/Analysis/FlowSensitive/CNFFormula.h | 5 ++---
 clang/lib/Analysis/FlowSensitive/CNFFormula.cpp         | 6 ++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h 
b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
index 90b0c591b5df5..850ca66918d82 100644
--- a/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
+++ b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
@@ -170,9 +170,8 @@ class CNFFormula {
 /// form where each clause has at least one and at most three literals.
 /// `Atomics` is populated with a mapping from `Variables` to the corresponding
 /// `Atom`s for atomic booleans in the input formulas.
-CNFFormula
-buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
-         llvm::DenseMap<Variable, Atom> &Atomics);
+CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
+                    llvm::DenseMap<Variable, Atom> &Atomics);
 
 } // namespace dataflow
 } // namespace clang
diff --git a/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp 
b/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp
index 521b81fe78aa0..2410ce1e7bd60 100644
--- a/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp
+++ b/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp
@@ -33,8 +33,7 @@ namespace {
 /// proper SAT solver.
 struct CNFFormulaBuilder {
   // Formula should outlive CNFFormulaBuilder.
-  explicit CNFFormulaBuilder(CNFFormula &CNF)
-      : Formula(CNF) {}
+  explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {}
 
   /// Adds the `L1 v ... v Ln` clause to the formula. Applies
   /// simplifications, based on single-literal clauses.
@@ -51,8 +50,7 @@ struct CNFFormulaBuilder {
     llvm::SmallVector<Literal> Simplified;
     for (auto L : Literals) {
       assert(L != NullLit &&
-             llvm::all_of(Simplified,
-                          [L](Literal S) { return  S != L; }));
+             llvm::all_of(Simplified, [L](Literal S) { return S != L; }));
       auto X = var(L);
       if (trueVars.contains(X)) { // X must be true
         if (isPosLit(L))

>From 99058c4f83e789648786e15cdc5c621a91b3fd12 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboe...@google.com>
Date: Tue, 21 May 2024 06:52:18 +0000
Subject: [PATCH 3/3] fixup! fixup! [clang][dataflow] Make `CNFFormula`
 externally accessible.

---
 .../clang/Analysis/FlowSensitive/CNFFormula.h      |  6 +++---
 .../Analysis/FlowSensitive/WatchedLiteralsSolver.h |  6 ++++--
 .../FlowSensitive/WatchedLiteralsSolver.cpp        | 14 --------------
 3 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h 
b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
index 850ca66918d82..fb13e774c67fe 100644
--- a/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
+++ b/clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
@@ -48,15 +48,15 @@ constexpr ClauseID NullClause = 0;
 /// Returns the positive literal `V`.
 inline constexpr Literal posLit(Variable V) { return 2 * V; }
 
+/// Returns the negative literal `!V`.
+inline constexpr Literal negLit(Variable V) { return 2 * V + 1; }
+
 /// Returns whether `L` is a positive literal.
 inline constexpr bool isPosLit(Literal L) { return 0 == (L & 1); }
 
 /// Returns whether `L` is a negative literal.
 inline constexpr bool isNegLit(Literal L) { return 1 == (L & 1); }
 
-/// Returns the negative literal `!V`.
-inline constexpr Literal negLit(Variable V) { return 2 * V + 1; }
-
 /// Returns the negated literal `!L`.
 inline constexpr Literal notLit(Literal L) { return L ^ 1; }
 
diff --git a/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h 
b/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h
index c948aa21cec3b..d74380b78e935 100644
--- a/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h
+++ b/clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h
@@ -23,9 +23,11 @@ namespace dataflow {
 
 /// A SAT solver that is an implementation of Algorithm D from Knuth's The Art
 /// of Computer Programming Volume 4: Satisfiability, Fascicle 6. It is based 
on
-/// the Davis-Putnam-Logemann-Loveland (DPLL) algorithm, keeps references to a
-/// single "watched" literal per clause, and uses a set of "active" variables
+/// the Davis-Putnam-Logemann-Loveland (DPLL) algorithm [1], keeps references 
to
+/// a single "watched" literal per clause, and uses a set of "active" variables
 /// for unit propagation.
+//
+// [1] https://en.wikipedia.org/wiki/DPLL_algorithm
 class WatchedLiteralsSolver : public Solver {
   // Count of the iterations of the main loop of the solver. This spans *all*
   // calls to the underlying solver across the life of this object. It is
diff --git a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp 
b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
index fcc748a39c1fa..a39f0e0b29ad1 100644
--- a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
+++ b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
@@ -27,20 +27,6 @@
 namespace clang {
 namespace dataflow {
 
-// `WatchedLiteralsSolver` is an implementation of Algorithm D from Knuth's
-// The Art of Computer Programming Volume 4: Satisfiability, Fascicle 6. It is
-// based on the backtracking DPLL algorithm [1], keeps references to a single
-// "watched" literal per clause, and uses a set of "active" variables to 
perform
-// unit propagation.
-//
-// The solver expects that its input is a boolean formula in conjunctive normal
-// form that consists of clauses of at least one literal. A literal is either a
-// boolean variable or its negation. Below we define types, data structures, 
and
-// utilities that are used to represent boolean formulas in conjunctive normal
-// form.
-//
-// [1] https://en.wikipedia.org/wiki/DPLL_algorithm
-
 namespace {
 
 class WatchedLiteralsSolverImpl {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to