https://github.com/necto updated https://github.com/llvm/llvm-project/pull/121551
>From 115814c2776b6acc8f4a08ec696a3cb27a7c0ebd Mon Sep 17 00:00:00 2001 From: Arseniy Zaostrovnykh <necto...@gmail.com> Date: Thu, 2 Jan 2025 09:58:53 +0100 Subject: [PATCH 1/5] Add SymbolID to every SymExpr, not just SymbolData --- .../Core/PathSensitive/SymExpr.h | 25 ++++++++++++------- .../Core/PathSensitive/SymbolManager.h | 19 +++++++------- .../lib/StaticAnalyzer/Core/SymbolManager.cpp | 15 +++++++---- clang/test/Analysis/dump_egraph.cpp | 2 +- .../expr-inspection-printState-diseq-info.c | 12 ++++----- .../expr-inspection-printState-eq-classes.c | 4 +-- clang/test/Analysis/ptr-arith.cpp | 4 +-- ...symbol-simplification-disequality-info.cpp | 20 +++++++-------- ...-simplification-fixpoint-one-iteration.cpp | 12 ++++----- ...simplification-fixpoint-two-iterations.cpp | 18 ++++++------- clang/test/Analysis/unary-sym-expr.c | 6 ++--- 11 files changed, 75 insertions(+), 62 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h index 862a30c0e73633..2b6401eb05b72b 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h @@ -25,6 +25,8 @@ namespace ento { class MemRegion; +using SymbolID = unsigned; + /// Symbolic value. These values used to capture symbolic execution of /// the program. class SymExpr : public llvm::FoldingSetNode { @@ -39,9 +41,19 @@ class SymExpr : public llvm::FoldingSetNode { private: Kind K; + /// A unique identifier for this symbol. + /// + /// It is useful for SymbolData to easily differentiate multiple symbols, but + /// also for "ephemeral" symbols, such as binary operations, because this id + /// can be used for arranging constraints or equivalence classes instead of + /// unstable pointer values. + /// + /// Note, however, that it can't be used in Profile because SymbolManager + /// needs to compute Profile before allocating SymExpr. + const SymbolID Sym; protected: - SymExpr(Kind k) : K(k) {} + SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {} static bool isValidTypeForSymbol(QualType T) { // FIXME: Depending on whether we choose to deprecate structural symbols, @@ -56,6 +68,8 @@ class SymExpr : public llvm::FoldingSetNode { Kind getKind() const { return K; } + SymbolID getSymbolID() const { return Sym; } + virtual void dump() const; virtual void dumpToStream(raw_ostream &os) const {} @@ -112,19 +126,14 @@ inline raw_ostream &operator<<(raw_ostream &os, using SymbolRef = const SymExpr *; using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>; -using SymbolID = unsigned; /// A symbol representing data which can be stored in a memory location /// (region). class SymbolData : public SymExpr { - const SymbolID Sym; - void anchor() override; protected: - SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) { - assert(classof(this)); - } + SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); } public: ~SymbolData() override = default; @@ -132,8 +141,6 @@ class SymbolData : public SymExpr { /// Get a string representation of the kind of the region. virtual StringRef getKindStr() const = 0; - SymbolID getSymbolID() const { return Sym; } - unsigned computeComplexity() const override { return 1; }; diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 73732d532f630f..e6d7f5a37130d1 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -287,8 +287,8 @@ class SymbolCast : public SymExpr { QualType ToTy; public: - SymbolCast(const SymExpr *In, QualType From, QualType To) - : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { + SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To) + : SymExpr(SymbolCastKind, Sym), Operand(In), FromTy(From), ToTy(To) { assert(In); assert(isValidTypeForSymbol(From)); // FIXME: GenericTaintChecker creates symbols of void type. @@ -333,8 +333,9 @@ class UnarySymExpr : public SymExpr { QualType T; public: - UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T) - : SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) { + UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op, + QualType T) + : SymExpr(UnarySymExprKind, Sym), Operand(In), Op(Op), T(T) { // Note, some unary operators are modeled as a binary operator. E.g. ++x is // modeled as x + 1. assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression"); @@ -381,8 +382,8 @@ class BinarySymExpr : public SymExpr { QualType T; protected: - BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t) - : SymExpr(k), Op(op), T(t) { + BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t) + : SymExpr(k, Sym), Op(op), T(t) { assert(classof(this)); // Binary expressions are results of arithmetic. Pointer arithmetic is not // handled by binary expressions, but it is instead handled by applying @@ -426,9 +427,9 @@ class BinarySymExprImpl : public BinarySymExpr { RHSTYPE RHS; public: - BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs, - QualType t) - : BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) { + BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op, + RHSTYPE rhs, QualType t) + : BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) { assert(getPointer(lhs)); assert(getPointer(rhs)); } diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index f21e5c3ad7bd7c..573a401e62044d 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -252,8 +252,9 @@ SymbolManager::getCastSymbol(const SymExpr *Op, void *InsertPos; SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) SymbolCast(Op, From, To); + data = new (BPAlloc) SymbolCast(SymbolCounter, Op, From, To); DataSet.InsertNode(data, InsertPos); + ++SymbolCounter; } return cast<SymbolCast>(data); @@ -268,8 +269,9 @@ const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs, SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) SymIntExpr(lhs, op, v, t); + data = new (BPAlloc) SymIntExpr(SymbolCounter, lhs, op, v, t); DataSet.InsertNode(data, InsertPos); + ++SymbolCounter; } return cast<SymIntExpr>(data); @@ -284,8 +286,9 @@ const IntSymExpr *SymbolManager::getIntSymExpr(APSIntPtr lhs, SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) IntSymExpr(lhs, op, rhs, t); + data = new (BPAlloc) IntSymExpr(SymbolCounter, lhs, op, rhs, t); DataSet.InsertNode(data, InsertPos); + ++SymbolCounter; } return cast<IntSymExpr>(data); @@ -301,8 +304,9 @@ const SymSymExpr *SymbolManager::getSymSymExpr(const SymExpr *lhs, SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) SymSymExpr(lhs, op, rhs, t); + data = new (BPAlloc) SymSymExpr(SymbolCounter, lhs, op, rhs, t); DataSet.InsertNode(data, InsertPos); + ++SymbolCounter; } return cast<SymSymExpr>(data); @@ -316,8 +320,9 @@ const UnarySymExpr *SymbolManager::getUnarySymExpr(const SymExpr *Operand, void *InsertPos; SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) UnarySymExpr(Operand, Opc, T); + data = new (BPAlloc) UnarySymExpr(SymbolCounter, Operand, Opc, T); DataSet.InsertNode(data, InsertPos); + ++SymbolCounter; } return cast<UnarySymExpr>(data); diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp index d1229b26346740..13459699a06f6f 100644 --- a/clang/test/Analysis/dump_egraph.cpp +++ b/clang/test/Analysis/dump_egraph.cpp @@ -21,7 +21,7 @@ void foo() { // CHECK: \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"location\": \{ \"line\": 15, \"column\": 5, \"file\": \"{{.*}}dump_egraph.cpp\" \}, \"items\": [\l \{ \"init_id\": {{[0-9]+}}, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t.s\" -// CHECK: \"cluster\": \"t\", \"pointer\": \"{{0x[0-9a-f]+}}\", \"items\": [\l \{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$2\{int, LC5, no stmt, #1\}\" +// CHECK: \"cluster\": \"t\", \"pointer\": \"{{0x[0-9a-f]+}}\", \"items\": [\l \{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC5, no stmt, #1\}\" // CHECK: \"dynamic_types\": [\l \{ \"region\": \"HeapSymRegion\{conj_$1\{S *, LC1, S{{[0-9]+}}, #1\}\}\", \"dyn_type\": \"S\", \"sub_classable\": false \}\l diff --git a/clang/test/Analysis/expr-inspection-printState-diseq-info.c b/clang/test/Analysis/expr-inspection-printState-diseq-info.c index c5c31785a600ef..515fcbbd430791 100644 --- a/clang/test/Analysis/expr-inspection-printState-diseq-info.c +++ b/clang/test/Analysis/expr-inspection-printState-diseq-info.c @@ -18,17 +18,17 @@ void test_disequality_info(int e0, int b0, int b1, int c0) { // CHECK-NEXT: { // CHECK-NEXT: "class": [ "(reg_$0<int e0>) - 2" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$2<int b1>" ]] + // CHECK-NEXT: [ "reg_$7<int b1>" ]] // CHECK-NEXT: }, // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$2<int b1>" ], + // CHECK-NEXT: "class": [ "reg_$15<int c0>" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "(reg_$0<int e0>) - 2" ], - // CHECK-NEXT: [ "reg_$3<int c0>" ]] + // CHECK-NEXT: [ "reg_$7<int b1>" ]] // CHECK-NEXT: }, // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3<int c0>" ], + // CHECK-NEXT: "class": [ "reg_$7<int b1>" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$2<int b1>" ]] + // CHECK-NEXT: [ "(reg_$0<int e0>) - 2" ], + // CHECK-NEXT: [ "reg_$15<int c0>" ]] // CHECK-NEXT: } // CHECK-NEXT: ], diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c index 38e23d6e838269..19cc13735ab5a6 100644 --- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c +++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c @@ -16,6 +16,6 @@ void test_equivalence_classes(int a, int b, int c, int d) { } // CHECK: "equivalence_classes": [ -// CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$2<int c>)" ], -// CHECK-NEXT: [ "reg_$0<int a>", "reg_$2<int c>", "reg_$3<int d>" ] +// CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$5<int c>)" ], +// CHECK-NEXT: [ "reg_$0<int a>", "reg_$20<int d>", "reg_$5<int c>" ] // CHECK-NEXT: ], diff --git a/clang/test/Analysis/ptr-arith.cpp b/clang/test/Analysis/ptr-arith.cpp index a1264a1f04839c..ec1c75c0c40632 100644 --- a/clang/test/Analysis/ptr-arith.cpp +++ b/clang/test/Analysis/ptr-arith.cpp @@ -139,10 +139,10 @@ struct parse_t { int parse(parse_t *p) { unsigned copy = p->bits2; clang_analyzer_dump(copy); - // expected-warning@-1 {{reg_$1<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>}} + // expected-warning@-1 {{reg_$2<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>}} header *bits = (header *)© clang_analyzer_dump(bits->b); - // expected-warning@-1 {{derived_$2{reg_$1<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}} + // expected-warning@-1 {{derived_$4{reg_$2<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}} return bits->b; // no-warning } } // namespace Bug_55934 diff --git a/clang/test/Analysis/symbol-simplification-disequality-info.cpp b/clang/test/Analysis/symbol-simplification-disequality-info.cpp index 69238b583eb846..33b8f150f5d021 100644 --- a/clang/test/Analysis/symbol-simplification-disequality-info.cpp +++ b/clang/test/Analysis/symbol-simplification-disequality-info.cpp @@ -14,14 +14,14 @@ void test(int a, int b, int c, int d) { clang_analyzer_printState(); // CHECK: "disequality_info": [ // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ], + // CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$2<int b>)) + (reg_$5<int c>)" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$3<int d>" ]] + // CHECK-NEXT: [ "reg_$8<int d>" ]] // CHECK-NEXT: }, // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3<int d>" ], + // CHECK-NEXT: "class": [ "reg_$8<int d>" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ]] + // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$2<int b>)) + (reg_$5<int c>)" ]] // CHECK-NEXT: } // CHECK-NEXT: ], @@ -32,14 +32,14 @@ void test(int a, int b, int c, int d) { clang_analyzer_printState(); // CHECK: "disequality_info": [ // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "(reg_$0<int a>) + (reg_$2<int c>)" ], + // CHECK-NEXT: "class": [ "(reg_$0<int a>) + (reg_$5<int c>)" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$3<int d>" ]] + // CHECK-NEXT: [ "reg_$8<int d>" ]] // CHECK-NEXT: }, // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3<int d>" ], + // CHECK-NEXT: "class": [ "reg_$8<int d>" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$2<int c>)" ]] + // CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$5<int c>)" ]] // CHECK-NEXT: } // CHECK-NEXT: ], @@ -50,10 +50,10 @@ void test(int a, int b, int c, int d) { // CHECK-NEXT: { // CHECK-NEXT: "class": [ "reg_$0<int a>" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$3<int d>" ]] + // CHECK-NEXT: [ "reg_$8<int d>" ]] // CHECK-NEXT: }, // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3<int d>" ], + // CHECK-NEXT: "class": [ "reg_$8<int d>" ], // CHECK-NEXT: "disequal_to": [ // CHECK-NEXT: [ "reg_$0<int a>" ]] // CHECK-NEXT: } diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp index 73922d420a8c3d..42e984762538e1 100644 --- a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp +++ b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp @@ -13,10 +13,10 @@ void test(int a, int b, int c) { return; clang_analyzer_printState(); // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "range": "{ [0, 0] }" } + // CHECK-NEXT: { "symbol": "((reg_$0<int a>) + (reg_$2<int b>)) != (reg_$5<int c>)", "range": "{ [0, 0] }" } // CHECK-NEXT: ], // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$1<int b>)", "reg_$2<int c>" ] + // CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$2<int b>)", "reg_$5<int c>" ] // CHECK-NEXT: ], // CHECK-NEXT: "disequality_info": null, @@ -25,12 +25,12 @@ void test(int a, int b, int c) { return; clang_analyzer_printState(); // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$2<int c>)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" } + // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$5<int c>)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$2<int b>", "range": "{ [0, 0] }" } // CHECK-NEXT: ], // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$2<int c>)" ], - // CHECK-NEXT: [ "reg_$0<int a>", "reg_$2<int c>" ] + // CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$5<int c>)" ], + // CHECK-NEXT: [ "reg_$0<int a>", "reg_$5<int c>" ] // CHECK-NEXT: ], // CHECK-NEXT: "disequality_info": null, diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp index 679ed3fda7a7a7..cffb5a70869ebe 100644 --- a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp +++ b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp @@ -15,11 +15,11 @@ void test(int a, int b, int c, int d) { return; clang_analyzer_printState(); // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "(((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)) != (reg_$3<int d>)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "(reg_$2<int c>) + (reg_$1<int b>)", "range": "{ [0, 0] }" } + // CHECK-NEXT: { "symbol": "(((reg_$0<int a>) + (reg_$2<int b>)) + (reg_$5<int c>)) != (reg_$8<int d>)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "(reg_$5<int c>) + (reg_$2<int b>)", "range": "{ [0, 0] }" } // CHECK-NEXT: ], // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "reg_$3<int d>" ] + // CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$2<int b>)) + (reg_$5<int c>)", "reg_$8<int d>" ] // CHECK-NEXT: ], // CHECK-NEXT: "disequality_info": null, @@ -28,14 +28,14 @@ void test(int a, int b, int c, int d) { return; clang_analyzer_printState(); // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$3<int d>)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$2<int c>", "range": "{ [0, 0] }" } + // CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$8<int d>)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$2<int b>", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$5<int c>", "range": "{ [0, 0] }" } // CHECK-NEXT: ], // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$3<int d>)" ], - // CHECK-NEXT: [ "reg_$0<int a>", "reg_$3<int d>" ], - // CHECK-NEXT: [ "reg_$2<int c>" ] + // CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$8<int d>)" ], + // CHECK-NEXT: [ "reg_$0<int a>", "reg_$8<int d>" ], + // CHECK-NEXT: [ "reg_$5<int c>" ] // CHECK-NEXT: ], // CHECK-NEXT: "disequality_info": null, diff --git a/clang/test/Analysis/unary-sym-expr.c b/clang/test/Analysis/unary-sym-expr.c index 92e11b295bee7c..64a01a956c442c 100644 --- a/clang/test/Analysis/unary-sym-expr.c +++ b/clang/test/Analysis/unary-sym-expr.c @@ -11,9 +11,9 @@ int test(int x, int y) { clang_analyzer_dump(-x); // expected-warning{{-reg_$0<int x>}} clang_analyzer_dump(~x); // expected-warning{{~reg_$0<int x>}} int z = x + y; - clang_analyzer_dump(-z); // expected-warning{{-((reg_$0<int x>) + (reg_$1<int y>))}} - clang_analyzer_dump(-(x + y)); // expected-warning{{-((reg_$0<int x>) + (reg_$1<int y>))}} - clang_analyzer_dump(-x + y); // expected-warning{{(-reg_$0<int x>) + (reg_$1<int y>)}} + clang_analyzer_dump(-z); // expected-warning{{-((reg_$0<int x>) + (reg_$3<int y>))}} + clang_analyzer_dump(-(x + y)); // expected-warning{{-((reg_$0<int x>) + (reg_$3<int y>))}} + clang_analyzer_dump(-x + y); // expected-warning{{(-reg_$0<int x>) + (reg_$3<int y>)}} if (-x == 0) { clang_analyzer_eval(-x == 0); // expected-warning{{TRUE}} >From 5e5adba82479d6ddb0c379ba9eb16f4a0ffe8a24 Mon Sep 17 00:00:00 2001 From: Arseniy Zaostrovnykh <necto...@gmail.com> Date: Thu, 2 Jan 2025 11:35:04 +0100 Subject: [PATCH 2/5] [NFC] Group allocation and ID increment into SymExprAllocator This helps with two things: - make sure NextSymbolID is incremented on every allocation - No SymExpr can be allocated without a unique ID --- .../Core/PathSensitive/SymbolManager.h | 48 ++++++++++++++----- .../lib/StaticAnalyzer/Core/SymbolManager.cpp | 30 ++++-------- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index e6d7f5a37130d1..bb3423a1860cc0 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -43,15 +43,16 @@ class StoreManager; class SymbolRegionValue : public SymbolData { const TypedValueRegion *R; -public: + friend class SymExprAllocator; SymbolRegionValue(SymbolID sym, const TypedValueRegion *r) : SymbolData(SymbolRegionValueKind, sym), R(r) { assert(r); assert(isValidTypeForSymbol(r->getValueType())); } +public: LLVM_ATTRIBUTE_RETURNS_NONNULL - const TypedValueRegion* getRegion() const { return R; } + const TypedValueRegion *getRegion() const { return R; } static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) { profile.AddInteger((unsigned) SymbolRegionValueKind); @@ -84,7 +85,7 @@ class SymbolConjured : public SymbolData { const LocationContext *LCtx; const void *SymbolTag; -public: + friend class SymExprAllocator; SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx, QualType t, unsigned count, const void *symbolTag) : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count), @@ -98,6 +99,7 @@ class SymbolConjured : public SymbolData { assert(isValidTypeForSymbol(t)); } +public: /// It might return null. const Stmt *getStmt() const { return S; } unsigned getCount() const { return Count; } @@ -137,7 +139,7 @@ class SymbolDerived : public SymbolData { SymbolRef parentSymbol; const TypedValueRegion *R; -public: + friend class SymExprAllocator; SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r) : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) { assert(parent); @@ -145,6 +147,7 @@ class SymbolDerived : public SymbolData { assert(isValidTypeForSymbol(r->getValueType())); } +public: LLVM_ATTRIBUTE_RETURNS_NONNULL SymbolRef getParentSymbol() const { return parentSymbol; } LLVM_ATTRIBUTE_RETURNS_NONNULL @@ -180,12 +183,13 @@ class SymbolDerived : public SymbolData { class SymbolExtent : public SymbolData { const SubRegion *R; -public: + friend class SymExprAllocator; SymbolExtent(SymbolID sym, const SubRegion *r) : SymbolData(SymbolExtentKind, sym), R(r) { assert(r); } +public: LLVM_ATTRIBUTE_RETURNS_NONNULL const SubRegion *getRegion() const { return R; } @@ -222,7 +226,7 @@ class SymbolMetadata : public SymbolData { unsigned Count; const void *Tag; -public: + friend class SymExprAllocator; SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t, const LocationContext *LCtx, unsigned count, const void *tag) : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx), @@ -234,6 +238,7 @@ class SymbolMetadata : public SymbolData { assert(tag); } + public: LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion *getRegion() const { return R; } @@ -286,7 +291,7 @@ class SymbolCast : public SymExpr { /// The type of the result. QualType ToTy; -public: + friend class SymExprAllocator; SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To) : SymExpr(SymbolCastKind, Sym), Operand(In), FromTy(From), ToTy(To) { assert(In); @@ -295,6 +300,7 @@ class SymbolCast : public SymExpr { // Otherwise, 'To' should also be a valid type. } +public: unsigned computeComplexity() const override { if (Complexity == 0) Complexity = 1 + Operand->computeComplexity(); @@ -332,7 +338,7 @@ class UnarySymExpr : public SymExpr { UnaryOperator::Opcode Op; QualType T; -public: + friend class SymExprAllocator; UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op, QualType T) : SymExpr(UnarySymExprKind, Sym), Operand(In), Op(Op), T(T) { @@ -346,6 +352,7 @@ class UnarySymExpr : public SymExpr { assert(!Loc::isLocType(T) && "unary symbol should be nonloc"); } +public: unsigned computeComplexity() const override { if (Complexity == 0) Complexity = 1 + Operand->computeComplexity(); @@ -426,7 +433,7 @@ class BinarySymExprImpl : public BinarySymExpr { LHSTYPE LHS; RHSTYPE RHS; -public: + friend class SymExprAllocator; BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) : BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) { @@ -434,6 +441,7 @@ class BinarySymExprImpl : public BinarySymExpr { assert(getPointer(rhs)); } +public: void dumpToStream(raw_ostream &os) const override { dumpToStreamImpl(os, LHS); dumpToStreamImpl(os, getOpcode()); @@ -479,6 +487,21 @@ using IntSymExpr = BinarySymExprImpl<APSIntPtr, const SymExpr *, using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *, SymExpr::Kind::SymSymExprKind>; +class SymExprAllocator { + SymbolID NextSymbolID = 0; + llvm::BumpPtrAllocator &Alloc; + +public: + explicit SymExprAllocator(llvm::BumpPtrAllocator &Alloc) : Alloc(Alloc) {} + + template <class SymT, typename... ArgsT> SymT *make(ArgsT &&...Args) { + return new (Alloc) SymT(nextID(), std::forward<ArgsT>(Args)...); + } + +private: + SymbolID nextID() { return NextSymbolID++; } +}; + class SymbolManager { using DataSetTy = llvm::FoldingSet<SymExpr>; using SymbolDependTy = @@ -490,15 +513,14 @@ class SymbolManager { /// alive as long as the key is live. SymbolDependTy SymbolDependencies; - unsigned SymbolCounter = 0; - llvm::BumpPtrAllocator& BPAlloc; + SymExprAllocator Alloc; BasicValueFactory &BV; ASTContext &Ctx; public: SymbolManager(ASTContext &ctx, BasicValueFactory &bv, - llvm::BumpPtrAllocator& bpalloc) - : SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {} + llvm::BumpPtrAllocator &bpalloc) + : SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {} static bool canSymbolicate(QualType T); diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index 573a401e62044d..738b6a175ce6de 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -170,9 +170,8 @@ SymbolManager::getRegionValueSymbol(const TypedValueRegion* R) { void *InsertPos; SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); if (!SD) { - SD = new (BPAlloc) SymbolRegionValue(SymbolCounter, R); + SD = Alloc.make<SymbolRegionValue>(R); DataSet.InsertNode(SD, InsertPos); - ++SymbolCounter; } return cast<SymbolRegionValue>(SD); @@ -188,9 +187,8 @@ const SymbolConjured* SymbolManager::conjureSymbol(const Stmt *E, void *InsertPos; SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); if (!SD) { - SD = new (BPAlloc) SymbolConjured(SymbolCounter, E, LCtx, T, Count, SymbolTag); + SD = Alloc.make<SymbolConjured>(E, LCtx, T, Count, SymbolTag); DataSet.InsertNode(SD, InsertPos); - ++SymbolCounter; } return cast<SymbolConjured>(SD); @@ -204,9 +202,8 @@ SymbolManager::getDerivedSymbol(SymbolRef parentSymbol, void *InsertPos; SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); if (!SD) { - SD = new (BPAlloc) SymbolDerived(SymbolCounter, parentSymbol, R); + SD = Alloc.make<SymbolDerived>(parentSymbol, R); DataSet.InsertNode(SD, InsertPos); - ++SymbolCounter; } return cast<SymbolDerived>(SD); @@ -219,9 +216,8 @@ SymbolManager::getExtentSymbol(const SubRegion *R) { void *InsertPos; SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); if (!SD) { - SD = new (BPAlloc) SymbolExtent(SymbolCounter, R); + SD = Alloc.make<SymbolExtent>(R); DataSet.InsertNode(SD, InsertPos); - ++SymbolCounter; } return cast<SymbolExtent>(SD); @@ -236,9 +232,8 @@ SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt *S, QualType T, void *InsertPos; SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos); if (!SD) { - SD = new (BPAlloc) SymbolMetadata(SymbolCounter, R, S, T, LCtx, Count, SymbolTag); + SD = Alloc.make<SymbolMetadata>(R, S, T, LCtx, Count, SymbolTag); DataSet.InsertNode(SD, InsertPos); - ++SymbolCounter; } return cast<SymbolMetadata>(SD); @@ -252,9 +247,8 @@ SymbolManager::getCastSymbol(const SymExpr *Op, void *InsertPos; SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) SymbolCast(SymbolCounter, Op, From, To); + data = Alloc.make<SymbolCast>(Op, From, To); DataSet.InsertNode(data, InsertPos); - ++SymbolCounter; } return cast<SymbolCast>(data); @@ -269,9 +263,8 @@ const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs, SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) SymIntExpr(SymbolCounter, lhs, op, v, t); + data = Alloc.make<SymIntExpr>(lhs, op, v, t); DataSet.InsertNode(data, InsertPos); - ++SymbolCounter; } return cast<SymIntExpr>(data); @@ -286,9 +279,8 @@ const IntSymExpr *SymbolManager::getIntSymExpr(APSIntPtr lhs, SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) IntSymExpr(SymbolCounter, lhs, op, rhs, t); + data = Alloc.make<IntSymExpr>(lhs, op, rhs, t); DataSet.InsertNode(data, InsertPos); - ++SymbolCounter; } return cast<IntSymExpr>(data); @@ -304,9 +296,8 @@ const SymSymExpr *SymbolManager::getSymSymExpr(const SymExpr *lhs, SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) SymSymExpr(SymbolCounter, lhs, op, rhs, t); + data = Alloc.make<SymSymExpr>(lhs, op, rhs, t); DataSet.InsertNode(data, InsertPos); - ++SymbolCounter; } return cast<SymSymExpr>(data); @@ -320,9 +311,8 @@ const UnarySymExpr *SymbolManager::getUnarySymExpr(const SymExpr *Operand, void *InsertPos; SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos); if (!data) { - data = new (BPAlloc) UnarySymExpr(SymbolCounter, Operand, Opc, T); + data = Alloc.make<UnarySymExpr>(Operand, Opc, T); DataSet.InsertNode(data, InsertPos); - ++SymbolCounter; } return cast<UnarySymExpr>(data); >From 74be3aae75a84071e07f4cb86cacd3c9ff910c16 Mon Sep 17 00:00:00 2001 From: Arseniy Zaostrovnykh <necto...@gmail.com> Date: Thu, 2 Jan 2025 11:53:56 +0100 Subject: [PATCH 3/5] Use getSymbolID instead of raw pointer values for SymbolRef ordering --- .../Core/PathSensitive/SymbolManager.h | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index bb3423a1860cc0..2b31534ce1a9dd 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -25,6 +25,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Allocator.h" #include <cassert> @@ -710,4 +711,35 @@ class SymbolVisitor { } // namespace clang +// Override the default definition that would use pointer values of SymbolRefs +// to order them, which is unstable due to ASLR. +// Use the SymbolID instead which reflect the order in which the symbols were +// allocated. This is usually stable across runs leading to the stability of +// ConstraintMap and other containers using SymbolRef as keys. +template <> +struct ::llvm::ImutContainerInfo<clang::ento::SymbolRef> + : public ImutProfileInfo<clang::ento::SymbolRef> { + using value_type = + typename ImutProfileInfo<clang::ento::SymbolRef>::value_type; + using value_type_ref = + typename ImutProfileInfo<clang::ento::SymbolRef>::value_type_ref; + using key_type = value_type; + using key_type_ref = value_type_ref; + using data_type = bool; + using data_type_ref = bool; + + static key_type_ref KeyOfValue(value_type_ref D) { return D; } + static data_type_ref DataOfValue(value_type_ref) { return true; } + + static bool isEqual(key_type_ref LHS, key_type_ref RHS) { + return LHS->getSymbolID() == RHS->getSymbolID(); + } + + static bool isLess(key_type_ref LHS, key_type_ref RHS) { + return LHS->getSymbolID() < RHS->getSymbolID(); + } + + static bool isDataEqual(data_type_ref, data_type_ref) { return true; } +}; + #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H >From d32f6abb0404e319765b252c78272d7e2d818f2c Mon Sep 17 00:00:00 2001 From: Arseniy Zaostrovnykh <necto...@gmail.com> Date: Fri, 3 Jan 2025 17:40:42 +0100 Subject: [PATCH 4/5] [NFC] Explain why isDataEqual is necessary; inline type aliases. --- .../Core/PathSensitive/SymbolManager.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 2b31534ce1a9dd..b57f415ec139f8 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -719,10 +719,8 @@ class SymbolVisitor { template <> struct ::llvm::ImutContainerInfo<clang::ento::SymbolRef> : public ImutProfileInfo<clang::ento::SymbolRef> { - using value_type = - typename ImutProfileInfo<clang::ento::SymbolRef>::value_type; - using value_type_ref = - typename ImutProfileInfo<clang::ento::SymbolRef>::value_type_ref; + using value_type = clang::ento::SymbolRef; + using value_type_ref = clang::ento::SymbolRef; using key_type = value_type; using key_type_ref = value_type_ref; using data_type = bool; @@ -731,14 +729,17 @@ struct ::llvm::ImutContainerInfo<clang::ento::SymbolRef> static key_type_ref KeyOfValue(value_type_ref D) { return D; } static data_type_ref DataOfValue(value_type_ref) { return true; } - static bool isEqual(key_type_ref LHS, key_type_ref RHS) { + static bool isEqual(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) { return LHS->getSymbolID() == RHS->getSymbolID(); } - static bool isLess(key_type_ref LHS, key_type_ref RHS) { + static bool isLess(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) { return LHS->getSymbolID() < RHS->getSymbolID(); } + // This might seem redundant, but it is required because of the way + // ImmutableSet is implemented through AVLTree: + // same as ImmutableMap, but with a non-informative "data". static bool isDataEqual(data_type_ref, data_type_ref) { return true; } }; >From 355c588f446965c04c6a7ebccb3a303f5e1e50e4 Mon Sep 17 00:00:00 2001 From: Arseniy Zaostrovnykh <necto...@gmail.com> Date: Fri, 3 Jan 2025 17:51:02 +0100 Subject: [PATCH 5/5] [NFC] A doc comment explaining the SymExpr::getSymbolID() --- .../clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h index 2b6401eb05b72b..aca14cf813c4bc 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h @@ -68,6 +68,12 @@ class SymExpr : public llvm::FoldingSetNode { Kind getKind() const { return K; } + /// Get a unique identifier for this symbol. + /// The ID is unique across all SymExprs in a SymbolManager. + /// They reflect the allocation order of these SymExprs, + /// and are likely stable across runs. + /// Used as a key in SymbolRef containers and as part of identity + /// for SymbolData, e.g. SymbolConjured with ID = 7 is "conj_$7". SymbolID getSymbolID() const { return Sym; } virtual void dump() const; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits