Re: [PATCH] D24307: calculate extent size for memory regions allocated by C++ new expression

2016-09-19 Thread Daniel Krupp via cfe-commits
dkrupp added a comment.

Thanks. Gabor, could you please merge this? I don't have commit rights.


https://reviews.llvm.org/D24307



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2024-02-13 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68140

>From 4b310278d2923ff718d074a7f7c8806ad03c6401 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 3 Oct 2023 19:58:28 +0200
Subject: [PATCH 1/2] [analyzer] Fix core.VLASize checker false positive taint
 reports

The checker reported a false positive on this code
void testTaintedSanitizedVLASize(void) {
  int x;
  scanf("%d", &x);
  if (x<1)
return;
  int vla[x]; // no-warning
}

After the fix, the checker emits the taint warning only if the VLA size
comes from a tainted source and cannot be proven to be positive.
---
 .../StaticAnalyzer/Checkers/VLASizeChecker.cpp   | 16 
 clang/test/Analysis/taint-diagnostic-visitor.c   |  4 ++--
 clang/test/Analysis/taint-generic.c  | 11 ++-
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index b195d912cadfe9..46b5f5e10f0e65 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -162,12 +162,6 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   if (SizeV.isUnknown())
 return nullptr;
 
-  // Check if the size is tainted.
-  if (isTainted(State, SizeV)) {
-reportTaintBug(SizeE, State, C, SizeV);
-return nullptr;
-  }
-
   // Check if the size is zero.
   DefinedSVal SizeD = SizeV.castAs();
 
@@ -189,10 +183,10 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   DefinedOrUnknownSVal Zero = SVB.makeZeroVal(SizeTy);
 
   SVal LessThanZeroVal = SVB.evalBinOp(State, BO_LT, SizeD, Zero, SizeTy);
+  ProgramStateRef StatePos, StateNeg;
   if (std::optional LessThanZeroDVal =
   LessThanZeroVal.getAs()) {
 ConstraintManager &CM = C.getConstraintManager();
-ProgramStateRef StatePos, StateNeg;
 
 std::tie(StateNeg, StatePos) = CM.assumeDual(State, *LessThanZeroDVal);
 if (StateNeg && !StatePos) {
@@ -202,6 +196,12 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
 State = StatePos;
   }
 
+  // Check if the size is tainted.
+  if ((StateNeg || StateZero) && isTainted(State, SizeV)) {
+reportTaintBug(SizeE, State, C, SizeV);
+return nullptr;
+  }
+
   return State;
 }
 
@@ -220,7 +220,7 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, 
ProgramStateRef State,
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
   os << "Declared variable-length array (VLA) ";
-  os << "has tainted size";
+  os << "has a tainted (attacker controlled) size, that can be 0 or negative";
 
   auto report = std::make_unique(*TaintBT, os.str(), 
N);
   report->addRange(SizeE->getSourceRange());
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e44..45369785ed6924 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -46,8 +46,8 @@ void taintDiagnosticVLA(void) {
   scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
// expected-note@-1 {{Taint originated here}}
// expected-note@-2 {{Taint propagated to the 2nd argument}}
-  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has 
tainted size}}
-  // expected-note@-1 {{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has a 
tainted}}
+  // expected-note@-1 {{Declared variable-length array (VLA) has a 
tainted}}
 }
 
 
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb..ae2ae5b23aab3c 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -405,7 +405,16 @@ int testDivByZero(void) {
 void testTaintedVLASize(void) {
   int x;
   scanf("%d", &x);
-  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has a 
tainted (attacker controlled) size, that can be 0 or negative}}
+}
+
+// Tainted-sanitized VLAs.
+void testTaintedSanitizedVLASize(void) {
+  int x;
+  scanf("%d", &x);
+  if (x<1)
+return;
+  int vla[x]; // no-warning
 }
 
 int testTaintedAllocaMem() {

>From 94fa4af57d28854df1c6ab3e3be2a7a902b620f1 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 10 Oct 2023 14:35:52 +0200
Subject: [PATCH 2/2] fixup!

---
 clang/docs/analyzer/checkers.rst  | 27 ---
 .../Checkers/VLASizeChecker.cpp   |  2 +-
 clang/test/Analysis/taint-generic.c   |  2 +-
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index dbd6d778782353..a48ba784ee9433 100644
--- a/clang/d

[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2024-02-13 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68140

>From 4b310278d2923ff718d074a7f7c8806ad03c6401 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 3 Oct 2023 19:58:28 +0200
Subject: [PATCH 1/3] [analyzer] Fix core.VLASize checker false positive taint
 reports

The checker reported a false positive on this code
void testTaintedSanitizedVLASize(void) {
  int x;
  scanf("%d", &x);
  if (x<1)
return;
  int vla[x]; // no-warning
}

After the fix, the checker emits the taint warning only if the VLA size
comes from a tainted source and cannot be proven to be positive.
---
 .../StaticAnalyzer/Checkers/VLASizeChecker.cpp   | 16 
 clang/test/Analysis/taint-diagnostic-visitor.c   |  4 ++--
 clang/test/Analysis/taint-generic.c  | 11 ++-
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index b195d912cadfe9..46b5f5e10f0e65 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -162,12 +162,6 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   if (SizeV.isUnknown())
 return nullptr;
 
-  // Check if the size is tainted.
-  if (isTainted(State, SizeV)) {
-reportTaintBug(SizeE, State, C, SizeV);
-return nullptr;
-  }
-
   // Check if the size is zero.
   DefinedSVal SizeD = SizeV.castAs();
 
@@ -189,10 +183,10 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   DefinedOrUnknownSVal Zero = SVB.makeZeroVal(SizeTy);
 
   SVal LessThanZeroVal = SVB.evalBinOp(State, BO_LT, SizeD, Zero, SizeTy);
+  ProgramStateRef StatePos, StateNeg;
   if (std::optional LessThanZeroDVal =
   LessThanZeroVal.getAs()) {
 ConstraintManager &CM = C.getConstraintManager();
-ProgramStateRef StatePos, StateNeg;
 
 std::tie(StateNeg, StatePos) = CM.assumeDual(State, *LessThanZeroDVal);
 if (StateNeg && !StatePos) {
@@ -202,6 +196,12 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
 State = StatePos;
   }
 
+  // Check if the size is tainted.
+  if ((StateNeg || StateZero) && isTainted(State, SizeV)) {
+reportTaintBug(SizeE, State, C, SizeV);
+return nullptr;
+  }
+
   return State;
 }
 
@@ -220,7 +220,7 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, 
ProgramStateRef State,
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
   os << "Declared variable-length array (VLA) ";
-  os << "has tainted size";
+  os << "has a tainted (attacker controlled) size, that can be 0 or negative";
 
   auto report = std::make_unique(*TaintBT, os.str(), 
N);
   report->addRange(SizeE->getSourceRange());
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e44..45369785ed6924 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -46,8 +46,8 @@ void taintDiagnosticVLA(void) {
   scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
// expected-note@-1 {{Taint originated here}}
// expected-note@-2 {{Taint propagated to the 2nd argument}}
-  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has 
tainted size}}
-  // expected-note@-1 {{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has a 
tainted}}
+  // expected-note@-1 {{Declared variable-length array (VLA) has a 
tainted}}
 }
 
 
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb..ae2ae5b23aab3c 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -405,7 +405,16 @@ int testDivByZero(void) {
 void testTaintedVLASize(void) {
   int x;
   scanf("%d", &x);
-  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has a 
tainted (attacker controlled) size, that can be 0 or negative}}
+}
+
+// Tainted-sanitized VLAs.
+void testTaintedSanitizedVLASize(void) {
+  int x;
+  scanf("%d", &x);
+  if (x<1)
+return;
+  int vla[x]; // no-warning
 }
 
 int testTaintedAllocaMem() {

>From 94fa4af57d28854df1c6ab3e3be2a7a902b620f1 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 10 Oct 2023 14:35:52 +0200
Subject: [PATCH 2/3] fixup!

---
 clang/docs/analyzer/checkers.rst  | 27 ---
 .../Checkers/VLASizeChecker.cpp   |  2 +-
 clang/test/Analysis/taint-generic.c   |  2 +-
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index dbd6d778782353..a48ba784ee9433 100644
--- a/clang/d

[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2024-02-13 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68140

>From 4b310278d2923ff718d074a7f7c8806ad03c6401 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 3 Oct 2023 19:58:28 +0200
Subject: [PATCH 1/4] [analyzer] Fix core.VLASize checker false positive taint
 reports

The checker reported a false positive on this code
void testTaintedSanitizedVLASize(void) {
  int x;
  scanf("%d", &x);
  if (x<1)
return;
  int vla[x]; // no-warning
}

After the fix, the checker emits the taint warning only if the VLA size
comes from a tainted source and cannot be proven to be positive.
---
 .../StaticAnalyzer/Checkers/VLASizeChecker.cpp   | 16 
 clang/test/Analysis/taint-diagnostic-visitor.c   |  4 ++--
 clang/test/Analysis/taint-generic.c  | 11 ++-
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index b195d912cadfe9..46b5f5e10f0e65 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -162,12 +162,6 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   if (SizeV.isUnknown())
 return nullptr;
 
-  // Check if the size is tainted.
-  if (isTainted(State, SizeV)) {
-reportTaintBug(SizeE, State, C, SizeV);
-return nullptr;
-  }
-
   // Check if the size is zero.
   DefinedSVal SizeD = SizeV.castAs();
 
@@ -189,10 +183,10 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   DefinedOrUnknownSVal Zero = SVB.makeZeroVal(SizeTy);
 
   SVal LessThanZeroVal = SVB.evalBinOp(State, BO_LT, SizeD, Zero, SizeTy);
+  ProgramStateRef StatePos, StateNeg;
   if (std::optional LessThanZeroDVal =
   LessThanZeroVal.getAs()) {
 ConstraintManager &CM = C.getConstraintManager();
-ProgramStateRef StatePos, StateNeg;
 
 std::tie(StateNeg, StatePos) = CM.assumeDual(State, *LessThanZeroDVal);
 if (StateNeg && !StatePos) {
@@ -202,6 +196,12 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
 State = StatePos;
   }
 
+  // Check if the size is tainted.
+  if ((StateNeg || StateZero) && isTainted(State, SizeV)) {
+reportTaintBug(SizeE, State, C, SizeV);
+return nullptr;
+  }
+
   return State;
 }
 
@@ -220,7 +220,7 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, 
ProgramStateRef State,
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
   os << "Declared variable-length array (VLA) ";
-  os << "has tainted size";
+  os << "has a tainted (attacker controlled) size, that can be 0 or negative";
 
   auto report = std::make_unique(*TaintBT, os.str(), 
N);
   report->addRange(SizeE->getSourceRange());
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e44..45369785ed6924 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -46,8 +46,8 @@ void taintDiagnosticVLA(void) {
   scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
// expected-note@-1 {{Taint originated here}}
// expected-note@-2 {{Taint propagated to the 2nd argument}}
-  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has 
tainted size}}
-  // expected-note@-1 {{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has a 
tainted}}
+  // expected-note@-1 {{Declared variable-length array (VLA) has a 
tainted}}
 }
 
 
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb..ae2ae5b23aab3c 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -405,7 +405,16 @@ int testDivByZero(void) {
 void testTaintedVLASize(void) {
   int x;
   scanf("%d", &x);
-  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has a 
tainted (attacker controlled) size, that can be 0 or negative}}
+}
+
+// Tainted-sanitized VLAs.
+void testTaintedSanitizedVLASize(void) {
+  int x;
+  scanf("%d", &x);
+  if (x<1)
+return;
+  int vla[x]; // no-warning
 }
 
 int testTaintedAllocaMem() {

>From 94fa4af57d28854df1c6ab3e3be2a7a902b620f1 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 10 Oct 2023 14:35:52 +0200
Subject: [PATCH 2/4] fixup!

---
 clang/docs/analyzer/checkers.rst  | 27 ---
 .../Checkers/VLASizeChecker.cpp   |  2 +-
 clang/test/Analysis/taint-generic.c   |  2 +-
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index dbd6d778782353..a48ba784ee9433 100644
--- a/clang/d

[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-18 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/66086

>From 889c886c3eed31335531ec61ad2b48bef15414d8 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 8 Sep 2023 16:57:49 +0200
Subject: [PATCH] [analyzer] TaintPropagation checker strlen() should not
 propagate

strlen(..) call should not propagate taintedness,
because it brings in many false positive findings.
It is a common pattern to copy user provided input
to another buffer. In these cases we always
get warnings about tainted data used as the malloc parameter:

buf = malloc(strlen(tainted_txt) + 1); // false warning

This pattern can lead to a denial of service attack only when
the attacker can directly specify the size of the allocated area
as an arbitrarily large number (e.g. the value is converted
from a user-provided string).

Later, we could reintroduce strlen() as a taint propagating function
with the consideration not to emit warnings when the tainted value
cannot be "arbitrarily large" (such as the size of an already allocated
memory block).
---
 clang/docs/ReleaseNotes.rst   |  7 +
 clang/docs/analyzer/checkers.rst  |  4 +--
 .../Checkers/GenericTaintChecker.cpp  |  7 ++---
 .../test/Analysis/taint-diagnostic-visitor.c  | 13 +-
 clang/test/Analysis/taint-generic.c   | 26 +--
 5 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3cdad2f7b9f0e5a..414cd7f62e2d764 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -409,6 +409,13 @@ Static Analyzer
   bitwise shift operators produce undefined behavior (because some operand is
   negative or too large).
 
+- The ``alpha.security.taint.TaintPropagation`` checker no longer propagates
+  taint on ``strlen`` and ``strnlen`` calls, unless these are explicitly
+  marked as propagators in the user-provided taint configuration file.
+  This removal empirically reduces the number of false positive reports.
+  Read the PR for the details.
+  (`#66086 <https://github.com/llvm/llvm-project/pull/66086>`_)
+
 .. _release-notes-sanitizers:
 
 Sanitizers
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 54ea49e7426cc86..dbd6d7787823530 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2599,8 +2599,8 @@ Default propagations rules:
  ``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``,
  ``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``,
  ``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``,
- ``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``,
- ``strndupa``, ``strnlen``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
+ ``strcspn``, ``strncasecmp``, ``strncmp``, ``strndup``,
+ ``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
  ``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``,
  ``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth``
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 5da0f34b3d0464f..dae8ff0c5c8f1b8 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -695,9 +695,10 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-  {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-  {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-  {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+
+  // strlen, wcslen, strnlen and alike intentionally don't propagate taint.
+  // See the details here: https://github.com/llvm/llvm-project/pull/66086
+
   {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
   {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
   {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index f1b9ceebdd9a6b8..8a7510177f3e444 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -10,6 +10,7 @@ int scanf(const char *restrict format, ...);
 int system(const char *command);
 char* getenv( const char* env_var );
 size_t strlen( const char* str );
+int atoi( const char* str );
 void *malloc(size_t size );
 void free( void *ptr );
 char *fgets(char *str, int n, FILE *stream);
@@ -54,11 +55,11 @@ void taintDiagnosticVLA(void) {
 // propagating through variables and expressions
 char *taintDiagnosticPropagation(){
   char *pathbuf;
-  char *pathlist=getenv("PATH"); // expected-note {{Taint 

[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-18 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

> As I'm not a maintainer, I could not push to your branch. Here is a patch 
> that I think has the missing pieces to satisfy my review. 
> [0001-fixup-analyzer-TaintPropagation-checker-strlen-shoul.patch.txt](https://github.com/llvm/llvm-project/files/12645128/0001-fixup-analyzer-TaintPropagation-checker-strlen-shoul.patch.txt)
>  Apply it to your branch by `git am 
> 0001-fixup-analyzer-TaintPropagation-checker-strlen-shoul.patch.txt`. After 
> that, I'm okay to squash merge this PR, if you are also okay with my 
> suggestions.

Thanks for the suggestions. I squashed it.

https://github.com/llvm/llvm-project/pull/66086
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-19 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp closed https://github.com/llvm/llvm-project/pull/66086
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2023-09-25 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/67352

This commit renames alpha.security.taint.TaintPropagation checker to 
optin.security.taint.TaintPropagation.

This checker was stabilized and improved by recent commits, so it is ready for
production use.

The checker is placed in the optin package as it implements an optional 
security analysis.

Reports by the checker:

- memcached [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=memcached_1.6.8_ednikru_taint_dealpha_baseline&newcheck=memcached_1.6.8_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- tmux [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=tmux_2.6_ednikru_taint_dealpha_baseline&newcheck=tmux_2.6_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- twin [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=twin_v0.8.1_ednikru_taint_dealpha_baseline&newcheck=twin_v0.8.1_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- vim [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=vim_v8.2.1920_ednikru_taint_dealpha_baseline&newcheck=vim_v8.2.1920_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- openssl [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=openssl_openssl-3.0.0-alpha7_ednikru_taint_dealpha_baseline&newcheck=openssl_openssl-3.0.0-alpha7_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- sqlite [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=sqlite_version-3.33.0_ednikru_taint_dealpha_baseline&newcheck=sqlite_version-3.33.0_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- ffmpeg [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=ffmpeg_n4.3.1_ednikru_taint_dealpha_baseline&newcheck=ffmpeg_n4.3.1_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- tinyxml [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=tinyxml2_8.0.0_ednikru_taint_dealpha_baseline&newcheck=tinyxml2_8.0.0_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- libwebm [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=libwebm_libwebm-1.0.0.27_ednikru_taint_dealpha_baseline&newcheck=libwebm_libwebm-1.0.0.27_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
- xerces [0 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=xerces_v3.2.3_ednikru_taint_dealpha_baseline&newcheck=xerces_v3.2.3_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)

- postgres [3 
reports](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=postgres_REL_13_0_ednikru_taint_dealpha_baseline&newcheck=postgres_REL_13_0_ednikru_taint_dealpha_new&is-unique=on&diff-type=New)
 
All reports are true positives in the sense that they correctly indicate that
tainted data gets to the trusted source and the data path is correctly shown.

Two reports indicate particularly interesting potential vulnerabilities.
- [Untrusted data is passed to a system 
call](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=postgres_REL_13_0_ednikru_taint_dealpha_baseline&newcheck=postgres_REL_13_0_ednikru_taint_dealpha_new&is-unique=on&diff-type=New&report-id=2721643&report-hash=29792212406eaa5c1c2ffac790fef9fa&report-filepath=%2afe-print.c)
- [Untrusted data is passed to a system 
call](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=postgres_REL_13_0_ednikru_taint_dealpha_baseline&newcheck=postgres_REL_13_0_ednikru_taint_dealpha_new&is-unique=on&diff-type=New&report-id=2722532&report-hash=4483012d4ec028b6c325ba8cb05540e5&report-filepath=%2aprint.c)
If environment variables are regarded as untrusted, an attacker may be able to
execute arbitrary commands through popen.

The report [Untrusted data is used to specify the buffer 
size](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=postgres_REL_13_0_ednikru_taint_dealpha_baseline&newcheck=postgres_REL_13_0_ednikru_taint_dealpha_new&is-unique=on&diff-type=New&report-id=2717295&report-hash=2cbf6c15bcd80e8066497f61a8478aef&report-filepath=%2aexec.c)
is likely not a vulnerability, as an environment string is just copied to 
another buffer.









>From d1fcbb6dbb65ee029b7fbcc0d1278f589bfc250b Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 11 Sep 2023 14:16:11 +0200
Subject: [PATCH] [analyzer] Moving TaintPropagation checker out of alpha

This commit renames alpha.security.taint.TaintPropagation
checker to optin.security.taint.TaintPropagation.

This checker was stabilized and improved by recent commits,
so it is ready for production use.

The checker is placed in the optin package as it implements
an optional security analysis.
---
 clang/docs/analyzer/checkers.rst  | 492 +-
 .../user-docs/TaintAnalysisConfiguration.rst  |   4 +-
 .../clang/S

[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2023-10-03 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/68140

The checker reported a false positive on this code 

void testTaintedSanitizedVLASize(void) {
  int x;
  scanf("%d", &x);
  if (x<1)
return;
  int vla[x]; // no-warning
}

After the fix, the checker emits the taint warning only if the VLA size comes
from a tainted source and cannot be proven to be positive.

>From 4b310278d2923ff718d074a7f7c8806ad03c6401 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 3 Oct 2023 19:58:28 +0200
Subject: [PATCH] [analyzer] Fix core.VLASize checker false positive taint
 reports

The checker reported a false positive on this code
void testTaintedSanitizedVLASize(void) {
  int x;
  scanf("%d", &x);
  if (x<1)
return;
  int vla[x]; // no-warning
}

After the fix, the checker emits the taint warning only if the VLA size
comes from a tainted source and cannot be proven to be positive.
---
 .../StaticAnalyzer/Checkers/VLASizeChecker.cpp   | 16 
 clang/test/Analysis/taint-diagnostic-visitor.c   |  4 ++--
 clang/test/Analysis/taint-generic.c  | 11 ++-
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index b195d912cadfe9b..46b5f5e10f0e65c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -162,12 +162,6 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   if (SizeV.isUnknown())
 return nullptr;
 
-  // Check if the size is tainted.
-  if (isTainted(State, SizeV)) {
-reportTaintBug(SizeE, State, C, SizeV);
-return nullptr;
-  }
-
   // Check if the size is zero.
   DefinedSVal SizeD = SizeV.castAs();
 
@@ -189,10 +183,10 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   DefinedOrUnknownSVal Zero = SVB.makeZeroVal(SizeTy);
 
   SVal LessThanZeroVal = SVB.evalBinOp(State, BO_LT, SizeD, Zero, SizeTy);
+  ProgramStateRef StatePos, StateNeg;
   if (std::optional LessThanZeroDVal =
   LessThanZeroVal.getAs()) {
 ConstraintManager &CM = C.getConstraintManager();
-ProgramStateRef StatePos, StateNeg;
 
 std::tie(StateNeg, StatePos) = CM.assumeDual(State, *LessThanZeroDVal);
 if (StateNeg && !StatePos) {
@@ -202,6 +196,12 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
 State = StatePos;
   }
 
+  // Check if the size is tainted.
+  if ((StateNeg || StateZero) && isTainted(State, SizeV)) {
+reportTaintBug(SizeE, State, C, SizeV);
+return nullptr;
+  }
+
   return State;
 }
 
@@ -220,7 +220,7 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, 
ProgramStateRef State,
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
   os << "Declared variable-length array (VLA) ";
-  os << "has tainted size";
+  os << "has a tainted (attacker controlled) size, that can be 0 or negative";
 
   auto report = std::make_unique(*TaintBT, os.str(), 
N);
   report->addRange(SizeE->getSourceRange());
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e444..45369785ed6924e 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -46,8 +46,8 @@ void taintDiagnosticVLA(void) {
   scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
// expected-note@-1 {{Taint originated here}}
// expected-note@-2 {{Taint propagated to the 2nd argument}}
-  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has 
tainted size}}
-  // expected-note@-1 {{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has a 
tainted}}
+  // expected-note@-1 {{Declared variable-length array (VLA) has a 
tainted}}
 }
 
 
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb7..ae2ae5b23aab3c6 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -405,7 +405,16 @@ int testDivByZero(void) {
 void testTaintedVLASize(void) {
   int x;
   scanf("%d", &x);
-  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has a 
tainted (attacker controlled) size, that can be 0 or negative}}
+}
+
+// Tainted-sanitized VLAs.
+void testTaintedSanitizedVLASize(void) {
+  int x;
+  scanf("%d", &x);
+  if (x<1)
+return;
+  int vla[x]; // no-warning
 }
 
 int testTaintedAllocaMem() {

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 26b19a6 - [clang][analyzer]Fix non-effective taint sanitation

2023-07-21 Thread Daniel Krupp via cfe-commits

Author: Daniel Krupp
Date: 2023-07-21T15:11:13+02:00
New Revision: 26b19a67e5c398a30b26214544878ec364dc59af

URL: 
https://github.com/llvm/llvm-project/commit/26b19a67e5c398a30b26214544878ec364dc59af
DIFF: 
https://github.com/llvm/llvm-project/commit/26b19a67e5c398a30b26214544878ec364dc59af.diff

LOG: [clang][analyzer]Fix non-effective taint sanitation

There was a bug in the alpha.security.taint.TaintPropagation checker
in the Clang Static Analyzer.
Taint filtering could only sanitize const arguments.
After this patch, taint filtering is also effective
on non-const parameters.

Differential Revision: https://reviews.llvm.org/D155848

Added: 


Modified: 
clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
clang/test/Analysis/Inputs/taint-generic-config.yaml
clang/test/Analysis/taint-generic.c

Removed: 




diff  --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4bf8ce13e0572c..3dcb45c0b11038 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -926,7 +926,9 @@ void GenericTaintRule::process(const GenericTaintChecker 
&Checker,
   });
 
   /// Check for taint propagation sources.
-  /// A rule is relevant if PropSrcArgs is empty, or if any of its signified
+  /// A rule will make the destination variables tainted if PropSrcArgs
+  /// is empty (taints the destination
+  /// arguments unconditionally), or if any of its signified
   /// args are tainted in context of the current CallEvent.
   bool IsMatching = PropSrcArgs.isEmpty();
   std::vector TaintedSymbols;
@@ -949,6 +951,8 @@ void GenericTaintRule::process(const GenericTaintChecker 
&Checker,
 }
   });
 
+  // Early return for propagation rules which dont match.
+  // Matching propagations, Sinks and Filters will pass this point.
   if (!IsMatching)
 return;
 
@@ -975,10 +979,13 @@ void GenericTaintRule::process(const GenericTaintChecker 
&Checker,
   Result = F.add(Result, I);
 }
 
+// Taint property gets lost if the variable is passed as a
+// non-const pointer or reference to a function which is
+// not inlined. For matching rules we want to preserve the taintedness.
 // TODO: We should traverse all reachable memory regions via the
 // escaping parameter. Instead of doing that we simply mark only the
 // referred memory region as tainted.
-if (WouldEscape(V, E->getType())) {
+if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, 
V)) {
   LLVM_DEBUG(if (!Result.contains(I)) {
 llvm::dbgs() << "PreCall<";
 Call.dump(llvm::dbgs());

diff  --git a/clang/test/Analysis/Inputs/taint-generic-config.yaml 
b/clang/test/Analysis/Inputs/taint-generic-config.yaml
index 39b52ccc32e67c..d025bb2a1ed3c8 100755
--- a/clang/test/Analysis/Inputs/taint-generic-config.yaml
+++ b/clang/test/Analysis/Inputs/taint-generic-config.yaml
@@ -69,6 +69,11 @@ Filters:
 Scope: "myAnotherNamespace::"
 Args:  [0]
 
+  # char *str; // str is tainted
+  # sanitizeCmd(str) // str is not tainted anymore
+  - Name: sanitizeCmd
+Args: [0]
+
 # A list of sink functions
 Sinks:
   # int x, y; // x and y are tainted

diff  --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index 62e1f570b6622a..84b7cc51dd6df8 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -1010,7 +1010,8 @@ void mySource2(int*);
 void myScanf(const char*, ...);
 int myPropagator(int, int*);
 int mySnprintf(char*, size_t, const char*, ...);
-bool isOutOfRange(const int*);
+bool isOutOfRange(const int*); // const filter function
+void sanitizeCmd(char*); // non-const filter function
 void mySink(int, int, int);
 
 void testConfigurationSources1(void) {
@@ -1044,6 +1045,19 @@ void testConfigurationFilter(void) {
   Buffer[x] = 1; // no-warning
 }
 
+void testConfigurationFilterNonConst(void) {
+  char buffer[1000];
+  myScanf("%s", buffer); // makes buffer tainted
+  system(buffer); // expected-warning {{Untrusted data is passed to a system 
call}}
+}
+
+void testConfigurationFilterNonConst2(void) {
+  char buffer[1000];
+  myScanf("%s", buffer); // makes buffer tainted
+  sanitizeCmd(buffer); // removes taintedness
+  system(buffer); // no-warning
+}
+
 void testConfigurationSinks(void) {
   int x = mySource1();
   mySink(x, 1, 2);



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 4dbe2db - [clang][analyzer] Improved documentation for TaintPropagation Checker

2023-07-25 Thread Daniel Krupp via cfe-commits

Author: Daniel Krupp
Date: 2023-07-25T11:34:11+02:00
New Revision: 4dbe2db02d03ffee27feb43a6ef332ca6a3cbca2

URL: 
https://github.com/llvm/llvm-project/commit/4dbe2db02d03ffee27feb43a6ef332ca6a3cbca2
DIFF: 
https://github.com/llvm/llvm-project/commit/4dbe2db02d03ffee27feb43a6ef332ca6a3cbca2.diff

LOG: [clang][analyzer] Improved documentation for TaintPropagation Checker

The usage of the taint analysis is described through a command injection attack 
example.
It is explained how to make a variable sanitized through configuration.

Differential Revision: https://reviews.llvm.org/D145229

Added: 


Modified: 
clang/docs/analyzer/checkers.rst

Removed: 




diff  --git a/clang/docs/analyzer/checkers.rst 
b/clang/docs/analyzer/checkers.rst
index 73b4967b1ffdeb..97b5369ac86c9b 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2359,64 +2359,244 @@ pointer. These functions include: getenv, localeconv, 
asctime, setlocale, strerr
 alpha.security.taint
 
 
-Checkers implementing `taint analysis 
`_.
+Checkers implementing
+`taint analysis `_.
 
 .. _alpha-security-taint-TaintPropagation:
 
 alpha.security.taint.TaintPropagation (C, C++)
 ""
 
-Taint analysis identifies untrusted sources of information (taint sources), 
rules as to how the untrusted data flows along the execution path (propagation 
rules), and points of execution where the use of tainted data is risky (taints 
sinks).
+Taint analysis identifies potential security vulnerabilities where the
+attacker can inject malicious data to the program to execute an attack
+(privilege escalation, command injection, SQL injection etc.).
+
+The malicious data is injected at the taint source (e.g. ``getenv()`` call)
+which is then propagated through function calls and being used as arguments of
+sensitive operations, also called as taint sinks (e.g. ``system()`` call).
+
+One can defend agains this type of vulnerability by always checking and
+santizing the potentially malicious, untrusted user input.
+
+The goal of the checker is to discover and show to the user these potential
+taint source-sink pairs and the propagation call chain.
+
 The most notable examples of taint sources are:
 
-  - network originating data
+  - data from network
+  - files or standard input
   - environment variables
-  - database originating data
+  - data from databases
 
-``GenericTaintChecker`` is the main implementation checker for this rule, and 
it generates taint information used by other checkers.
+Let us examine a practical example of a Command Injection attack.
 
 .. code-block:: c
 
- void test() {
-   char x = getchar(); // 'x' marked as tainted
-   system(&x); // warn: untrusted data is passed to a system call
- }
+  // Command Injection Vulnerability Example
+  int main(int argc, char** argv) {
+char cmd[2048] = "/bin/cat ";
+char filename[1024];
+printf("Filename:");
+scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape 
here
+strcat(cmd, filename);
+system(cmd); // Warning: Untrusted data is passed to a system call
+  }
 
- // note: compiler internally checks if the second param to
- // sprintf is a string literal or not.
- // Use -Wno-format-security to suppress compiler warning.
- void test() {
-   char s[10], buf[10];
-   fscanf(stdin, "%s", s); // 's' marked as tainted
+The program prints the content of any user specified file.
+Unfortunately the attacker can execute arbitrary commands
+with shell escapes. For example with the following input the `ls` command is 
also
+executed after the contents of `/etc/shadow` is printed.
+`Input: /etc/shadow ; ls /`
 
-   sprintf(buf, s); // warn: untrusted data as a format string
- }
+The analysis implemented in this checker points out this problem.
 
- void test() {
-   size_t ts;
-   scanf("%zd", &ts); // 'ts' marked as tainted
-   int *p = (int *)malloc(ts * sizeof(int));
- // warn: untrusted data as buffer size
- }
+One can protect against such attack by for example checking if the provided
+input refers to a valid file and removing any invalid user input.
+
+.. code-block:: c
+
+  // No vulnerability anymore, but we still get the warning
+  void sanitizeFileName(char* filename){
+if (access(filename,F_OK)){// Verifying user input
+  printf("File does not exist\n");
+  filename[0]='\0';
+  }
+  }
+  int main(int argc, char** argv) {
+char cmd[2048] = "/bin/cat ";
+char filename[1024];
+printf("Filename:");
+scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape 
here
+sanitizeFileName(filename);// filename is safe after this point
+if (!filename[0])
+  return -1;
+strcat(cmd, filename);
+system(cmd); // Superflous Warning: U

[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2023-10-09 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/68607

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the tainted size variable is 
bounded.

The false warning was also emitted for the malloc/calloc calls.

These warnings (the sinks) should be implemented in the MallocChecker and 
CStringChecker checkers instead, where more sophisticated handling can be done 
taking into consideration buffer size and integer constraints.

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
size tainted variable is bounded.

The false warning was emmitted also for the malloc/calloc calls.

These warning (the sink) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc84..b949cac504eddfe 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks s

[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2023-10-09 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

@haoNoQ  thanks for pointing out the #61826 umbrella issue; I somehow missed that.

I see this TaintPropagation checker as a generic, flexible tool to find 
potentially vulnerable data flows between any taint source and taint sink. The 
user should be able to configure sources and sinks in the YAML config file. The 
default settings will not always be correct. (Whether a file is a tainted source 
or not depends a lot on the actual application domain and maybe the deployment.)
This checker warns for every sink unconditionally.

The user can explicitly mark a value as sanitized in these cases:
```
  scanf (" %1023[^\n]", filename);
  if (access(filename,F_OK)){// Verifying user input
printf("File does not exist\n");
return -1;
  }
  #ifdef __clang_analyzer__
csa_mark_sanitized(filename); // Indicating to CSA that filename variable 
is safe to be used after this point
  #endif
  strcat(cmd, filename);
  system(cmd); // No warning
```
See the example for details in the doc:
https://clang.llvm.org/docs/analyzer/checkers.html#alpha-security-taint-taintpropagation-c-c

Of course it is annoying to add such instructions for tainted integer values, 
for which the analyzer can perform bounds checking automatically and refute 
some trivial false positive cases, which you also pointed out.

I saw many false positives for malloc(..) calls where a tainted buffer was 
being copied to another buffer, and I fixed that in this PR (by removing 
taint propagation for strlen):
* https://github.com/llvm/llvm-project/pull/66086
For me it refuted most of the FPs on open source projects.

In the end, I agree with you that it is better to remove all the unconditional 
integer taint sinks from the TaintPropagation checker and handle them in the 
MallocChecker and CStringChecker to take integer constraints into consideration.
So this PR does that:
* https://github.com/llvm/llvm-project/pull/68607
I will make follow-up patches to re-add these warnings for `malloc()` and 
`memcpy()` etc. in the MallocChecker and CStringChecker with more sophisticated 
handling.

The VLA case you pointed out I fixed in:
* https://github.com/llvm/llvm-project/pull/68140

In the case of strings, it is very difficult (in general impossible?) to 
automatically recognize the programmer's attempt to sanitize the data.
So I think `csa_mark_sanitized(string);` is an acceptable solution to get rid 
of unwanted warnings (or using a triaging tool).

I would add the `TaintPropagation` checker to the `optin` package as an 
optional security analysis checker which is not enabled by default, because its 
usage is very application area dependent. Some of the security-conscious 
applications would want to create the config file for their sources and sinks 
and even mark a few FPs, if they can catch a few vulnerabilities.

Does this approach sound acceptable to you?
From which project did you see the ~300 false reports? 

https://github.com/llvm/llvm-project/pull/67352
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2023-10-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/2] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
size tainted variable is bounded.

The false warning was emmitted also for the malloc/calloc calls.

These warning (the sink) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc84..b949cac504eddfe 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks should be implemented in other checkers
+   // with more sophisticated sanitation heuristics.
   "setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
   "setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-  // SinkProps
-  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-  

[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2023-10-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/3] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
size tainted variable is bounded.

The false warning was emmitted also for the malloc/calloc calls.

These warning (the sink) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc84..b949cac504eddfe 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks should be implemented in other checkers
+   // with more sophisticated sanitation heuristics.
   "setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
   "setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-  // SinkProps
-  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-  

[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2023-10-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/3] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
size tainted variable is bounded.

The false warning was emmitted also for the malloc/calloc calls.

These warning (the sink) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc84..b949cac504eddfe 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks should be implemented in other checkers
+   // with more sophisticated sanitation heuristics.
   "setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
   "setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-  // SinkProps
-  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-  

[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2023-10-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68140

>From 4b310278d2923ff718d074a7f7c8806ad03c6401 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 3 Oct 2023 19:58:28 +0200
Subject: [PATCH 1/2] [analyzer] Fix core.VLASize checker false positive taint
 reports

The checker reported a false positive on this code
void testTaintedSanitizedVLASize(void) {
  int x;
  scanf("%d", &x);
  if (x<1)
return;
  int vla[x]; // no-warning
}

After the fix, the checker only emits tainted warning if the vla size is
coming from a tainted source and it cannot prove that it is positive.
---
 .../StaticAnalyzer/Checkers/VLASizeChecker.cpp   | 16 
 clang/test/Analysis/taint-diagnostic-visitor.c   |  4 ++--
 clang/test/Analysis/taint-generic.c  | 11 ++-
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index b195d912cadfe9b..46b5f5e10f0e65c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -162,12 +162,6 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   if (SizeV.isUnknown())
 return nullptr;
 
-  // Check if the size is tainted.
-  if (isTainted(State, SizeV)) {
-reportTaintBug(SizeE, State, C, SizeV);
-return nullptr;
-  }
-
   // Check if the size is zero.
   DefinedSVal SizeD = SizeV.castAs();
 
@@ -189,10 +183,10 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   DefinedOrUnknownSVal Zero = SVB.makeZeroVal(SizeTy);
 
   SVal LessThanZeroVal = SVB.evalBinOp(State, BO_LT, SizeD, Zero, SizeTy);
+  ProgramStateRef StatePos, StateNeg;
   if (std::optional LessThanZeroDVal =
   LessThanZeroVal.getAs()) {
 ConstraintManager &CM = C.getConstraintManager();
-ProgramStateRef StatePos, StateNeg;
 
 std::tie(StateNeg, StatePos) = CM.assumeDual(State, *LessThanZeroDVal);
 if (StateNeg && !StatePos) {
@@ -202,6 +196,12 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
 State = StatePos;
   }
 
+  // Check if the size is tainted.
+  if ((StateNeg || StateZero) && isTainted(State, SizeV)) {
+reportTaintBug(SizeE, State, C, SizeV);
+return nullptr;
+  }
+
   return State;
 }
 
@@ -220,7 +220,7 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, 
ProgramStateRef State,
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
   os << "Declared variable-length array (VLA) ";
-  os << "has tainted size";
+  os << "has a tainted (attacker controlled) size, that can be 0 or negative";
 
   auto report = std::make_unique(*TaintBT, os.str(), 
N);
   report->addRange(SizeE->getSourceRange());
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e444..45369785ed6924e 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -46,8 +46,8 @@ void taintDiagnosticVLA(void) {
   scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
// expected-note@-1 {{Taint originated here}}
// expected-note@-2 {{Taint propagated to the 2nd argument}}
-  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has 
tainted size}}
-  // expected-note@-1 {{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has a 
tainted}}
+  // expected-note@-1 {{Declared variable-length array (VLA) has a 
tainted}}
 }
 
 
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb7..ae2ae5b23aab3c6 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -405,7 +405,16 @@ int testDivByZero(void) {
 void testTaintedVLASize(void) {
   int x;
   scanf("%d", &x);
-  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has a 
tainted (attacker controlled) size, that can be 0 or negative}}
+}
+
+// Tainted-sanitized VLAs.
+void testTaintedSanitizedVLASize(void) {
+  int x;
+  scanf("%d", &x);
+  if (x<1)
+return;
+  int vla[x]; // no-warning
 }
 
 int testTaintedAllocaMem() {

>From 94fa4af57d28854df1c6ab3e3be2a7a902b620f1 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 10 Oct 2023 14:35:52 +0200
Subject: [PATCH 2/2] fixup!

---
 clang/docs/analyzer/checkers.rst  | 27 ---
 .../Checkers/VLASizeChecker.cpp   |  2 +-
 clang/test/Analysis/taint-generic.c   |  2 +-
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index dbd6d7787823530..a48ba784ee94333 100644
--- a

[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-12 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/66086:

strlen(..) call should not propagate taintedness,
because it brings in many false positive findings. It is a common pattern to 
copy user provided input to another buffer. In these cases we always
get warnings about tainted data used as the malloc parameter:

buf = malloc(strlen(tainted_txt) + 1); // false warning

This pattern can lead to a denial of service attack only, when the attacker can 
directly specify the size of the allocated area as an arbitrary large number 
(e.g. the value is converted from a user provided string).

Later, we could reintroduce strlen() as a taint propagating function with the 
consideration not to emit warnings when the tainted value cannot be 
"arbitrarily large" (such as the size of an already allocated memory block).

The change has been evaluated on the following open source projects:

- memcached: [1 lost false 
positive](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=memcached_1.6.8_ednikru_taint_nostrlen_baseline&newcheck=memcached_1.6.8_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved)

- tmux: 0 lost reports
- twin [3 lost false 
positives](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=twin_v0.8.1_ednikru_taint_nostrlen_baseline&newcheck=twin_v0.8.1_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved)
- vim [1 lost false 
positive](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=vim_v8.2.1920_ednikru_taint_nostrlen_baseline&newcheck=vim_v8.2.1920_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved)
- openssl 0 lost reports
- sqlite [2 lost false 
positives](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=sqlite_version-3.33.0_ednikru_taint_nostrlen_baseline&newcheck=sqlite_version-3.33.0_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved)
- ffmpeg 0 lost reports
- postgresql [3 lost false 
positives](https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?run=postgres_REL_13_0_ednikru_taint_nostrlen_baseline&newcheck=postgres_REL_13_0_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved)
- tinyxml 0 lost reports
- libwebm 0 lost reports
- xerces 0 lost reports

In all cases the lost reports are originating from copying untrusted 
environment variables into another buffer.

There are 2 types of lost false positive reports:
1)  [Where the warning is emitted at the malloc call by the TaintPropagation 
Checker 
](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=memcached_1.6.8_ednikru_taint_nostrlen_baseline&newcheck=memcached_1.6.8_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved&report-id=2648506&report-hash=2079221954026f17e1ecb614f5f054db&report-filepath=%2amemcached.c)
`
len = strlen(portnumber_filename)+4+1;
temp_portnumber_filename = malloc(len);
`

2) When pointers are set based on the length of the tainted string by the 
ArrayBoundV2 checker.
For example [this 
](https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=vim_v8.2.1920_ednikru_taint_nostrlen_baseline&newcheck=vim_v8.2.1920_ednikru_taint_nostrlen_new&is-unique=on&diff-type=Resolved&report-id=2649310&report-hash=79dc8522d2cd34ca8e1b2dc2db64b2df&report-filepath=%2aos_unix.c)case.



>From 9c7674c39e1b07536f8c57bcdd2b07fb04b4873c Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 8 Sep 2023 16:57:49 +0200
Subject: [PATCH] [analyzer] TaintPropagation checker strlen() should not
 propagate

strlen(..) call should not propagate taintedness,
because it brings in many false positive findings.
It is a common pattern to copy user provided input
to another buffer. In these cases we always
get warnings about tainted data used as the malloc parameter:

buf = malloc(strlen(tainted_txt) + 1); // false warning

This pattern can lead to a denial of service attack only, when
the attacker can directly specify the size of the allocated area
as an arbitrary large number (e.g. the value is converted
from a user provided string).

Later, we could reintroduce strlen() as a taint propagating function
with the consideration not to emit warnings when the tainted value
cannot be "arbitrarily large" (such as the size of an already allocated
memory block).
---
 clang/docs/analyzer/checkers.rst   |  4 ++--
 .../Checkers/GenericTaintChecker.cpp   |  2 --
 clang/test/Analysis/taint-diagnostic-visitor.c | 13 +++--
 clang/test/Analysis/taint-generic.c| 18 --
 4 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 54ea49e7426cc86..dbd6d7787823530 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2599,8 +2599,8 @@ Default propagations rules:
  ``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``,
  ``qsort_r``, ``rawmemchr``, `

[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-12 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp review_requested 
https://github.com/llvm/llvm-project/pull/66086
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-12 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp review_requested 
https://github.com/llvm/llvm-project/pull/66086
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-13 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp review_requested 
https://github.com/llvm/llvm-project/pull/66086
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-13 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp review_requested 
https://github.com/llvm/llvm-project/pull/66086
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-14 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

If we remove the malloc(..) as the taint sink, we would lose some true positive 
findings where the size of the allocated
area is specified directly as a number by the attacker:
```
char *size=getenv("SIZE");
if (size){
pathbuf=(char*) malloc(atoi(size)+1); // warn: denial of service attack!
...
}
```
The above example is prone to a denial of service attack, as the attacker just 
specifies an arbitrarily large number for which a buffer will be allocated. The 
attacker needs far fewer resources to specify a large number than the receiver 
needs to allocate a large chunk of memory.

On the other hand when we have a code like this:
```
char *user_txt=getenv("SIZE");
if (user_txt){ 
pathbuf=(char*) malloc(strlen(user_txt)+1); // No Warning as the malloc 
parameter comes from the size of an already allocated buffer
...
}
```
Here we should not warn, as the size passed to malloc is the size of an already 
allocated buffer. The resources the attacker must invest to provide the large 
string and the resources the server spends allocating another buffer to contain 
that string are symmetrical, so this is not prone to a DoS attack.

A more sophisticated longer term solution could be that we add a flag to the 
taint info (or introduce a taint type) that the tainted value was originating 
from an existing buffer size and then specify the malloc sink so that it should 
not warn in that case. I know we cannot do this now, but the taint analysis 
could be extended in this direction.

Back to this solution.
Please note that this is only the default configuration of the checker.
The user could add strlen() as a propagator in the taint config file.
If we decide to remove strlen() as a propagator (as in this patch), we could 
highlight in the documentation of the checker that the user may want to 
add it back.

So for me either solution would work:
a) remove strlen() as a propagator and note it in the checker doc
b) remove malloc() as a sink and note it in the checker doc
c) don't do anything and live with the false positives

Which one would you prefer?

https://github.com/llvm/llvm-project/pull/66086
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] TaintPropagation checker strlen() should not propagate (PR #66086)

2023-09-16 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/66086

>From f8997b16c74543eb57b272c4dd4abca1a10d9ac7 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 8 Sep 2023 16:57:49 +0200
Subject: [PATCH] [analyzer] TaintPropagation checker strlen() should not
 propagate

strlen(..) call should not propagate taintedness,
because it brings in many false positive findings.
It is a common pattern to copy user provided input
to another buffer. In these cases we always
get warnings about tainted data used as the malloc parameter:

buf = malloc(strlen(tainted_txt) + 1); // false warning

This pattern can lead to a denial of service attack only, when
the attacker can directly specify the size of the allocated area
as an arbitrary large number (e.g. the value is converted
from a user provided string).

Later, we could reintroduce strlen() as a taint propagating function
with the consideration not to emit warnings when the tainted value
cannot be "arbitrarily large" (such as the size of an already allocated
memory block).
---
 clang/docs/analyzer/checkers.rst|  4 ++--
 .../StaticAnalyzer/Checkers/GenericTaintChecker.cpp |  3 ---
 clang/test/Analysis/taint-diagnostic-visitor.c  | 13 +++--
 clang/test/Analysis/taint-generic.c | 10 +-
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 54ea49e7426cc86..dbd6d7787823530 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2599,8 +2599,8 @@ Default propagations rules:
  ``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``,
  ``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``,
  ``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``,
- ``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``,
- ``strndupa``, ``strnlen``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
+ ``strcspn``, ``strncasecmp``, ``strncmp``, ``strndup``,
+ ``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``,
  ``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``,
  ``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth``
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 5da0f34b3d0464f..95a759c251ca490 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -695,9 +695,6 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) 
const {
   {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-  {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-  {{{"wcslen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
-  {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
   {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
   {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index f1b9ceebdd9a6b8..8a7510177f3e444 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -10,6 +10,7 @@ int scanf(const char *restrict format, ...);
 int system(const char *command);
 char* getenv( const char* env_var );
 size_t strlen( const char* str );
+int atoi( const char* str );
 void *malloc(size_t size );
 void free( void *ptr );
 char *fgets(char *str, int n, FILE *stream);
@@ -54,11 +55,11 @@ void taintDiagnosticVLA(void) {
 // propagating through variables and expressions
 char *taintDiagnosticPropagation(){
   char *pathbuf;
-  char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
+  char *size=getenv("SIZE"); // expected-note {{Taint originated here}}
  // expected-note@-1 {{Taint propagated to the 
return value}}
-  if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
+  if (size){ // expected-note {{Assuming 'size' is non-null}}
   // expected-note@-1 {{Taking true branch}}
-pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted 
data is used to specify the buffer size}}
+pathbuf=(char*) malloc(atoi(size)); // expected-warning{{Untrusted data is 
used to specify the buffer size}}
 // expected-note@-1{{Untrusted 
data is used to specify the buffer size}}
 // expected-note@-2 {{Taint 
propagated to the return value}}
 return pathbuf;
@@ -71,12 +72,12 @@ char *taintDiagnosticPropagation(){
 char *taintDiagnosticPropagation2(){
  

[clang] 343bdb1 - [analyzer] Show taint origin and propagation correctly

2023-04-26 Thread Daniel Krupp via cfe-commits

Author: Daniel Krupp
Date: 2023-04-26T12:43:36+02:00
New Revision: 343bdb10940cb2387c0b9bd3caccee7bb56c937b

URL: 
https://github.com/llvm/llvm-project/commit/343bdb10940cb2387c0b9bd3caccee7bb56c937b
DIFF: 
https://github.com/llvm/llvm-project/commit/343bdb10940cb2387c0b9bd3caccee7bb56c937b.diff

LOG: [analyzer] Show taint origin and propagation correctly

This patch improves the diagnostics of the alpha.security.taint.TaintPropagation
checker and taint related checkers by showing the "Taint originated here" note
at the correct place, where the attacker may inject it. This greatly improves
the understandability of the taint reports.

In the baseline the taint source was pointing to an invalid location, typically
somewhere between the real taint source and sink.

After the fix, the "Taint originated here" tag is correctly shown at the taint
source. This is the function call where the attacker can inject malicious data
(e.g. reading from environment variable, reading from file, reading from
standard input etc.).

This patch removes the BugVisitor from the implementation and replaces it with 2
new NoteTags. One, in the taintOriginTrackerTag() prints the "taint originated
here" Note and the other in taintPropagationExplainerTag() explaining how the
taintedness is propagating from argument to argument or to the return value
("Taint propagated to the Xth argument"). This implementation uses the
interestingness BugReport utility to track back the tainted symbols through
propagating function calls to the point where the taintedness was introduced by
a source function call.

The checker which wishes to emit a Taint related diagnostic must use the
categories::TaintedData BugType category and must mark the tainted symbols as
interesting. Then the TaintPropagationChecker will automatically generate the
"Taint originated here" and the "Taint propagated to..." diagnostic notes.

Added: 


Modified: 
clang/include/clang/StaticAnalyzer/Checkers/Taint.h
clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
clang/lib/StaticAnalyzer/Checkers/Taint.cpp
clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
clang/test/Analysis/taint-diagnostic-visitor.c
clang/test/Analysis/taint-tester.c

Removed: 




diff  --git a/clang/include/clang/StaticAnalyzer/Checkers/Taint.h 
b/clang/include/clang/StaticAnalyzer/Checkers/Taint.h
index df863a2495413..3ec8dbfb09ee3 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Taint.h
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Taint.h
@@ -79,26 +79,48 @@ bool isTainted(ProgramStateRef State, SymbolRef Sym,
 bool isTainted(ProgramStateRef State, const MemRegion *Reg,
TaintTagType Kind = TaintTagGeneric);
 
+/// Returns the tainted Symbols for a given Statement and state.
+std::vector getTaintedSymbols(ProgramStateRef State, const Stmt *S,
+ const LocationContext *LCtx,
+ TaintTagType Kind = TaintTagGeneric);
+
+/// Returns the tainted Symbols for a given SVal and state.
+std::vector getTaintedSymbols(ProgramStateRef State, SVal V,
+ TaintTagType Kind = TaintTagGeneric);
+
+/// Returns the tainted Symbols for a SymbolRef and state.
+std::vector getTaintedSymbols(ProgramStateRef State, SymbolRef Sym,
+ TaintTagType Kind = TaintTagGeneric);
+
+/// Returns the tainted (index, super/sub region, symbolic region) symbols
+/// for a given memory region.
+std::vector getTaintedSymbols(ProgramStateRef State,
+ const MemRegion *Reg,
+ TaintTagType Kind = TaintTagGeneric);
+
+std::vector getTaintedSymbolsImpl(ProgramStateRef State,
+ const Stmt *S,
+ const LocationContext *LCtx,
+ TaintTagType Kind,
+ bool returnFirstOnly);
+
+std::vector getTaintedSymbolsImpl(ProgramStateRef State, SVal V,
+ TaintTagType Kind,
+ bool returnFirstOnly);
+
+std::vector getTaintedSymbolsImpl(ProgramStateRef State,
+ SymbolRef Sym, TaintTagType Kind,
+ bool returnFirstOnly);
+
+std::vector getTaintedSymbolsImpl(ProgramStateRef State,
+ const MemRegion *Reg,
+ TaintTagType 

[PATCH] D24307: calculate extent size for memory regions allocated by C++ new expression

2016-09-07 Thread Daniel Krupp via cfe-commits
dkrupp created this revision.
dkrupp added reviewers: xazax.hun, NoQ, dcoughlin, zaks.anna.
dkrupp added a subscriber: cfe-commits.

ArrayBoundChecker did not detect out of bounds memory access errors in case an 
array was allocated by the new expression.

1.  MallocChecker.cpp was updated to calculate the extent size in Bytes 
similarly to how it was done for memory regions allocated by malloc. The size 
constraint is added for arrays and non-arrays allocated by new.

2.  ArrayBoundCheckerV2.cpp was updated to better handle accessing locations 
preceding a symbolic memory region (such as buf[-1] in test2(..) in 
out-of-bounds.cpp). So computeExtentBegin(..) was updated to assume that the 
extent of a symbolic region starts at 0 if we know the size of the extent (as 
is the case in case of malloc or new).

3. out-of-bounds.cpp contains the relevant test cases for C++.

https://reviews.llvm.org/D24307

Files:
  lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  lib/StaticAnalyzer/Checkers/MallocChecker.cpp
  test/Analysis/out-of-bounds.cpp

Index: test/Analysis/out-of-bounds.cpp
===
--- /dev/null
+++ test/Analysis/out-of-bounds.cpp
@@ -0,0 +1,147 @@
+// RUN: %clang_cc1 -std=c++11 -Wno-array-bounds -analyze -analyzer-checker=unix,core,alpha.security.ArrayBoundV2 -verify %s
+
+// Tests doing an out-of-bounds access after the end of an array using:
+// - constant integer index
+// - constant integer size for buffer
+void test1(int x) {
+  int *buf = new int[100];
+  buf[100] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ok(int x) {
+  int *buf = new int[100];
+  buf[99] = 1; // no-warning
+}
+
+// Tests doing an out-of-bounds access after the end of an array using:
+// - indirect pointer to buffer
+// - constant integer index
+// - constant integer size for buffer
+void test1_ptr(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p[101] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ptr_ok(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p[99] = 1; // no-warning
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - indirect pointer to buffer, manipulated using simple pointer arithmetic
+// - constant integer index
+// - constant integer size for buffer
+void test1_ptr_arith(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 100;
+  p[0] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ptr_arith_ok(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 99;
+  p[0] = 1; // no-warning
+}
+
+void test1_ptr_arith_bad(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 99;
+  p[1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ptr_arith_ok2(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 99;
+  p[-1] = 1; // no-warning
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - constant integer index
+// - constant integer size for buffer
+void test2(int x) {
+  int *buf = new int[100];
+  buf[-1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - indirect pointer to buffer
+// - constant integer index
+// - constant integer size for buffer
+void test2_ptr(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p[-1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - indirect pointer to buffer, manipulated using simple pointer arithmetic
+// - constant integer index
+// - constant integer size for buffer
+void test2_ptr_arith(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  --p;
+  p[0] = 1; // expected-warning {{Out of bound memory access (accessed memory precedes memory block)}}
+}
+
+// Tests doing an out-of-bounds access before the start of a multi-dimensional
+// array using:
+// - constant integer indices
+// - constant integer sizes for the array
+void test2_multi(int x) {
+  auto buf = new int[100][100];
+  buf[0][-1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests doing an out-of-bounds access before the start of a multi-dimensional
+// array using:
+// - constant integer indices
+// - constant integer sizes for the array
+void test2_multi_b(int x) {
+  auto buf = new int[100][100];
+  buf[-1][0] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests doing an out-of-bounds access before the start of a multi-dimensional
+// array using:
+// - constant integer indices
+// - constant integer sizes for the array
+void test2_multi_c(int x) {
+  auto buf = new int[100][100];
+  buf[100][0] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests doing an out-of-bounds access before the start of a multi-dimensional
+// array using:
+// - constant integer indices
+// - constant integer sizes f

Re: [PATCH] D24307: calculate extent size for memory regions allocated by C++ new expression

2016-09-08 Thread Daniel Krupp via cfe-commits
dkrupp added inline comments.


Comment at: lib/StaticAnalyzer/Checkers/MallocChecker.cpp:1003
@@ +1002,3 @@
+//
+ProgramStateRef MallocChecker::addExtentSize(CheckerContext &C,
+ const CXXNewExpr *NE,

xazax.hun wrote:
> zaks.anna wrote:
> > I am not sure this code belongs to the malloc checker since it only 
> > supports the array bounds checker. Is there a reason it's not part of that 
> > checker?
> I think it is part of the malloc checker because it already does something 
> very very similar to malloc, see the MallocMemAux function. So in fact, for 
> the array bounds checker to work properly, the malloc checker should be 
> turned on.
Extent size is used by ArrayBoundChecker, ArrayBoundCheckerV2 and 
CStringChecker checkers currently. The new expression in the case of simple allocations 
(0 allocation) was already handled in the Malloc checker, which is why I implemented 
it there. But I agree it feels odd that one has to switch on unix.Malloc 
checker to get the size of new allocated heap regions. Should I move this to 
ArrayBoundChecker or ArrayBoundCheckerV2?


https://reviews.llvm.org/D24307



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D24307: calculate extent size for memory regions allocated by C++ new expression

2016-09-09 Thread Daniel Krupp via cfe-commits
dkrupp added inline comments.


Comment at: lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp:83
@@ -78,1 +82,3 @@
+  // we can assume that the region starts at 0.
+  if (!state->isNull(extentVal).isConstrained()) {
 return UnknownVal();

NoQ wrote:
> Perhaps you could consider the memory space of the `region`, it would look a 
> bit less hacky to me.
> 
> In my dreams, i wish heap regions were no longer symbolic regions, and this 
> hack would go away then.
> 
> Also, i recall there is a bug in `isNull()`: in the `ConstraintManager` class 
> (this time i actually mean //the abstract base class// of 
> `RangeConstraintManager`) this function boils down to `assume()`, but in 
> `RangeConstraintManager` it is overridden to do a direct lookup into the 
> constraint map; which means that in fact this function does not simplify 
> symbolic expressions before answering. This code is probably unaffected 
> because extents are always either concrete or atomic symbols, but i think i'd 
> make a patch for that.
Good point!
region->getMemorySpace() does a very similar recursion as the while loop in 
this function. So I  guess the while loop can be refactored like this:

```
static SVal computeExtentBegin(SValBuilder &svalBuilder, 
const MemRegion *region) {
  const MemSpaceRegion *SR = region->getMemorySpace();
  if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
return UnknownVal();
  else
return svalBuilder.makeZeroArrayIndex();
 }
```
All test cases pass. Particularly it filters out this false positive from 
out-of-bounds.c :

```
// Don't warn when indexing below the start of a symbolic region's whose 
// base extent we don't know.
int *get_symbolic();
void test_index_below_symboloc() {
  int *buf = get_symbolic();
  buf[-1] = 0; // no-warning;
}
```



https://reviews.llvm.org/D24307



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D24307: calculate extent size for memory regions allocated by C++ new expression

2016-09-09 Thread Daniel Krupp via cfe-commits
dkrupp updated this revision to Diff 70821.
dkrupp added a comment.

I tried to address all your comments.

1. computeExtentBegin() is greatly simplified.
2. addExtentSize() is simplified (scaleValue() function inlined)
3. new testcases added a) allocation and indexing of non-array element (int *ip 
=new int;) b) allocation of array with run-time size


https://reviews.llvm.org/D24307

Files:
  lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  lib/StaticAnalyzer/Checkers/MallocChecker.cpp
  test/Analysis/out-of-bounds-new.cpp

Index: test/Analysis/out-of-bounds-new.cpp
===
--- /dev/null
+++ test/Analysis/out-of-bounds-new.cpp
@@ -0,0 +1,150 @@
+// RUN: %clang_cc1 -std=c++11 -Wno-array-bounds -analyze -analyzer-checker=unix,core,alpha.security.ArrayBoundV2 -verify %s
+
+// Tests doing an out-of-bounds access after the end of an array using:
+// - constant integer index
+// - constant integer size for buffer
+void test1(int x) {
+  int *buf = new int[100];
+  buf[100] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ok(int x) {
+  int *buf = new int[100];
+  buf[99] = 1; // no-warning
+}
+
+// Tests doing an out-of-bounds access after the end of an array using:
+// - indirect pointer to buffer
+// - constant integer index
+// - constant integer size for buffer
+void test1_ptr(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p[101] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ptr_ok(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p[99] = 1; // no-warning
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - indirect pointer to buffer, manipulated using simple pointer arithmetic
+// - constant integer index
+// - constant integer size for buffer
+void test1_ptr_arith(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 100;
+  p[0] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ptr_arith_ok(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 99;
+  p[0] = 1; // no-warning
+}
+
+void test1_ptr_arith_bad(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 99;
+  p[1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+void test1_ptr_arith_ok2(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p = p + 99;
+  p[-1] = 1; // no-warning
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - constant integer index
+// - constant integer size for buffer
+void test2(int x) {
+  int *buf = new int[100];
+  buf[-1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - indirect pointer to buffer
+// - constant integer index
+// - constant integer size for buffer
+void test2_ptr(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  p[-1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests doing an out-of-bounds access before the start of an array using:
+// - indirect pointer to buffer, manipulated using simple pointer arithmetic
+// - constant integer index
+// - constant integer size for buffer
+void test2_ptr_arith(int x) {
+  int *buf = new int[100];
+  int *p = buf;
+  --p;
+  p[0] = 1; // expected-warning {{Out of bound memory access (accessed memory precedes memory block)}}
+}
+
+// Tests under-indexing
+// of a multi-dimensional array
+void test2_multi(int x) {
+  auto buf = new int[100][100];
+  buf[0][-1] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests under-indexing
+// of a multi-dimensional array
+void test2_multi_b(int x) {
+  auto buf = new int[100][100];
+  buf[-1][0] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests over-indexing
+// of a multi-dimensional array
+void test2_multi_c(int x) {
+  auto buf = new int[100][100];
+  buf[100][0] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests over-indexing
+// of a multi-dimensional array
+void test2_multi_2(int x) {
+  auto buf = new int[100][100];
+  buf[99][100] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests normal access of
+// a multi-dimensional array
+void test2_multi_ok(int x) {
+  auto buf = new int[100][100];
+  buf[0][0] = 1; // no-warning
+}
+
+// Tests over-indexing using different types
+// array
+void test_diff_types(int x) {
+  int *buf = new int[10]; //10*sizeof(int) Bytes allocated
+  char *cptr = (char *)buf;
+  cptr[sizeof(int) * 9] = 1;  // no-warning
+  cptr[sizeof(int) * 10] = 1; // expected-warning{{Out of bound memory access}}
+}
+
+// Tests over-indexing
+//if the allocated area is non-array
+void test_non_array(int x) {
+  int *ip = new int;
+  ip[0] = 1; // no-warning
+  ip[1] = 2; // expected-warning{{Out of bound memory access}}
+}
+
+//Tests over-indexing
+//if the allocated area size is a runtime parameter
+void test_dynamic_size(int s) {
+  int *buf = new int[s];
+  b

Re: [PATCH] D24307: calculate extent size for memory regions allocated by C++ new expression

2016-09-09 Thread Daniel Krupp via cfe-commits
dkrupp added inline comments.


Comment at: lib/StaticAnalyzer/Checkers/MallocChecker.cpp:1011
@@ +1010,3 @@
+// containing the elements.
+Region = (State->getSVal(NE, LCtx))
+ .getAsRegion()

MemRegion has no method called castAs<>, only getAs<>, so I stayed with it.


Comment at: lib/StaticAnalyzer/Checkers/MallocChecker.cpp:1020
@@ +1019,3 @@
+  }
+  assert(Region);
+

I changed the type of Region to SubRegion, hope this is clearer this way.


Comment at: lib/StaticAnalyzer/Checkers/MallocChecker.cpp:1043
@@ -988,3 +1042,3 @@
 void MallocChecker::checkPreStmt(const CXXDeleteExpr *DE,
  CheckerContext &C) const {
 

now inlined


https://reviews.llvm.org/D24307



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D24307: calculate extent size for memory regions allocated by C++ new expression

2016-09-12 Thread Daniel Krupp via cfe-commits
dkrupp marked 11 inline comments as done.
dkrupp added a comment.

issues fixed


https://reviews.llvm.org/D24307



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D10305: [Clang Static Analyzer] Bug identification

2015-09-22 Thread Daniel Krupp via cfe-commits
dkrupp added a comment.

Hi,

Regarding testing:
I think we should create a RecursiveASTVisitor-based "test checker" that matches 
every statement and declaration and reports a bug there.
Then we could create a test file similar to what we have in 
/tools/clang/test/Analysis/diagnostics/report-issues-within-main-file.cpp
where the expected plist output can be written at the end of the file.

I am not sure though where to register this test "test checker". Should it be a 
dynamically loadable checker similar to 
/tools/clang/examples/analyzer-plugin/MainCallChecker.cpp or it should be a 
debug checker like (debug.DumpCalls)?

The advantage of the dynamic lib based solution is that we would not need to 
statically add it to clang-sa.

What do you think?

Regards,
Daniel


http://reviews.llvm.org/D10305



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D12906: [RFC] Bug identification("issue_hash") change for CmpRuns.py

2015-10-21 Thread Daniel Krupp via cfe-commits
dkrupp added a comment.

In http://reviews.llvm.org/D12906#272243, @zaks.anna wrote:

> > > In http://reviews.llvm.org/D10305#224956, @zaks.anna wrote:
>
> > 
>
> > >  For example, you could keep the information about the reports in the 
> > > plist files and use those to 
>
> > 
>
> > >  render the reports in HTML.
>
> > 
>
> > 
>
> > If you're okay with adding HTML file name in plist for each bug, I will 
> > prepare a new patch for that.
>
> >  Thanks for the review!
>
>
> I think you misunderstood my comment. I am not talking about using the 
> existing HTML files here but rather having an HTML viewer, which you could 
> use to browse source code. This viewer would be extended to read the bug 
> reports from the plist files and display them. Currently, we create an html 
> file with source code + report info for each bug report. This does not scale 
> when you have a lot of reports on a single large file (ex: sqlite).
>
> What I describe above is a larger project. What workflow are you trying to 
> support? I think adding the issue hash to the HTML file is fine if you find 
> it to be useful for your workflow...


Hi Anna & Kim,

We recognized the scalability issues you just described, and that's why we 
created the CodeChecker tool (https://github.com/Ericsson/codechecker/), 
an HTML report viewer for Clang SA.

Reports are stored in a postgresql db. Each source file is only stored once 
(unlike scanbuild), supports bug suppression and diff view between runs etc.
And it works pretty fast with many-million lines of code projects.

If you have time, give it a try. We are happy to get feedback...
Regards,
Daniel


http://reviews.llvm.org/D12906



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D12906: [RFC] Bug identification("issue_hash") change for CmpRuns.py

2015-10-22 Thread Daniel Krupp via cfe-commits
dkrupp added a comment.

Hi,

It's a good idea to include it in LLVM/Clang; I will propose it.

In http://reviews.llvm.org/D12906#272265, @zaks.anna wrote:

> Hi Daniel,
>
> Have you considered contributing this work to clang/llvm?


It's a good idea; I will propose this at cfe-dev.

Daniel


http://reviews.llvm.org/D12906



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2024-02-23 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

I executed the analysis with this patch on the following open source projects: 
memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres, xerces

And it did not bring any visible change in the reports. So there were no new or 
resolved findings compared to the baseline.
In both the baseline and the new analysis execution the  
alpha.security.taint.TaintPropagation and the core.VLASize checkers were 
enabled.

Link to the diff:
https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?review-status=Unreviewed&review-status=Confirmed%20bug&detection-status=New&detection-status=Reopened&detection-status=Unresolved&run=%2avla_taint_baseline&is-unique=off&newcheck=%2avla_taint_new&diff-type=New&checker-msg=%2ataint%2a

https://github.com/llvm/llvm-project/pull/68140
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2024-02-23 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68140

>From 4b310278d2923ff718d074a7f7c8806ad03c6401 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 3 Oct 2023 19:58:28 +0200
Subject: [PATCH 1/5] [analyzer] Fix core.VLASize checker false positive taint
 reports

The checker reported a false positive on this code
void testTaintedSanitizedVLASize(void) {
  int x;
  scanf("%d", &x);
  if (x<1)
return;
  int vla[x]; // no-warning
}

After the fix, the checker only emits tainted warning if the vla size is
coming from a tainted source and it cannot prove that it is positive.
---
 .../StaticAnalyzer/Checkers/VLASizeChecker.cpp   | 16 
 clang/test/Analysis/taint-diagnostic-visitor.c   |  4 ++--
 clang/test/Analysis/taint-generic.c  | 11 ++-
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index b195d912cadfe9..46b5f5e10f0e65 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -162,12 +162,6 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   if (SizeV.isUnknown())
 return nullptr;
 
-  // Check if the size is tainted.
-  if (isTainted(State, SizeV)) {
-reportTaintBug(SizeE, State, C, SizeV);
-return nullptr;
-  }
-
   // Check if the size is zero.
   DefinedSVal SizeD = SizeV.castAs();
 
@@ -189,10 +183,10 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
   DefinedOrUnknownSVal Zero = SVB.makeZeroVal(SizeTy);
 
   SVal LessThanZeroVal = SVB.evalBinOp(State, BO_LT, SizeD, Zero, SizeTy);
+  ProgramStateRef StatePos, StateNeg;
   if (std::optional LessThanZeroDVal =
   LessThanZeroVal.getAs()) {
 ConstraintManager &CM = C.getConstraintManager();
-ProgramStateRef StatePos, StateNeg;
 
 std::tie(StateNeg, StatePos) = CM.assumeDual(State, *LessThanZeroDVal);
 if (StateNeg && !StatePos) {
@@ -202,6 +196,12 @@ ProgramStateRef 
VLASizeChecker::checkVLAIndexSize(CheckerContext &C,
 State = StatePos;
   }
 
+  // Check if the size is tainted.
+  if ((StateNeg || StateZero) && isTainted(State, SizeV)) {
+reportTaintBug(SizeE, State, C, SizeV);
+return nullptr;
+  }
+
   return State;
 }
 
@@ -220,7 +220,7 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, 
ProgramStateRef State,
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
   os << "Declared variable-length array (VLA) ";
-  os << "has tainted size";
+  os << "has a tainted (attacker controlled) size, that can be 0 or negative";
 
   auto report = std::make_unique(*TaintBT, os.str(), 
N);
   report->addRange(SizeE->getSourceRange());
diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c 
b/clang/test/Analysis/taint-diagnostic-visitor.c
index 8a7510177f3e44..45369785ed6924 100644
--- a/clang/test/Analysis/taint-diagnostic-visitor.c
+++ b/clang/test/Analysis/taint-diagnostic-visitor.c
@@ -46,8 +46,8 @@ void taintDiagnosticVLA(void) {
   scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
// expected-note@-1 {{Taint originated here}}
// expected-note@-2 {{Taint propagated to the 2nd argument}}
-  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has 
tainted size}}
-  // expected-note@-1 {{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning {{Declared variable-length array (VLA) has a 
tainted}}
+  // expected-note@-1 {{Declared variable-length array (VLA) has a 
tainted}}
 }
 
 
diff --git a/clang/test/Analysis/taint-generic.c 
b/clang/test/Analysis/taint-generic.c
index c6a01594f15abb..ae2ae5b23aab3c 100644
--- a/clang/test/Analysis/taint-generic.c
+++ b/clang/test/Analysis/taint-generic.c
@@ -405,7 +405,16 @@ int testDivByZero(void) {
 void testTaintedVLASize(void) {
   int x;
   scanf("%d", &x);
-  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has 
tainted size}}
+  int vla[x]; // expected-warning{{Declared variable-length array (VLA) has a 
tainted (attacker controlled) size, that can be 0 or negative}}
+}
+
+// Tainted-sanitized VLAs.
+void testTaintedSanitizedVLASize(void) {
+  int x;
+  scanf("%d", &x);
+  if (x<1)
+return;
+  int vla[x]; // no-warning
 }
 
 int testTaintedAllocaMem() {

>From 94fa4af57d28854df1c6ab3e3be2a7a902b620f1 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 10 Oct 2023 14:35:52 +0200
Subject: [PATCH 2/5] fixup!

---
 clang/docs/analyzer/checkers.rst  | 27 ---
 .../Checkers/VLASizeChecker.cpp   |  2 +-
 clang/test/Analysis/taint-generic.c   |  2 +-
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index dbd6d778782353..a48ba784ee9433 100644
--- a/clang/d

[clang] [analyzer] Fix core.VLASize checker false positive taint reports (PR #68140)

2024-02-23 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp closed https://github.com/llvm/llvm-project/pull/68140
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][analyzer] Improve PointerSubChecker (PR #96501)

2024-07-04 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

> > Even protobuf contains this type of code: 
> > https://codechecker-demo.eastus.cloudapp.azure.com/Default/report-detail?run=protobuf_v3.13.0_pointersub1&is-unique=on&diff-type=New&checker-name=alpha.core.PointerSub&report-id=5545776&report-hash=1bcd310fbaeccbcc13645b9b277239a2&report-filepath=%2adescriptor.pb.cc
> 
> I still think that this (1) is undeniably undefined behavior (2) isn't 
> common, so won't cause "spam" problems and (3) can be replaced by 
> standard-compliant code (`offsetof`) so there is no need to introduce a 
> special case for it.

I agree with @NagyDonat that we don't need special handling of this case in the 
code; however, I think the checker 
[documentation](https://clang.llvm.org/docs/analyzer/checkers.html#alpha-core-pointersub-c) 
should be extended with a description of this special case, including an 
example, as it may be a surprising warning from the checker. Specifically, it 
should state that the checker warns when two pointers that point to members of 
the same struct are subtracted, and suggest the standard-compliant solution: 
offsetof.

So please describe which pointer subtractions the checker accepts and which 
it rejects (with examples), and add a reference to the part of the standard 
that describes the undefined behaviour.


https://github.com/llvm/llvm-project/pull/96501
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-09-09 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

@haoNoQ gentle ping. Could you please check if this would be good to be merged 
now? thanks.

https://github.com/llvm/llvm-project/pull/67352
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-08-15 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/67352

>From 21a917403c180d74ec7ac4cf9f15b3c5a8de8b7d Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Thu, 15 Aug 2024 14:24:35 +0200
Subject: [PATCH] [analyzer] Moving TaintPropagation and GenericTaint checkers
 out of alpha

alpha.security.taint.TaintPropagation
modeling checker is renamed to optin.taint.TaintPropagation.

alpha.security.taint.GenericTaint
user facing checker is renamed to optin.taint.genericTaint

These checkers were stabilized and improved by recent commits,
thus it's ready for (optional) production use.

The checker is placed in the optin package as it implements
an optional security analysis.
---
 clang/docs/analyzer/checkers.rst  | 470 +-
 .../user-docs/TaintAnalysisConfiguration.rst  |   4 +-
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  43 +-
 clang/test/Analysis/analyzer-config.c |   2 +-
 .../Analysis/assume-controlled-environment.c  |   4 +-
 clang/test/Analysis/bool-assignment.c |   4 +-
 clang/test/Analysis/cxx-method-names.cpp  |   2 +-
 .../Analysis/debug-exprinspection-istainted.c |   2 +-
 .../sarif-diagnostics-taint-test.c.sarif  |   2 +-
 .../sarif-multi-diagnostic-test.c.sarif   |   2 +-
 .../sarif-diagnostics-taint-test.c|   2 +-
 .../diagnostics/sarif-multi-diagnostic-test.c |   3 +-
 clang/test/Analysis/fread.c   |   2 +-
 .../global-region-invalidation-errno.c|   4 +-
 .../Analysis/global-region-invalidation.c |   2 +-
 clang/test/Analysis/malloc.c  |   2 +-
 clang/test/Analysis/malloc.cpp|   8 +-
 .../test/Analysis/out-of-bounds-diagnostics.c |   2 +-
 clang/test/Analysis/out-of-bounds-notes.c |   2 +-
 clang/test/Analysis/redefined_system.c|   2 +-
 clang/test/Analysis/string.c  |   2 +-
 ...nt-checker-callback-order-has-definition.c |   2 +-
 ...hecker-callback-order-without-definition.c |   2 +-
 .../test/Analysis/taint-diagnostic-visitor.c  |   2 +-
 clang/test/Analysis/taint-dumps.c |   2 +-
 clang/test/Analysis/taint-generic.c   |  26 +-
 clang/test/Analysis/taint-generic.cpp |   2 +-
 clang/test/Analysis/taint-tester.c|   2 +-
 clang/test/Analysis/taint-tester.cpp  |   3 +-
 clang/test/Analysis/taint-tester.m|   6 +-
 clang/utils/analyzer/SATestBuild.py   |   2 +-
 31 files changed, 304 insertions(+), 311 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 0bfbc995579d41..7abbe7dfe3c44c 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -992,6 +992,241 @@ optin.portability.UnixAPI
 "
 Finds implementation-defined behavior in UNIX/Posix functions.
 
+
+optin.taint
+
+
+Checkers implementing
+`taint analysis `_.
+
+.. _optin-taint-GenericTaint:
+
+optin.taint.GenericTaint (C, C++)
+""
+
+Taint analysis identifies potential security vulnerabilities where the
+attacker can inject malicious data to the program to execute an attack
+(privilege escalation, command injection, SQL injection etc.).
+
+The malicious data is injected at the taint source (e.g. ``getenv()`` call)
+which is then propagated through function calls and being used as arguments of
+sensitive operations, also called as taint sinks (e.g. ``system()`` call).
+
+One can defend against this type of vulnerability by always checking and
+sanitizing the potentially malicious, untrusted user input.
+
+The goal of the checker is to discover and show to the user these potential
+taint source-sink pairs and the propagation call chain.
+
+The most notable examples of taint sources are:
+
+  - data from network
+  - files or standard input
+  - environment variables
+  - data from databases
+
+Let us examine a practical example of a Command Injection attack.
+
+.. code-block:: c
+
+  // Command Injection Vulnerability Example
+  int main(int argc, char** argv) {
+char cmd[2048] = "/bin/cat ";
+char filename[1024];
+printf("Filename:");
+scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape 
here
+strcat(cmd, filename);
+system(cmd); // Warning: Untrusted data is passed to a system call
+  }
+
+The program prints the content of any user specified file.
+Unfortunately the attacker can execute arbitrary commands
+with shell escapes. For example with the following input the `ls` command is 
also
+executed after the contents of `/etc/shadow` is printed.
+`Input: /etc/shadow ; ls /`
+
+The analysis implemented in this checker points out this problem.
+
+One can protect against such attack by for example checking if the provided
+input refers to a valid file and removing any invalid user input.
+
+.. code-block:: c
+
+  // No vulnerability anymore, but we

[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-08-16 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/67352

>From 11b85a494bfc844d9474efd2c9679cc5c0f4f889 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Thu, 15 Aug 2024 14:24:35 +0200
Subject: [PATCH] [analyzer] Moving TaintPropagation and GenericTaint checkers
 out of alpha

alpha.security.taint.TaintPropagation
modeling checker is renamed to optin.taint.TaintPropagation.

alpha.security.taint.GenericTaint
user facing checker is renamed to optin.taint.genericTaint

These checkers were stabilized and improved by recent commits,
thus it's ready for (optional) production use.

The checker is placed in the optin package as it implements
an optional security analysis.
---
 clang/docs/analyzer/checkers.rst  | 472 +-
 .../user-docs/TaintAnalysisConfiguration.rst  |   4 +-
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  43 +-
 clang/test/Analysis/analyzer-config.c |   2 +-
 .../Analysis/assume-controlled-environment.c  |   4 +-
 clang/test/Analysis/bool-assignment.c |   4 +-
 clang/test/Analysis/cxx-method-names.cpp  |   2 +-
 .../Analysis/debug-exprinspection-istainted.c |   2 +-
 .../sarif-diagnostics-taint-test.c.sarif  |   2 +-
 .../sarif-multi-diagnostic-test.c.sarif   |   2 +-
 .../sarif-diagnostics-taint-test.c|   2 +-
 .../diagnostics/sarif-multi-diagnostic-test.c |   3 +-
 clang/test/Analysis/fread.c   |   2 +-
 .../global-region-invalidation-errno.c|   4 +-
 .../Analysis/global-region-invalidation.c |   2 +-
 clang/test/Analysis/malloc.c  |   2 +-
 clang/test/Analysis/malloc.cpp|   8 +-
 .../test/Analysis/out-of-bounds-diagnostics.c |   2 +-
 clang/test/Analysis/out-of-bounds-notes.c |   2 +-
 clang/test/Analysis/redefined_system.c|   2 +-
 clang/test/Analysis/string.c  |   2 +-
 ...nt-checker-callback-order-has-definition.c |   2 +-
 ...hecker-callback-order-without-definition.c |   2 +-
 .../test/Analysis/taint-diagnostic-visitor.c  |   2 +-
 clang/test/Analysis/taint-dumps.c |   2 +-
 clang/test/Analysis/taint-generic.c   |  26 +-
 clang/test/Analysis/taint-generic.cpp |   2 +-
 clang/test/Analysis/taint-tester.c|   2 +-
 clang/test/Analysis/taint-tester.cpp  |   3 +-
 clang/test/Analysis/taint-tester.m|   6 +-
 clang/utils/analyzer/SATestBuild.py   |   2 +-
 31 files changed, 305 insertions(+), 312 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 0bfbc995579d41..7310f1be623438 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -992,6 +992,241 @@ optin.portability.UnixAPI
 "
 Finds implementation-defined behavior in UNIX/Posix functions.
 
+
+optin.taint
+
+
+Checkers implementing
+`taint analysis `_.
+
+.. _optin-taint-GenericTaint:
+
+optin.taint.GenericTaint (C, C++)
+""
+
+Taint analysis identifies potential security vulnerabilities where the
+attacker can inject malicious data to the program to execute an attack
+(privilege escalation, command injection, SQL injection etc.).
+
+The malicious data is injected at the taint source (e.g. ``getenv()`` call)
+which is then propagated through function calls and being used as arguments of
+sensitive operations, also called as taint sinks (e.g. ``system()`` call).
+
+One can defend against this type of vulnerability by always checking and
+sanitizing the potentially malicious, untrusted user input.
+
+The goal of the checker is to discover and show to the user these potential
+taint source-sink pairs and the propagation call chain.
+
+The most notable examples of taint sources are:
+
+  - data from network
+  - files or standard input
+  - environment variables
+  - data from databases
+
+Let us examine a practical example of a Command Injection attack.
+
+.. code-block:: c
+
+  // Command Injection Vulnerability Example
+  int main(int argc, char** argv) {
+char cmd[2048] = "/bin/cat ";
+char filename[1024];
+printf("Filename:");
+scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape 
here
+strcat(cmd, filename);
+system(cmd); // Warning: Untrusted data is passed to a system call
+  }
+
+The program prints the content of any user specified file.
+Unfortunately the attacker can execute arbitrary commands
+with shell escapes. For example with the following input the `ls` command is 
also
+executed after the contents of `/etc/shadow` is printed.
+`Input: /etc/shadow ; ls /`
+
+The analysis implemented in this checker points out this problem.
+
+One can protect against such attack by for example checking if the provided
+input refers to a valid file and removing any invalid user input.
+
+.. code-block:: c
+
+  // No vulnerability anymore, but we

[clang] [analyzer] New optin.taint.TaintedAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-05 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From f6fdd544a90b865e5e0e530930db87cad405216e Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/8] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] New optin.taint.TaintedAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-05 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp closed https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From 80767176cbe8e5717c5f42b113f305d81b635cb9 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/4] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From f6fdd544a90b865e5e0e530930db87cad405216e Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/4] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] New optin.taint.TaintAlloc checker for catching malicious memory allocation calls (PR #92420)

2024-05-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-05-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-05-28 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

- Handling of C++ operator new[] allocation was added to the checker, with test 
cases
- The checker is renamed to optin.taint.TaintAlloc because, besides malloc, it 
also handles C++ array new allocations
- Test cases and documentation were updated

@NagyDonat, @steakhal, please check whether any further updates are needed. Thanks.

https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-05-29 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From f6fdd544a90b865e5e0e530930db87cad405216e Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/5] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-03 Thread Daniel Krupp via cfe-commits


@@ -1730,6 +1721,21 @@ def UnixAPIPortabilityChecker : Checker<"UnixAPI">,
 
 } // end optin.portability
 
+
+//===--===//
+// Taint checkers.
+//===--===//
+
+let ParentPackage = TaintOptIn in {
+
+def TaintMallocChecker: Checker<"TaintMalloc">,
+  HelpText<"Check for memory allocations, where the size parameter "
+   "might be a tainted (attacker controlled) value.">,
+  Dependencies<[DynamicMemoryModeling]>,

dkrupp wrote:

I added the GenericTaintChecker as a dependency too. I think it makes sense to 
add it now so that we don't forget it later.

https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-03 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-03 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From f6fdd544a90b865e5e0e530930db87cad405216e Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/6] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-03 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

In the latest commit I fixed all remaining review comments.

GenericTaintChecker should be a dependency, as mentioned in the FIXME, but it 
cannot be one until it has been turned into a modeling checker. This separation 
will be done in a later follow-up patch. Until then, the documentation indicates 
that the alpha.security.taint.TaintPropagation checker should be switched on for 
this checker to work.


https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-05 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From f6fdd544a90b865e5e0e530930db87cad405216e Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/7] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] New optin.taint.TaintedAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-05 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintedAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-05 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] New optin.taint.TaintedAlloc checker for catching unbounded memory allocation calls (PR #92420)

2024-06-05 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

Now the checker is renamed to optin.taint.TaintedAlloc as requested by the 
reviewers.

https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Fix performance of getTaintedSymbolsImpl() (PR #89606)

2024-04-22 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp approved this pull request.

The suggested change makes a lot of sense. Thanks.
LGTM.

https://github.com/llvm/llvm-project/pull/89606
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2024-04-26 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/4] [analyzer] Removing untrusted buffer size taint warning

The alpha.security.taint.TaintPropagation checker
emitted a false warning for the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
tainted size variable is bounded.

The false warning was also emitted for the malloc/calloc calls.

These warnings (the sinks) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done, taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc8..b949cac504eddf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks should be implemented in other checkers
+   // with more sophisticated sanitation heuristics.
   "setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
   "setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-  // SinkProps
-  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-

[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2024-04-30 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/5] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
tainted size variable is bounded.

The false warning was also emitted for the malloc/calloc calls.

These warnings (the sinks) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc8..b949cac504eddf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks should be implemented in other checkers
+   // with more sophisticated sanitation heuristics.
   "setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
   "setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-  // SinkProps
-  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-

[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-08-27 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/67352
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-08-27 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/67352
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-08-27 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/67352

>From 11b85a494bfc844d9474efd2c9679cc5c0f4f889 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Thu, 15 Aug 2024 14:24:35 +0200
Subject: [PATCH 1/2] [analyzer] Moving TaintPropagation and GenericTaint
 checkers out of alpha

alpha.security.taint.TaintPropagation
modeling checker is renamed to optin.taint.TaintPropagation.

alpha.security.taint.GenericTaint
user facing checker is renamed to optin.taint.GenericTaint

These checkers were stabilized and improved by recent commits,
thus they are ready for (optional) production use.

The checker is placed in the optin package as it implements
an optional security analysis.
---
 clang/docs/analyzer/checkers.rst  | 472 +-
 .../user-docs/TaintAnalysisConfiguration.rst  |   4 +-
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  43 +-
 clang/test/Analysis/analyzer-config.c |   2 +-
 .../Analysis/assume-controlled-environment.c  |   4 +-
 clang/test/Analysis/bool-assignment.c |   4 +-
 clang/test/Analysis/cxx-method-names.cpp  |   2 +-
 .../Analysis/debug-exprinspection-istainted.c |   2 +-
 .../sarif-diagnostics-taint-test.c.sarif  |   2 +-
 .../sarif-multi-diagnostic-test.c.sarif   |   2 +-
 .../sarif-diagnostics-taint-test.c|   2 +-
 .../diagnostics/sarif-multi-diagnostic-test.c |   3 +-
 clang/test/Analysis/fread.c   |   2 +-
 .../global-region-invalidation-errno.c|   4 +-
 .../Analysis/global-region-invalidation.c |   2 +-
 clang/test/Analysis/malloc.c  |   2 +-
 clang/test/Analysis/malloc.cpp|   8 +-
 .../test/Analysis/out-of-bounds-diagnostics.c |   2 +-
 clang/test/Analysis/out-of-bounds-notes.c |   2 +-
 clang/test/Analysis/redefined_system.c|   2 +-
 clang/test/Analysis/string.c  |   2 +-
 ...nt-checker-callback-order-has-definition.c |   2 +-
 ...hecker-callback-order-without-definition.c |   2 +-
 .../test/Analysis/taint-diagnostic-visitor.c  |   2 +-
 clang/test/Analysis/taint-dumps.c |   2 +-
 clang/test/Analysis/taint-generic.c   |  26 +-
 clang/test/Analysis/taint-generic.cpp |   2 +-
 clang/test/Analysis/taint-tester.c|   2 +-
 clang/test/Analysis/taint-tester.cpp  |   3 +-
 clang/test/Analysis/taint-tester.m|   6 +-
 clang/utils/analyzer/SATestBuild.py   |   2 +-
 31 files changed, 305 insertions(+), 312 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 0bfbc995579d41..7310f1be623438 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -992,6 +992,241 @@ optin.portability.UnixAPI
 "
 Finds implementation-defined behavior in UNIX/Posix functions.
 
+
+optin.taint
+
+
+Checkers implementing
+`taint analysis `_.
+
+.. _optin-taint-GenericTaint:
+
+optin.taint.GenericTaint (C, C++)
+""
+
+Taint analysis identifies potential security vulnerabilities where the
+attacker can inject malicious data to the program to execute an attack
+(privilege escalation, command injection, SQL injection etc.).
+
+The malicious data is injected at the taint source (e.g. ``getenv()`` call)
+which is then propagated through function calls and being used as arguments of
+sensitive operations, also called as taint sinks (e.g. ``system()`` call).
+
+One can defend against this type of vulnerability by always checking and
+sanitizing the potentially malicious, untrusted user input.
+
+The goal of the checker is to discover and show to the user these potential
+taint source-sink pairs and the propagation call chain.
+
+The most notable examples of taint sources are:
+
+  - data from network
+  - files or standard input
+  - environment variables
+  - data from databases
+
+Let us examine a practical example of a Command Injection attack.
+
+.. code-block:: c
+
+  // Command Injection Vulnerability Example
+  int main(int argc, char** argv) {
+char cmd[2048] = "/bin/cat ";
+char filename[1024];
+printf("Filename:");
+scanf (" %1023[^\n]", filename); // The attacker can inject a shell escape 
here
+strcat(cmd, filename);
+system(cmd); // Warning: Untrusted data is passed to a system call
+  }
+
+The program prints the content of any user specified file.
+Unfortunately the attacker can execute arbitrary commands
+with shell escapes. For example with the following input the `ls` command is 
also
+executed after the contents of `/etc/shadow` is printed.
+`Input: /etc/shadow ; ls /`
+
+The analysis implemented in this checker points out this problem.
+
+One can protect against such attack by for example checking if the provided
+input refers to a valid file and removing any invalid user input.
+
+.. code-block:: c
+
+  // No vulnerability anymore, bu

[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-08-27 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

@steakhal now the commit is rebased and the results in the description are also 
refreshed (not broken).

All the earlier problematic reports related to tainted integers (memset, 
malloc, memcpy ...) are not present now as these were removed from this checker 
as generic sinks by earlier commits.

Your suggested doc fixes were also added.

https://github.com/llvm/llvm-project/pull/67352
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-08-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/106389

>From e979542270b21f4733baf25a7037675af598ca07 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Wed, 28 Aug 2024 15:32:35 +0200
Subject: [PATCH] Adding optin.taint.TaintedDiv checker

Tainted division operation is separated out from the core.DivideZero
checker into the optional optin.taint.TaintedDiv checker.
The checker warns when the denominator in a division operation
is an attacker controlled value.
---
 clang/docs/analyzer/checkers.rst  | 28 +++
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  6 +++
 .../StaticAnalyzer/Core/CheckerManager.h  |  7 +++
 .../Checkers/DivZeroChecker.cpp   | 50 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 clang/test/Analysis/taint-generic.c   |  3 ++
 6 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 89a1018e14c0e6..fab060302310bb 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1053,6 +1053,34 @@ by explicitly marking the ``size`` parameter as 
sanitized. See the
 delete[] ptr;
   }
 
+.. _optin-taint-TaintedDiv:
+
+optin.taint.TaintedDiv (C, C++, ObjC)
+"
+This checker warns when the denominator in a division
+operation is a tainted (potentially attacker controlled) value.
+If the attacker can set the denominator to 0, a runtime error can
+be triggered. The checker warns if the analyzer cannot prove
+that the denominator is not 0 and it is a tainted value.
+This warning is more pessimistic than the :ref:`core-DivideZero` checker
+which warns only when it can prove that the denominator is 0.
+
+.. code-block:: c
+
+  int vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+return n/size; // warn: Division by a tainted value, possibly zero
+  }
+
+  int not_vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+if (!size)
+  return 0;
+return n/size; // no warning
+  }
+
 .. _security-checkers:
 
 security
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index fb4114619ac3d3..5a9afa0f15f5a0 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1710,6 +1710,12 @@ def TaintedAllocChecker: Checker<"TaintedAlloc">,
   Dependencies<[DynamicMemoryModeling, TaintPropagationChecker]>,
   Documentation;
 
+def TaintedDivChecker: Checker<"TaintedDiv">,
+  HelpText<"Check for divisions, where the denominator "
+   "might be 0 as it is a tainted (attacker controlled) value.">,
+  Dependencies<[TaintPropagationChecker]>,
+  Documentation;
+
 } // end "optin.taint"
 
 
//===--===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h 
b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index ad25d18f280700..e900ef0c178a2f 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -223,6 +223,13 @@ class CheckerManager {
 return static_cast(CheckerTags[tag]);
   }
 
+
+  template 
+  bool isRegisteredChecker() {
+CheckerTag tag = getTag();
+return (CheckerTags.count(tag) != 0);
+  }
+
 
//===--===//
 // Functions for running checkers for AST traversing.
 
//===--===//
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5496f087447fbe..60e06ad699c92e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -26,8 +26,6 @@ using namespace taint;
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt > {
-  const BugType BT{this, "Division by zero"};
-  const BugType TaintBT{this, "Division by zero", categories::TaintedData};
   void reportBug(StringRef Msg, ProgramStateRef StateZero,
  CheckerContext &C) const;
   void reportTaintBug(StringRef Msg, ProgramStateRef StateZero,
@@ -35,6 +33,16 @@ class DivZeroChecker : public Checker< 
check::PreStmt > {
   llvm::ArrayRef TaintedSyms) const;
 
 public:
+  /// This checker class implements multiple user facing checker
+  enum CheckKind {
+CK_DivZeroChecker,
+CK_TaintedDivChecker,
+CK_NumCheckKinds
+  };
+  bool ChecksEnabled[CK_NumCheckKinds] = {false};
+  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  mutable std::unique_ptr BugTypes[CK_NumCheckKinds];
+
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
 };
 } // end anonymous namespace
@@ -48,8 +56,14 @@ static const E

[clang] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-08-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/106389

>From ccc5da054903568fbd317d5c773251ed84f8f087 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Wed, 28 Aug 2024 15:32:35 +0200
Subject: [PATCH] Adding optin.taint.TaintedDiv checker

Tainted division operation is separated out from the core.DivideZero
checker into the optional optin.taint.TaintedDiv checker.
The checker warns when the denominator in a division operation
is an attacker controlled value.
---
 clang/docs/analyzer/checkers.rst  | 28 +++
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  6 +++
 .../StaticAnalyzer/Core/CheckerManager.h  |  5 ++
 .../Checkers/DivZeroChecker.cpp   | 46 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 clang/test/Analysis/taint-generic.c   |  3 ++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 89a1018e14c0e6..fab060302310bb 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1053,6 +1053,34 @@ by explicitly marking the ``size`` parameter as 
sanitized. See the
 delete[] ptr;
   }
 
+.. _optin-taint-TaintedDiv:
+
+optin.taint.TaintedDiv (C, C++, ObjC)
+"
+This checker warns when the denominator in a division
+operation is a tainted (potentially attacker controlled) value.
+If the attacker can set the denominator to 0, a runtime error can
+be triggered. The checker warns if the analyzer cannot prove
+that the denominator is not 0 and it is a tainted value.
+This warning is more pessimistic than the :ref:`core-DivideZero` checker
+which warns only when it can prove that the denominator is 0.
+
+.. code-block:: c
+
+  int vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+return n/size; // warn: Division by a tainted value, possibly zero
+  }
+
+  int not_vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+if (!size)
+  return 0;
+return n/size; // no warning
+  }
+
 .. _security-checkers:
 
 security
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index fb4114619ac3d3..5a9afa0f15f5a0 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1710,6 +1710,12 @@ def TaintedAllocChecker: Checker<"TaintedAlloc">,
   Dependencies<[DynamicMemoryModeling, TaintPropagationChecker]>,
   Documentation;
 
+def TaintedDivChecker: Checker<"TaintedDiv">,
+  HelpText<"Check for divisions, where the denominator "
+   "might be 0 as it is a tainted (attacker controlled) value.">,
+  Dependencies<[TaintPropagationChecker]>,
+  Documentation;
+
 } // end "optin.taint"
 
 
//===--===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h 
b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index ad25d18f280700..e47c0c310eb8e7 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -223,6 +223,11 @@ class CheckerManager {
 return static_cast(CheckerTags[tag]);
   }
 
+  template  bool isRegisteredChecker() {
+CheckerTag tag = getTag();
+return (CheckerTags.count(tag) != 0);
+  }
+
 
//===--===//
 // Functions for running checkers for AST traversing.
 
//===--===//
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5496f087447fbe..89aac24d7576e5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -26,8 +26,6 @@ using namespace taint;
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt > {
-  const BugType BT{this, "Division by zero"};
-  const BugType TaintBT{this, "Division by zero", categories::TaintedData};
   void reportBug(StringRef Msg, ProgramStateRef StateZero,
  CheckerContext &C) const;
   void reportTaintBug(StringRef Msg, ProgramStateRef StateZero,
@@ -35,6 +33,12 @@ class DivZeroChecker : public Checker< 
check::PreStmt > {
   llvm::ArrayRef TaintedSyms) const;
 
 public:
+  /// This checker class implements multiple user facing checker
+  enum CheckKind { CK_DivZeroChecker, CK_TaintedDivChecker, CK_NumCheckKinds };
+  bool ChecksEnabled[CK_NumCheckKinds] = {false};
+  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  mutable std::unique_ptr BugTypes[CK_NumCheckKinds];
+
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
 };
 } // end anonymous namespace
@@ -48,8 +52,14 @@ static const Expr *getDenomExpr(const 

[clang] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-08-28 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/106389

>From ccc5da054903568fbd317d5c773251ed84f8f087 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Wed, 28 Aug 2024 15:32:35 +0200
Subject: [PATCH 1/2] Adding optin.taint.TaintedDiv checker

Tainted division operation is separated out from the core.DivideZero
checker into the optional optin.taint.TaintedDiv checker.
The checker warns when the denominator in a division operation
is an attacker controlled value.
---
 clang/docs/analyzer/checkers.rst  | 28 +++
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  6 +++
 .../StaticAnalyzer/Core/CheckerManager.h  |  5 ++
 .../Checkers/DivZeroChecker.cpp   | 46 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 clang/test/Analysis/taint-generic.c   |  3 ++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 89a1018e14c0e6..fab060302310bb 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1053,6 +1053,34 @@ by explicitly marking the ``size`` parameter as 
sanitized. See the
 delete[] ptr;
   }
 
+.. _optin-taint-TaintedDiv:
+
+optin.taint.TaintedDiv (C, C++, ObjC)
+"
+This checker warns when the denominator in a division
+operation is a tainted (potentially attacker controlled) value.
+If the attacker can set the denominator to 0, a runtime error can
+be triggered. The checker warns if the analyzer cannot prove
+that the denominator is not 0 and it is a tainted value.
+This warning is more pessimistic than the :ref:`core-DivideZero` checker
+which warns only when it can prove that the denominator is 0.
+
+.. code-block:: c
+
+  int vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+return n/size; // warn: Division by a tainted value, possibly zero
+  }
+
+  int not_vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+if (!size)
+  return 0;
+return n/size; // no warning
+  }
+
 .. _security-checkers:
 
 security
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index fb4114619ac3d3..5a9afa0f15f5a0 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1710,6 +1710,12 @@ def TaintedAllocChecker: Checker<"TaintedAlloc">,
   Dependencies<[DynamicMemoryModeling, TaintPropagationChecker]>,
   Documentation;
 
+def TaintedDivChecker: Checker<"TaintedDiv">,
+  HelpText<"Check for divisions, where the denominator "
+   "might be 0 as it is a tainted (attacker controlled) value.">,
+  Dependencies<[TaintPropagationChecker]>,
+  Documentation;
+
 } // end "optin.taint"
 
 
//===--===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h 
b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index ad25d18f280700..e47c0c310eb8e7 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -223,6 +223,11 @@ class CheckerManager {
 return static_cast(CheckerTags[tag]);
   }
 
+  template  bool isRegisteredChecker() {
+CheckerTag tag = getTag();
+return (CheckerTags.count(tag) != 0);
+  }
+
 
//===--===//
 // Functions for running checkers for AST traversing.
 
//===--===//
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5496f087447fbe..89aac24d7576e5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -26,8 +26,6 @@ using namespace taint;
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt > {
-  const BugType BT{this, "Division by zero"};
-  const BugType TaintBT{this, "Division by zero", categories::TaintedData};
   void reportBug(StringRef Msg, ProgramStateRef StateZero,
  CheckerContext &C) const;
   void reportTaintBug(StringRef Msg, ProgramStateRef StateZero,
@@ -35,6 +33,12 @@ class DivZeroChecker : public Checker< 
check::PreStmt > {
   llvm::ArrayRef TaintedSyms) const;
 
 public:
+  /// This checker class implements multiple user facing checker
+  enum CheckKind { CK_DivZeroChecker, CK_TaintedDivChecker, CK_NumCheckKinds };
+  bool ChecksEnabled[CK_NumCheckKinds] = {false};
+  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  mutable std::unique_ptr BugTypes[CK_NumCheckKinds];
+
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
 };
 } // end anonymous namespace
@@ -48,8 +52,14 @@ static const Expr *getDenomExpr(co

[clang] [analyzer] Splitting TaintPropagation checker into reporting and mode… (PR #98157)

2024-07-09 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/98157

…ling checkers

Taint propagation is a generic modeling feature of the Clang Static Analyzer 
which many other checkers depend on. Therefore GenericTaintChecker is split 
into a TaintPropagation modeling checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should set the 
TaintPropagation checker as their dependency.


>From fdd19cf4ea888fa48994ff3935d61d64a8d58c9d Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 5 Jul 2024 14:02:00 +0200
Subject: [PATCH] [analyzer] Splitting TaintPropagation checker into reporting
 and modeling checkers

Taint propagation is a generic modeling feature of the
Clang Static Analyzer which many other checkers depend on.
Therefore GenericTaintChecker is split into a TaintPropagation modeling
checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should
set the TaintPropagation checker as their dependency.
---
 clang/docs/analyzer/checkers.rst  |  6 ++--
 .../user-docs/TaintAnalysisConfiguration.rst  | 11 +---
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 12 +---
 .../Checkers/GenericTaintChecker.cpp  | 28 +++
 4 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 42c097d973d53..b7a3a4ebd927c 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -3011,10 +3011,10 @@ alpha.security.taint
 Checkers implementing
 `taint analysis `_.
 
-.. _alpha-security-taint-TaintPropagation:
+.. _alpha-security-taint-GenericTaint:
 
-alpha.security.taint.TaintPropagation (C, C++)
-""
+alpha.security.taint.GenericTaint (C, C++)
+""
 
 Taint analysis identifies potential security vulnerabilities where the
 attacker can inject malicious data to the program to execute an attack
diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst 
b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
index 94db84494e00b..f0117cbfe02ad 100644
--- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
+++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
@@ -2,10 +2,13 @@
 Taint Analysis Configuration
 
 
-The Clang Static Analyzer uses taint analysis to detect security-related 
issues in code.
-The backbone of taint analysis in the Clang SA is the `GenericTaintChecker`, 
which the user can access via the :ref:`alpha-security-taint-TaintPropagation` 
checker alias and this checker has a default taint-related configuration.
-The built-in default settings are defined in code, and they are always in 
effect once the checker is enabled, either directly or via the alias.
-The checker also provides a configuration interface for extending the default 
settings by providing a configuration file in `YAML 
`_ format.
+The Clang Static Analyzer uses taint analysis to detect injection 
vulnerability related issues in code.
+The backbone of taint analysis in the Clang SA is the `TaintPropagation` 
modeling checker.
+The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` 
checker.
+The `TaintPropagation` checker has a default taint-related configuration.
+The built-in default settings are defined in code, and they are always in 
effect.
+The checker also provides a configuration interface for extending the default 
settings via the ``alpha.security.taint.TaintPropagation:Config`` checker 
config parameter
+by providing a configuration file in `YAML 
`_ format.
 This documentation describes the syntax of the configuration file and gives 
the informal semantics of the configuration options.
 
 .. contents::
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 6e224a4e098ad..ec5dbd28a5272 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1071,7 +1071,7 @@ def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">,
 
 let ParentPackage = Taint in {
 
-def GenericTaintChecker : Checker<"TaintPropagation">,
+def TaintPropagationChecker : Checker<"TaintPropagation">, // Modelling checker
   HelpText<"Generate taint information used by other checkers">,
   CheckerOptions<[
 CmdLineOption,
   "",
   InAlpha>,
   ]>,
+  Documentation,
+  Hidden;
+
+def GenericTaintChecker : Checker<"GenericTaint">,
+  HelpText<"Reports potential injection vulnerabilities">,
+  Dependencies<[TaintPropagationChecker]>,
   Documentation;
 
 } // end "alpha.security.taint"
@@ -17

[clang] [analyzer] Splitting TaintPropagation checker into reporting and mode… (PR #98157)

2024-07-09 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/98157

>From b8c54d9e91b7ec6760db24b687091246c7c31e3e Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 5 Jul 2024 14:02:00 +0200
Subject: [PATCH] [analyzer] Splitting TaintPropagation checker into reporting
 and modeling checkers

Taint propagation is a generic modeling feature of the
Clang Static Analyzer which many other checkers depend on.
Therefore GenericTaintChecker is split into a TaintPropagation modeling
checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should
set the TaintPropagation checker as their dependency.
---
 clang/docs/analyzer/checkers.rst  |  6 ++--
 .../user-docs/TaintAnalysisConfiguration.rst  | 11 ---
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 12 +---
 .../Checkers/GenericTaintChecker.cpp  | 29 +++
 4 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 42c097d973d53..b7a3a4ebd927c 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -3011,10 +3011,10 @@ alpha.security.taint
 Checkers implementing
 `taint analysis `_.
 
-.. _alpha-security-taint-TaintPropagation:
+.. _alpha-security-taint-GenericTaint:
 
-alpha.security.taint.TaintPropagation (C, C++)
-""
+alpha.security.taint.GenericTaint (C, C++)
+""
 
 Taint analysis identifies potential security vulnerabilities where the
 attacker can inject malicious data to the program to execute an attack
diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst 
b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
index 94db84494e00b..f0117cbfe02ad 100644
--- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
+++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
@@ -2,10 +2,13 @@
 Taint Analysis Configuration
 
 
-The Clang Static Analyzer uses taint analysis to detect security-related 
issues in code.
-The backbone of taint analysis in the Clang SA is the `GenericTaintChecker`, 
which the user can access via the :ref:`alpha-security-taint-TaintPropagation` 
checker alias and this checker has a default taint-related configuration.
-The built-in default settings are defined in code, and they are always in 
effect once the checker is enabled, either directly or via the alias.
-The checker also provides a configuration interface for extending the default 
settings by providing a configuration file in `YAML 
`_ format.
+The Clang Static Analyzer uses taint analysis to detect injection 
vulnerability related issues in code.
+The backbone of taint analysis in the Clang SA is the `TaintPropagation` 
modeling checker.
+The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` 
checker.
+The `TaintPropagation` checker has a default taint-related configuration.
+The built-in default settings are defined in code, and they are always in 
effect.
+The checker also provides a configuration interface for extending the default 
settings via the ``alpha.security.taint.TaintPropagation:Config`` checker 
config parameter
+by providing a configuration file to the in `YAML 
`_ format.
 This documentation describes the syntax of the configuration file and gives 
the informal semantics of the configuration options.
 
 .. contents::
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 6e224a4e098ad..ec5dbd28a5272 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1071,7 +1071,7 @@ def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">,
 
 let ParentPackage = Taint in {
 
-def GenericTaintChecker : Checker<"TaintPropagation">,
+def TaintPropagationChecker : Checker<"TaintPropagation">, // Modelling checker
   HelpText<"Generate taint information used by other checkers">,
   CheckerOptions<[
 CmdLineOption,
   "",
   InAlpha>,
   ]>,
+  Documentation,
+  Hidden;
+
+def GenericTaintChecker : Checker<"GenericTaint">,
+  HelpText<"Reports potential injection vulnerabilities">,
+  Dependencies<[TaintPropagationChecker]>,
   Documentation;
 
 } // end "alpha.security.taint"
@@ -1717,9 +1723,7 @@ let ParentPackage = TaintOptIn in {
 def TaintedAllocChecker: Checker<"TaintedAlloc">,
   HelpText<"Check for memory allocations, where the size parameter "
"might be a tainted (attacker controlled) value.">,
-  Dependencies<[DynamicMemoryModeling]>,
-  //FIXME: GenericTaintChecker should be a dependency, but only after it
-  //is transforme

[clang] [analyzer] Splitting TaintPropagation checker into reporting and mode… (PR #98157)

2024-07-09 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/98157

>From 75675417c324a2d1df5e42a8549f6d4bcb779ab4 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 5 Jul 2024 14:02:00 +0200
Subject: [PATCH] [analyzer] Splitting TaintPropagation checker into reporting
 and modeling checkers

Taint propagation is a generic modeling feature of the
Clang Static Analyzer which many other checkers depend on.
Therefore GenericTaintChecker is split into a TaintPropagation modeling
checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should
set the TaintPropagation checker as their dependency.
---
 clang/docs/analyzer/checkers.rst  |  8 ++---
 .../user-docs/TaintAnalysisConfiguration.rst  | 13 +
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 12 +---
 .../Checkers/GenericTaintChecker.cpp  | 29 +++
 4 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 42c097d973d53..9216b1637271d 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1013,7 +1013,7 @@ covers the SEI Cert coding standard rule `INT04-C
 
 You can silence this warning either by bound checking the ``size`` parameter, 
or
 by explicitly marking the ``size`` parameter as sanitized. See the
-:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+:ref:`alpha-security-taint-GenericTaint` checker for an example.
 
 .. code-block:: c
 
@@ -3011,10 +3011,10 @@ alpha.security.taint
 Checkers implementing
 `taint analysis `_.
 
-.. _alpha-security-taint-TaintPropagation:
+.. _alpha-security-taint-GenericTaint:
 
-alpha.security.taint.TaintPropagation (C, C++)
-""
+alpha.security.taint.GenericTaint (C, C++)
+""
 
 Taint analysis identifies potential security vulnerabilities where the
 attacker can inject malicious data to the program to execute an attack
diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst 
b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
index 94db84494e00b..bb10b3be758e4 100644
--- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
+++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
@@ -2,10 +2,13 @@
 Taint Analysis Configuration
 
 
-The Clang Static Analyzer uses taint analysis to detect security-related 
issues in code.
-The backbone of taint analysis in the Clang SA is the `GenericTaintChecker`, 
which the user can access via the :ref:`alpha-security-taint-TaintPropagation` 
checker alias and this checker has a default taint-related configuration.
-The built-in default settings are defined in code, and they are always in 
effect once the checker is enabled, either directly or via the alias.
-The checker also provides a configuration interface for extending the default 
settings by providing a configuration file in `YAML 
`_ format.
+The Clang Static Analyzer uses taint analysis to detect injection 
vulnerability related issues in code.
+The backbone of taint analysis in the Clang SA is the ``TaintPropagation`` 
modeling checker.
+The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` 
checker.
+The ``TaintPropagation`` checker has a default taint-related configuration.
+The built-in default settings are defined in code, and they are always in 
effect.
+The checker also provides a configuration interface for extending the default 
settings via the ``alpha.security.taint.TaintPropagation:Config`` checker 
config parameter
+by providing a configuration file to the in `YAML 
`_ format.
 This documentation describes the syntax of the configuration file and gives 
the informal semantics of the configuration options.
 
 .. contents::
@@ -18,7 +21,7 @@ 
 
 Taint analysis works by checking for the occurrence of special operations 
during the symbolic execution of the program.
 Taint analysis defines sources, sinks, and propagation rules. It identifies 
errors by detecting a flow of information that originates from a taint source, 
reaches a taint sink, and propagates through the program paths via propagation 
rules.
-A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
:ref:`alpha-security-taint-TaintPropagation`.
+A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
``TaintPropagation`` checker.
 It is possible to express that a statement sanitizes tainted values by 
providing a ``Filters`` section in the external configuration (see 
:ref:`clangsa-taint-configuration-exam

[clang] [analyzer] Splitting TaintPropagation checker into reporting and mode… (PR #98157)

2024-07-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/98157

>From 75675417c324a2d1df5e42a8549f6d4bcb779ab4 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 5 Jul 2024 14:02:00 +0200
Subject: [PATCH 1/2] [analyzer] Splitting TaintPropagation checker into
 reporting and modeling checkers

Taint propagation is a generic modeling feature of the
Clang Static Analyzer which many other checkers depend on.
Therefore GenericTaintChecker is split into a TaintPropagation modeling
checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should
set the TaintPropagation checker as their dependency.
---
 clang/docs/analyzer/checkers.rst  |  8 ++---
 .../user-docs/TaintAnalysisConfiguration.rst  | 13 +
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 12 +---
 .../Checkers/GenericTaintChecker.cpp  | 29 +++
 4 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 42c097d973d53..9216b1637271d 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1013,7 +1013,7 @@ covers the SEI Cert coding standard rule `INT04-C
 
 You can silence this warning either by bound checking the ``size`` parameter, 
or
 by explicitly marking the ``size`` parameter as sanitized. See the
-:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+:ref:`alpha-security-taint-GenericTaint` checker for an example.
 
 .. code-block:: c
 
@@ -3011,10 +3011,10 @@ alpha.security.taint
 Checkers implementing
 `taint analysis `_.
 
-.. _alpha-security-taint-TaintPropagation:
+.. _alpha-security-taint-GenericTaint:
 
-alpha.security.taint.TaintPropagation (C, C++)
-""
+alpha.security.taint.GenericTaint (C, C++)
+""
 
 Taint analysis identifies potential security vulnerabilities where the
 attacker can inject malicious data to the program to execute an attack
diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst 
b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
index 94db84494e00b..bb10b3be758e4 100644
--- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
+++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
@@ -2,10 +2,13 @@
 Taint Analysis Configuration
 
 
-The Clang Static Analyzer uses taint analysis to detect security-related 
issues in code.
-The backbone of taint analysis in the Clang SA is the `GenericTaintChecker`, 
which the user can access via the :ref:`alpha-security-taint-TaintPropagation` 
checker alias and this checker has a default taint-related configuration.
-The built-in default settings are defined in code, and they are always in 
effect once the checker is enabled, either directly or via the alias.
-The checker also provides a configuration interface for extending the default 
settings by providing a configuration file in `YAML 
`_ format.
+The Clang Static Analyzer uses taint analysis to detect injection 
vulnerability related issues in code.
+The backbone of taint analysis in the Clang SA is the ``TaintPropagation`` 
modeling checker.
+The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` 
checker.
+The ``TaintPropagation`` checker has a default taint-related configuration.
+The built-in default settings are defined in code, and they are always in 
effect.
+The checker also provides a configuration interface for extending the default 
settings via the ``alpha.security.taint.TaintPropagation:Config`` checker 
config parameter
+by providing a configuration file to the in `YAML 
`_ format.
 This documentation describes the syntax of the configuration file and gives 
the informal semantics of the configuration options.
 
 .. contents::
@@ -18,7 +21,7 @@ 
 
 Taint analysis works by checking for the occurrence of special operations 
during the symbolic execution of the program.
 Taint analysis defines sources, sinks, and propagation rules. It identifies 
errors by detecting a flow of information that originates from a taint source, 
reaches a taint sink, and propagates through the program paths via propagation 
rules.
-A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
:ref:`alpha-security-taint-TaintPropagation`.
+A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
``TaintPropagation`` checker.
 It is possible to express that a statement sanitizes tainted values by 
providing a ``Filters`` section in the external configuration (see 
:ref:`clangsa-taint-configuration-

[clang] [analyzer] Splitting TaintPropagation checker into reporting and mode… (PR #98157)

2024-07-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/98157

>From 75675417c324a2d1df5e42a8549f6d4bcb779ab4 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 5 Jul 2024 14:02:00 +0200
Subject: [PATCH 1/3] [analyzer] Splitting TaintPropagation checker into
 reporting and modeling checkers

Taint propagation is a generic modeling feature of the
Clang Static Analyzer which many other checkers depend on.
Therefore GenericTaintChecker is split into a TaintPropagation modeling
checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should
set the TaintPropagation checker as their dependency.
---
 clang/docs/analyzer/checkers.rst  |  8 ++---
 .../user-docs/TaintAnalysisConfiguration.rst  | 13 +
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 12 +---
 .../Checkers/GenericTaintChecker.cpp  | 29 +++
 4 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 42c097d973d53..9216b1637271d 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1013,7 +1013,7 @@ covers the SEI Cert coding standard rule `INT04-C
 
 You can silence this warning either by bound checking the ``size`` parameter, 
or
 by explicitly marking the ``size`` parameter as sanitized. See the
-:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+:ref:`alpha-security-taint-GenericTaint` checker for an example.
 
 .. code-block:: c
 
@@ -3011,10 +3011,10 @@ alpha.security.taint
 Checkers implementing
 `taint analysis `_.
 
-.. _alpha-security-taint-TaintPropagation:
+.. _alpha-security-taint-GenericTaint:
 
-alpha.security.taint.TaintPropagation (C, C++)
-""
+alpha.security.taint.GenericTaint (C, C++)
+""
 
 Taint analysis identifies potential security vulnerabilities where the
 attacker can inject malicious data to the program to execute an attack
diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst 
b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
index 94db84494e00b..bb10b3be758e4 100644
--- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
+++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
@@ -2,10 +2,13 @@
 Taint Analysis Configuration
 
 
-The Clang Static Analyzer uses taint analysis to detect security-related 
issues in code.
-The backbone of taint analysis in the Clang SA is the `GenericTaintChecker`, 
which the user can access via the :ref:`alpha-security-taint-TaintPropagation` 
checker alias and this checker has a default taint-related configuration.
-The built-in default settings are defined in code, and they are always in 
effect once the checker is enabled, either directly or via the alias.
-The checker also provides a configuration interface for extending the default 
settings by providing a configuration file in `YAML 
`_ format.
+The Clang Static Analyzer uses taint analysis to detect injection 
vulnerability related issues in code.
+The backbone of taint analysis in the Clang SA is the ``TaintPropagation`` 
modeling checker.
+The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` 
checker.
+The ``TaintPropagation`` checker has a default taint-related configuration.
+The built-in default settings are defined in code, and they are always in 
effect.
+The checker also provides a configuration interface for extending the default 
settings via the ``alpha.security.taint.TaintPropagation:Config`` checker 
config parameter
+by providing a configuration file to the in `YAML 
`_ format.
 This documentation describes the syntax of the configuration file and gives 
the informal semantics of the configuration options.
 
 .. contents::
@@ -18,7 +21,7 @@ 
 
 Taint analysis works by checking for the occurrence of special operations 
during the symbolic execution of the program.
 Taint analysis defines sources, sinks, and propagation rules. It identifies 
errors by detecting a flow of information that originates from a taint source, 
reaches a taint sink, and propagates through the program paths via propagation 
rules.
-A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
:ref:`alpha-security-taint-TaintPropagation`.
+A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
``TaintPropagation`` checker.
 It is possible to express that a statement sanitizes tainted values by 
providing a ``Filters`` section in the external configuration (see 
:ref:`clangsa-taint-configuration-

[clang] [analyzer] Splitting TaintPropagation checker into reporting and mode… (PR #98157)

2024-07-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/98157

>From 75675417c324a2d1df5e42a8549f6d4bcb779ab4 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 5 Jul 2024 14:02:00 +0200
Subject: [PATCH 1/4] [analyzer] Splitting TaintPropagation checker into
 reporting and modeling checkers

Taint propagation is a generic modeling feature of the
Clang Static Analyzer which many other checkers depend on.
Therefore GenericTaintChecker is split into a TaintPropagation modeling
checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should
set the TaintPropagation checker as their dependency.
---
 clang/docs/analyzer/checkers.rst  |  8 ++---
 .../user-docs/TaintAnalysisConfiguration.rst  | 13 +
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 12 +---
 .../Checkers/GenericTaintChecker.cpp  | 29 +++
 4 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 42c097d973d53..9216b1637271d 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1013,7 +1013,7 @@ covers the SEI Cert coding standard rule `INT04-C
 
 You can silence this warning either by bound checking the ``size`` parameter, 
or
 by explicitly marking the ``size`` parameter as sanitized. See the
-:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+:ref:`alpha-security-taint-GenericTaint` checker for an example.
 
 .. code-block:: c
 
@@ -3011,10 +3011,10 @@ alpha.security.taint
 Checkers implementing
 `taint analysis `_.
 
-.. _alpha-security-taint-TaintPropagation:
+.. _alpha-security-taint-GenericTaint:
 
-alpha.security.taint.TaintPropagation (C, C++)
-""
+alpha.security.taint.GenericTaint (C, C++)
+""
 
 Taint analysis identifies potential security vulnerabilities where the
 attacker can inject malicious data to the program to execute an attack
diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst 
b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
index 94db84494e00b..bb10b3be758e4 100644
--- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
+++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
@@ -2,10 +2,13 @@
 Taint Analysis Configuration
 
 
-The Clang Static Analyzer uses taint analysis to detect security-related 
issues in code.
-The backbone of taint analysis in the Clang SA is the `GenericTaintChecker`, 
which the user can access via the :ref:`alpha-security-taint-TaintPropagation` 
checker alias and this checker has a default taint-related configuration.
-The built-in default settings are defined in code, and they are always in 
effect once the checker is enabled, either directly or via the alias.
-The checker also provides a configuration interface for extending the default 
settings by providing a configuration file in `YAML 
`_ format.
+The Clang Static Analyzer uses taint analysis to detect injection 
vulnerability related issues in code.
+The backbone of taint analysis in the Clang SA is the ``TaintPropagation`` 
modeling checker.
+The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` 
checker.
+The ``TaintPropagation`` checker has a default taint-related configuration.
+The built-in default settings are defined in code, and they are always in 
effect.
+The checker also provides a configuration interface for extending the default 
settings via the ``alpha.security.taint.TaintPropagation:Config`` checker 
config parameter
+by providing a configuration file to the in `YAML 
`_ format.
 This documentation describes the syntax of the configuration file and gives 
the informal semantics of the configuration options.
 
 .. contents::
@@ -18,7 +21,7 @@ 
 
 Taint analysis works by checking for the occurrence of special operations 
during the symbolic execution of the program.
 Taint analysis defines sources, sinks, and propagation rules. It identifies 
errors by detecting a flow of information that originates from a taint source, 
reaches a taint sink, and propagates through the program paths via propagation 
rules.
-A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
:ref:`alpha-security-taint-TaintPropagation`.
+A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
``TaintPropagation`` checker.
 It is possible to express that a statement sanitizes tainted values by 
providing a ``Filters`` section in the external configuration (see 
:ref:`clangsa-taint-configuration-

[clang] [analyzer] Splitting TaintPropagation checker into reporting and mode… (PR #98157)

2024-07-10 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

Thanks for the review. I updated the patch with your suggestions.
-std::unique_pointer changed to std::optional
-I fixed documentation-related grammatical and reference errors.

https://github.com/llvm/llvm-project/pull/98157
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Split TaintPropagation checker into reporting and modeling checkers (PR #98157)

2024-07-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/98157

>From 75675417c324a2d1df5e42a8549f6d4bcb779ab4 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Fri, 5 Jul 2024 14:02:00 +0200
Subject: [PATCH 1/5] [analyzer] Splitting TaintPropagation checker into
 reporting and modeling checkers

Taint propagation is a generic modeling feature of the
Clang Static Analyzer which many other checkers depend on.
Therefore GenericTaintChecker is split into a TaintPropagation modeling
checker and a GenericTaint reporting checker.

Other checkers, which report taint related warnings, should
set the TaintPropagation checker as their dependency.
---
 clang/docs/analyzer/checkers.rst  |  8 ++---
 .../user-docs/TaintAnalysisConfiguration.rst  | 13 +
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 12 +---
 .../Checkers/GenericTaintChecker.cpp  | 29 +++
 4 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 42c097d973d53..9216b1637271d 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1013,7 +1013,7 @@ covers the SEI Cert coding standard rule `INT04-C
 
 You can silence this warning either by bound checking the ``size`` parameter, 
or
 by explicitly marking the ``size`` parameter as sanitized. See the
-:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+:ref:`alpha-security-taint-GenericTaint` checker for an example.
 
 .. code-block:: c
 
@@ -3011,10 +3011,10 @@ alpha.security.taint
 Checkers implementing
 `taint analysis `_.
 
-.. _alpha-security-taint-TaintPropagation:
+.. _alpha-security-taint-GenericTaint:
 
-alpha.security.taint.TaintPropagation (C, C++)
-""
+alpha.security.taint.GenericTaint (C, C++)
+""
 
 Taint analysis identifies potential security vulnerabilities where the
 attacker can inject malicious data to the program to execute an attack
diff --git a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst 
b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
index 94db84494e00b..bb10b3be758e4 100644
--- a/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
+++ b/clang/docs/analyzer/user-docs/TaintAnalysisConfiguration.rst
@@ -2,10 +2,13 @@
 Taint Analysis Configuration
 
 
-The Clang Static Analyzer uses taint analysis to detect security-related 
issues in code.
-The backbone of taint analysis in the Clang SA is the `GenericTaintChecker`, 
which the user can access via the :ref:`alpha-security-taint-TaintPropagation` 
checker alias and this checker has a default taint-related configuration.
-The built-in default settings are defined in code, and they are always in 
effect once the checker is enabled, either directly or via the alias.
-The checker also provides a configuration interface for extending the default 
settings by providing a configuration file in `YAML 
`_ format.
+The Clang Static Analyzer uses taint analysis to detect injection 
vulnerability related issues in code.
+The backbone of taint analysis in the Clang SA is the ``TaintPropagation`` 
modeling checker.
+The reports are emitted via the :ref:`alpha-security-taint-GenericTaint` 
checker.
+The ``TaintPropagation`` checker has a default taint-related configuration.
+The built-in default settings are defined in code, and they are always in 
effect.
+The checker also provides a configuration interface for extending the default 
settings via the ``alpha.security.taint.TaintPropagation:Config`` checker 
config parameter
+by providing a configuration file to the in `YAML 
`_ format.
 This documentation describes the syntax of the configuration file and gives 
the informal semantics of the configuration options.
 
 .. contents::
@@ -18,7 +21,7 @@ 
 
 Taint analysis works by checking for the occurrence of special operations 
during the symbolic execution of the program.
 Taint analysis defines sources, sinks, and propagation rules. It identifies 
errors by detecting a flow of information that originates from a taint source, 
reaches a taint sink, and propagates through the program paths via propagation 
rules.
-A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
:ref:`alpha-security-taint-TaintPropagation`.
+A source, sink, or an operation that propagates taint is mainly 
domain-specific knowledge, but there are some built-in defaults provided by 
``TaintPropagation`` checker.
 It is possible to express that a statement sanitizes tainted values by 
providing a ``Filters`` section in the external configuration (see 
:ref:`clangsa-taint-configuration-

[clang] [analyzer] Split TaintPropagation checker into reporting and modeling checkers (PR #98157)

2024-07-10 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp closed https://github.com/llvm/llvm-project/pull/98157
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2024-05-02 Thread Daniel Krupp via cfe-commits
=?utf-8?q?Donát?= Nagy ,Daniel Krupp
 
Message-ID:
In-Reply-To: 


https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/7] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning for the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
tainted size variable is bounded.

The false warning was also emitted for the malloc/calloc calls.

These warnings (the sinks) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc8..b949cac504eddf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks should be implemented in other checkers
+   // with more sophisticated sanitation heuristics.
   "setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
   "setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-  // SinkProps
-  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
-   TR

[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2024-05-02 Thread Daniel Krupp via cfe-commits
=?utf-8?q?Donát?= Nagy ,Daniel Krupp
 ,
=?utf-8?q?Donát?= Nagy ,Daniel Krupp
 
Message-ID:
In-Reply-To: 


https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/68607

>From 143db26ffe8620c2b45eb15d331466c883bbfce0 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 9 Oct 2023 16:52:13 +0200
Subject: [PATCH 1/9] [analyzer] Removing untrusted buffer size taint warning

alpha.security.taint.TaintPropagation checker
emitted a false warning to the following code

char buf[100];
size_t size = tainted();
if (size > 100)
  return;
memset(buf, 0, size); // warn: untrusted data used as buffer size

The checker does not take into consideration that the
tainted size variable is bounded.

The false warning was also emitted for the malloc/calloc calls.

These warnings (the sinks) should be implemented in the
MallocChecker and CStringChecker checkers instead, where more sophisticated
handling can be done taking into consideration buffer size and integer 
constraints.
---
 .../Checkers/GenericTaintChecker.cpp  | 49 +
 .../test/Analysis/taint-diagnostic-visitor.c  | 68 +--
 clang/test/Analysis/taint-generic.c   | 26 ---
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 4ceaf933d0bfc8..b949cac504eddf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -59,13 +59,6 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size in strn.. functions,
-/// and allocators.
-constexpr llvm::StringLiteral MsgTaintedBufferSize =
-"Untrusted data is used to specify the buffer size "
-"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
-"for character data and the null terminator)";
-
 /// Check if tainted data is used as a custom sink's parameter.
 constexpr llvm::StringLiteral MsgCustomSink =
 "Untrusted data is passed to a user-defined sink";
@@ -733,13 +726,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{CDF_MaybeBuiltin, {{"stpcpy"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strcat"}}},
-   TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+   TR::Prop({{0,1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsncat"}}},
TR::Prop({{1}}, {{0, ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"strdupa"}}},
TR::Prop({{0}}, {{ReturnValueIndex}})},
   {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, 
{{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
+   TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
+   TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
+  {{CDF_MaybeBuiltin, {"bcopy"}},
+   TR::Prop({{0, 2}}, {{1}})},
 
   // Sinks
   {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
@@ -753,32 +756,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext 
&C) const {
   {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},
   {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},
   {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
-  {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, 
MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"memccpy"}}},
-   TR::Sink({{3}}, MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {{"realloc"}}},
-   TR::Sink({{1}}, MsgTaintedBufferSize)},
+   // malloc, calloc, alloca, realloc, memccpy
+   // are intentionally left out as taint sinks
+   // because unconditional reporting for these functions
+   // generate many false positives.
+   // These taint sinks should be implemented in other checkers
+   // with more sophisticated sanitation heuristics.
   "setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
   "setproctitle_fast"}}},
TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
-
-  // SinkProps
-  {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
-   TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
-MsgTaintedBufferSize)},
-  {{CDF_MaybeBuiltin, {BI.g

[clang] [analyzer] Removing untrusted buffer size taint warning (PR #68607)

2024-05-02 Thread Daniel Krupp via cfe-commits
=?utf-8?q?Donát?= Nagy ,Daniel Krupp
 ,
=?utf-8?q?Donát?= Nagy ,Daniel Krupp
 
Message-ID:
In-Reply-To: 


https://github.com/dkrupp closed https://github.com/llvm/llvm-project/pull/68607
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-16 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/92420

unix.Malloc checker will warn if a memory allocation function (malloc, calloc, 
realloc, alloca) is called with a tainted (attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion. To get this 
warning, the alpha.security.taint.TaintPropagation checker also needs to be 
switched on.

The warning will only be emitted, if the analyzer cannot prove that the size is 
below reasonable bounds (From 80767176cbe8e5717c5f42b113f305d81b635cb9 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH] [analyzer] Adding taint analysis capability to unix.Malloc
 checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
   

[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-23 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From 80767176cbe8e5717c5f42b113f305d81b635cb9 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/2] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-23 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From 80767176cbe8e5717c5f42b113f305d81b635cb9 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/2] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-23 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

Thanks for the reviews. I updated the patch.

@haoNoQ 
- I changed the report to non-fatal
- I factored out the warning into a new checker optin.taint.TaintMalloc. This 
way the checker can be enabled separately. Of course, the 
alpha.security.taint.TaintPropagation checker is a prerequisite as indicated in 
the checker doc.

@steakhal 
- New test is added to the taint-diagnostic-visitor.c to test the taint related 
notes diagnostics.

-Minor changes addressed as requested.

Could you please check again?


https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-27 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/92420

>From 80767176cbe8e5717c5f42b113f305d81b635cb9 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Tue, 30 Apr 2024 15:20:52 +0200
Subject: [PATCH 1/3] [analyzer] Adding taint analysis capability to
 unix.Malloc checker

unix.Malloc checker will warn if a memory allocation function
(malloc, calloc, realloc, alloca) is called with a tainted
(attacker controlled) size parameter.
A large, maliciously set size value can trigger memory exhaustion.
To get this warning, the alpha.security.taint.TaintPropagation checker
also needs to be switched on.

The warning will only be emitted, if the analyzer cannot prove
that the size is below reasonable bounds (https://wiki.sei.cmu.edu/confluence/display/c/INT04-C.+Enforce+limits+on+integer+values+originating+from+tainted+sources>`_.
+
+You can silence this warning either by bound checking the ``size`` parameter, 
or
+by explicitly marking the ``size`` parameter as sanitized. See the
+:ref:`alpha-security-taint-TaintPropagation` checker for more details.
+
+.. code-block:: c
+
+  void t1(void) {
+size_t size;
+scanf("%zu", &size);
+int *p = malloc(size); // warn: malloc is called with a tainted 
(potentially attacker controlled) value
+free(p);
+  }
+
+  void t3(void) {
+size_t size;
+scanf("%zu", &size);
+if (1024 BT_MismatchedDealloc;
   mutable std::unique_ptr BT_OffsetFree[CK_NumCheckKinds];
   mutable std::unique_ptr BT_UseZerroAllocated[CK_NumCheckKinds];
+  mutable std::unique_ptr BT_TaintedAlloc[CK_NumCheckKinds];
 
 #define CHECK_FN(NAME) 
\
   void NAME(const CallEvent &Call, CheckerContext &C) const;
@@ -462,6 +464,13 @@ class MallocChecker
   };
 
   bool isMemCall(const CallEvent &Call) const;
+  void reportTaintBug(StringRef Msg, ProgramStateRef State, CheckerContext &C,
+  llvm::ArrayRef TaintedSyms,
+  AllocationFamily Family, const Expr *SizeEx) const;
+
+  void CheckTaintedness(CheckerContext &C, const CallEvent &Call,
+const SVal SizeSVal, ProgramStateRef State,
+AllocationFamily Family) const;
 
   // TODO: Remove mutable by moving the initializtaion to the registry 
function.
   mutable std::optional KernelZeroFlagVal;
@@ -521,9 +530,9 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
+  [[nodiscard]] ProgramStateRef
   MallocMemAux(CheckerContext &C, const CallEvent &Call, const Expr *SizeEx,
-   SVal Init, ProgramStateRef State, AllocationFamily Family);
+   SVal Init, ProgramStateRef State, AllocationFamily Family) 
const;
 
   /// Models memory allocation.
   ///
@@ -534,9 +543,10 @@ class MallocChecker
   /// malloc leaves it undefined.
   /// \param [in] State The \c ProgramState right before allocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  MallocMemAux(CheckerContext &C, const CallEvent &Call, SVal Size, SVal Init,
-   ProgramStateRef State, AllocationFamily Family);
+  [[nodiscard]] ProgramStateRef MallocMemAux(CheckerContext &C,
+ const CallEvent &Call, SVal Size,
+ SVal Init, ProgramStateRef State,
+ AllocationFamily Family) const;
 
   // Check if this malloc() for special flags. At present that means M_ZERO or
   // __GFP_ZERO (in which case, treat it like calloc).
@@ -649,8 +659,9 @@ class MallocChecker
   /// \param [in] Call The expression that reallocated memory
   /// \param [in] State The \c ProgramState right before reallocation.
   /// \returns The ProgramState right after allocation.
-  [[nodiscard]] static ProgramStateRef
-  CallocMem(CheckerContext &C, const CallEvent &Call, ProgramStateRef State);
+  [[nodiscard]] ProgramStateRef CallocMem(CheckerContext &C,
+  const CallEvent &Call,
+  ProgramStateRef State) const;
 
   /// See if deallocation happens in a suspicious context. If so, escape the
   /// pointers that otherwise would have been deallocated and return true.
@@ -1779,7 +1790,7 @@ ProgramStateRef 
MallocChecker::MallocMemAux(CheckerContext &C,
 const CallEvent &Call,
 const Expr *SizeEx, SVal Init,
 ProgramStateRef State,
-AllocationFamily Family) {
+AllocationFamily Family) const {
   if (!State)
 return nullptr;
 
@@ -1787,10 +1798,71 @@ ProgramStateRef 
MallocChecker:

[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-27 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

> The patch makes sense to me. Have you considered applying the same heuristic 
> to C++ array new allocations?
> 
> I'll port this patch downstream to see how this would behave on the Juliet 
> C++ benchmark or on some real-world code.

I will check C++

> The patch makes sense to me. Have you considered applying the same heuristic 
> to C++ array new allocations?
> 
> I'll port this patch downstream to see how this would behave on the Juliet 
> C++ benchmark or on some real-world code.

I will consider adding the heuristic for C++ array new allocations in a 
follow-up patch.

https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-27 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

@NagyDonat , @steakhal  I fixed the additional remarks. 
Is there anything else that needs to be done before merging? Thanks.

https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Adding taint analysis capability to unix.Malloc checker (PR #92420)

2024-05-27 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

> @NagyDonat , @steakhal I fixed the additional remarks. Is there anything else 
> that needs to be done before merging? Thanks.

I see now, that there is still one unaddressed remark from @NagyDonat regarding 
a new testcase for array new allocations. I will be adding it tomorrow...

https://github.com/llvm/llvm-project/pull/92420
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-09-13 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp edited https://github.com/llvm/llvm-project/pull/67352
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Focused taint (PR #112212)

2024-10-14 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/112212

None

>From 972c3089bffbce3516b711c4fc02df561b98433f Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 3 Jun 2024 13:45:17 +0200
Subject: [PATCH 1/8] taint example code

---
 .../StaticAnalyzer/taint_focused/taintcalls.c | 25 +++
 1 file changed, 25 insertions(+)
 create mode 100644 clang/lib/StaticAnalyzer/taint_focused/taintcalls.c

diff --git a/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c 
b/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c
new file mode 100644
index 00..d0e1e980d62be5
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c
@@ -0,0 +1,25 @@
+#include 
+#include 
+#include 
+
+char buf[1024];
+
+int fetchTaintedString(char *txt){
+  scanf("%s", txt);
+}
+
+int exec(char* cmd){
+  system(cmd);//warn here
+}
+
+void topLevel(){
+  char cmd[2048] = "/bin/cat ";
+  char filename[1024];
+  fetchTaintedString (filename);
+  strcat(cmd, filename);
+  exec(cmd);
+}
+
+void printNum(int data){
+  printf("Data:%d\n",data);
+}
\ No newline at end of file

>From e5d5e07b76ec412586d07809e5237faee4204a91 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 1 Jul 2024 05:47:57 +0200
Subject: [PATCH 2/8] [analyzer] Focused taint analysis

A new option is added to Clang Static Analyzer
that drives the symbolic execution to only
analyze those top-level functions which
may yield taint-related findings.
---
 clang/include/clang/Driver/Options.td |  3 +
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 11 ++-
 .../StaticAnalyzer/Core/AnalyzerOptions.h |  2 +
 .../Checkers/GenericTaintChecker.cpp  | 62 +++
 .../Frontend/AnalysisConsumer.cpp | 78 +++
 5 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 3f4d1a328b4c27..9d5f2d3af62749 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7005,6 +7005,9 @@ def analyzer_opt_analyze_headers : Flag<["-"], 
"analyzer-opt-analyze-headers">,
 def analyzer_display_progress : Flag<["-"], "analyzer-display-progress">,
   HelpText<"Emit verbose output about the analyzer's progress">,
   MarshallingInfoFlag>;
+def analyzer_focused_taint : Flag<["-"], "analyzer-focused-taint">,
+  HelpText<"Focus on taint analysis">,
+  MarshallingInfoFlag>;
 def analyzer_note_analysis_entry_points : Flag<["-"], 
"analyzer-note-analysis-entry-points">,
   HelpText<"Add a note for each bug report to denote their analysis entry 
points">,
   MarshallingInfoFlag>;
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 7da0d0a87e8c0c..7e8d851ac7b818 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1048,7 +1048,6 @@ def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">,
 
 } // end "alpha.security"
 
-
 
//===--===//
 // Mac OS X, Cocoa, and Core Foundation checkers.
 
//===--===//
@@ -1693,6 +1692,16 @@ def TaintPropagationChecker : 
Checker<"TaintPropagation">, // Modelling checker
 
 def GenericTaintChecker : Checker<"GenericTaint">,
   HelpText<"Reports potential injection vulnerabilities">,
+  CheckerOptions<[
+CmdLineOption,
+  ]>,
   Dependencies<[TaintPropagationChecker]>,
   Documentation;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h 
b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index 3a3c1a13d67dd5..1d21a03fdb960c 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -227,6 +227,7 @@ class AnalyzerOptions : public 
RefCountedBase {
   unsigned ShouldEmitErrorsOnInvalidConfigValue : 1;
   unsigned AnalyzeAll : 1;
   unsigned AnalyzerDisplayProgress : 1;
+  unsigned AnalyzerFocusedTaint : 1;
   unsigned AnalyzerNoteAnalysisEntryPoints : 1;
 
   unsigned eagerlyAssumeBinOpBifurcation : 1;
@@ -293,6 +294,7 @@ class AnalyzerOptions : public 
RefCountedBase {
 ShowConfigOptionsList(false),
 ShouldEmitErrorsOnInvalidConfigValue(false), AnalyzeAll(false),
 AnalyzerDisplayProgress(false), AnalyzerNoteAnalysisEntryPoints(false),
+AnalyzerFocusedTaint(false),
 eagerlyAssumeBinOpBifurcation(false), TrimGraph(false),
 visualizeExplodedGraphWithGraphViz(false), UnoptimizedCFG(false),
 PrintStats(false), NoRetryExhausted(false), AnalyzerWerror(false) {}
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index b89a6e2588c987..f8af9eedb689ca 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTai

[clang] Focused taint (PR #112215)

2024-10-14 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp created 
https://github.com/llvm/llvm-project/pull/112215

None

>From 972c3089bffbce3516b711c4fc02df561b98433f Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 3 Jun 2024 13:45:17 +0200
Subject: [PATCH 1/8] taint example code

---
 .../StaticAnalyzer/taint_focused/taintcalls.c | 25 +++
 1 file changed, 25 insertions(+)
 create mode 100644 clang/lib/StaticAnalyzer/taint_focused/taintcalls.c

diff --git a/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c 
b/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c
new file mode 100644
index 00..d0e1e980d62be5
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+char buf[1024];
+
+int fetchTaintedString(char *txt){
+  scanf("%s", txt);
+}
+
+int exec(char* cmd){
+  system(cmd);//warn here
+}
+
+void topLevel(){
+  char cmd[2048] = "/bin/cat ";
+  char filename[1024];
+  fetchTaintedString (filename);
+  strcat(cmd, filename);
+  exec(cmd);
+}
+
+void printNum(int data){
+  printf("Data:%d\n",data);
+}
\ No newline at end of file

>From e5d5e07b76ec412586d07809e5237faee4204a91 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Mon, 1 Jul 2024 05:47:57 +0200
Subject: [PATCH 2/8] [analyzer] Focused taint analysis

A new option is added to Clang Static Analyzer
that drives the symbolic execution to only
analyze those top-level functions which
may yield taint related findings.
---
 clang/include/clang/Driver/Options.td |  3 +
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 11 ++-
 .../StaticAnalyzer/Core/AnalyzerOptions.h |  2 +
 .../Checkers/GenericTaintChecker.cpp  | 62 +++
 .../Frontend/AnalysisConsumer.cpp | 78 +++
 5 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 3f4d1a328b4c27..9d5f2d3af62749 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7005,6 +7005,9 @@ def analyzer_opt_analyze_headers : Flag<["-"], 
"analyzer-opt-analyze-headers">,
 def analyzer_display_progress : Flag<["-"], "analyzer-display-progress">,
   HelpText<"Emit verbose output about the analyzer's progress">,
   MarshallingInfoFlag>;
+def analyzer_focused_taint : Flag<["-"], "analyzer-focused-taint">,
+  HelpText<"Focus on taint analysis">,
+  MarshallingInfoFlag>;
 def analyzer_note_analysis_entry_points : Flag<["-"], 
"analyzer-note-analysis-entry-points">,
   HelpText<"Add a note for each bug report to denote their analysis entry 
points">,
   MarshallingInfoFlag>;
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 7da0d0a87e8c0c..7e8d851ac7b818 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1048,7 +1048,6 @@ def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">,
 
 } // end "alpha.security"
 
-
 
//===--===//
 // Mac OS X, Cocoa, and Core Foundation checkers.
 
//===--===//
@@ -1693,6 +1692,16 @@ def TaintPropagationChecker : 
Checker<"TaintPropagation">, // Modelling checker
 
 def GenericTaintChecker : Checker<"GenericTaint">,
   HelpText<"Reports potential injection vulnerabilities">,
+  CheckerOptions<[
+CmdLineOption,
+  ]>,
   Dependencies<[TaintPropagationChecker]>,
   Documentation;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h 
b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index 3a3c1a13d67dd5..1d21a03fdb960c 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -227,6 +227,7 @@ class AnalyzerOptions : public 
RefCountedBase {
   unsigned ShouldEmitErrorsOnInvalidConfigValue : 1;
   unsigned AnalyzeAll : 1;
   unsigned AnalyzerDisplayProgress : 1;
+  unsigned AnalyzerFocusedTaint : 1;
   unsigned AnalyzerNoteAnalysisEntryPoints : 1;
 
   unsigned eagerlyAssumeBinOpBifurcation : 1;
@@ -293,6 +294,7 @@ class AnalyzerOptions : public 
RefCountedBase {
 ShowConfigOptionsList(false),
 ShouldEmitErrorsOnInvalidConfigValue(false), AnalyzeAll(false),
 AnalyzerDisplayProgress(false), AnalyzerNoteAnalysisEntryPoints(false),
+AnalyzerFocusedTaint(false),
 eagerlyAssumeBinOpBifurcation(false), TrimGraph(false),
 visualizeExplodedGraphWithGraphViz(false), UnoptimizedCFG(false),
 PrintStats(false), NoRetryExhausted(false), AnalyzerWerror(false) {}
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index b89a6e2588c987..f8af9eedb689ca 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTai

[clang] Focused taint (PR #112212)

2024-10-14 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp closed 
https://github.com/llvm/llvm-project/pull/112212
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Focused taint (PR #112215)

2024-10-14 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp closed 
https://github.com/llvm/llvm-project/pull/112215
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Focused taint (PR #112215)

2024-10-14 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp converted_to_draft 
https://github.com/llvm/llvm-project/pull/112215
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [analyzer] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-09-26 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/106389

>From beb6f6787f4a92e8892ba8f19d0af924edd56e3b Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Wed, 28 Aug 2024 15:32:35 +0200
Subject: [PATCH 1/3] Adding optin.taint.TaintedDiv checker

Tainted division operation is separated out from the core.DivideZero
checker into the optional optin.taint.TaintedDiv checker.
The checker warns when the denominator in a division operation
is an attacker controlled value.
---
 clang/docs/analyzer/checkers.rst  | 28 +++
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  6 +++
 .../StaticAnalyzer/Core/CheckerManager.h  |  5 ++
 .../Checkers/DivZeroChecker.cpp   | 46 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 clang/test/Analysis/taint-generic.c   |  3 ++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 47c6fc680deb1b..60760b8caa 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1288,6 +1288,34 @@ by explicitly marking the ``size`` parameter as 
sanitized. See the
 delete[] ptr;
   }
 
+.. _optin-taint-TaintedDiv:
+
+optin.taint.TaintedDiv (C, C++, ObjC)
+"""""""""""""""""""""""""""""""""""""
+This checker warns when the denominator in a division
+operation is a tainted (potentially attacker controlled) value.
+If the attacker can set the denominator to 0, a runtime error can
+be triggered. The checker warns if the analyzer cannot prove
+that the denominator is not 0 and it is a tainted value.
+This warning is more pessimistic than the :ref:`core-DivideZero` checker
+which warns only when it can prove that the denominator is 0.
+
+.. code-block:: c
+
+  int vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+return n/size; // warn: Division by a tainted value, possibly zero
+  }
+
+  int not_vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+if (!size)
+  return 0;
+return n/size; // no warning
+  }
+
 .. _security-checkers:
 
 security
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 7da0d0a87e8c0c..d6a38d57846df5 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1703,6 +1703,12 @@ def TaintedAllocChecker: Checker<"TaintedAlloc">,
   Dependencies<[DynamicMemoryModeling, TaintPropagationChecker]>,
   Documentation;
 
+def TaintedDivChecker: Checker<"TaintedDiv">,
+  HelpText<"Check for divisions, where the denominator "
+   "might be 0 as it is a tainted (attacker controlled) value.">,
+  Dependencies<[TaintPropagationChecker]>,
+  Documentation;
+
 } // end "optin.taint"
 
 
//===--===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h 
b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 24c5b66fd58220..8e0af1b6db9fa2 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -221,6 +221,11 @@ class CheckerManager {
 return static_cast(CheckerTags[tag]);
   }
 
+  template  bool isRegisteredChecker() {
+CheckerTag tag = getTag();
+return (CheckerTags.count(tag) != 0);
+  }
+
 
//===--===//
 // Functions for running checkers for AST traversing.
 
//===--===//
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5496f087447fbe..89aac24d7576e5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -26,8 +26,6 @@ using namespace taint;
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt > {
-  const BugType BT{this, "Division by zero"};
-  const BugType TaintBT{this, "Division by zero", categories::TaintedData};
   void reportBug(StringRef Msg, ProgramStateRef StateZero,
  CheckerContext &C) const;
   void reportTaintBug(StringRef Msg, ProgramStateRef StateZero,
@@ -35,6 +33,12 @@ class DivZeroChecker : public Checker< 
check::PreStmt > {
   llvm::ArrayRef TaintedSyms) const;
 
 public:
+  /// This checker class implements multiple user-facing checkers.
+  enum CheckKind { CK_DivZeroChecker, CK_TaintedDivChecker, CK_NumCheckKinds };
+  bool ChecksEnabled[CK_NumCheckKinds] = {false};
+  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  mutable std::unique_ptr BugTypes[CK_NumCheckKinds];
+
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
 };
 } // end anonymous namespace
@@ -48,8 +52,14 @@ static const Expr *getDenomExpr(co

[clang] [analyzer] Moving TaintPropagation checker out of alpha (PR #67352)

2024-09-26 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp closed https://github.com/llvm/llvm-project/pull/67352
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-09-26 Thread Daniel Krupp via cfe-commits

dkrupp wrote:

> LGTM overall, I added some minor inline remarks.
> 
> Also consider adding a few simple testcases to distinguish the effects of 
> DivideZero and TaintedDiv. It would also be interesting to highlight what 
> happens in situations like
> 
> ```c
> int test(void) {
>   int x = getchar(); // or any other taint source
>   if (!x)
> return 5 / x;
>   return 8;
> }
> ```
> 
> (I presume that in this case core.DivideZero will create a bug report, but 
> the new TaintedDiv checker won't.)

Test added.

https://github.com/llvm/llvm-project/pull/106389
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-09-26 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/106389

>From ccc5da054903568fbd317d5c773251ed84f8f087 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Wed, 28 Aug 2024 15:32:35 +0200
Subject: [PATCH 1/3] Adding optin.taint.TaintedDiv checker

Tainted division operation is separated out from the core.DivideZero
checker into the optional optin.taint.TaintedDiv checker.
The checker warns when the denominator in a division operation
is an attacker controlled value.
---
 clang/docs/analyzer/checkers.rst  | 28 +++
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  6 +++
 .../StaticAnalyzer/Core/CheckerManager.h  |  5 ++
 .../Checkers/DivZeroChecker.cpp   | 46 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 clang/test/Analysis/taint-generic.c   |  3 ++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 89a1018e14c0e6..fab060302310bb 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1053,6 +1053,34 @@ by explicitly marking the ``size`` parameter as 
sanitized. See the
 delete[] ptr;
   }
 
+.. _optin-taint-TaintedDiv:
+
+optin.taint.TaintedDiv (C, C++, ObjC)
+"""""""""""""""""""""""""""""""""""""
+This checker warns when the denominator in a division
+operation is a tainted (potentially attacker controlled) value.
+If the attacker can set the denominator to 0, a runtime error can
+be triggered. The checker warns if the analyzer cannot prove
+that the denominator is not 0 and it is a tainted value.
+This warning is more pessimistic than the :ref:`core-DivideZero` checker
+which warns only when it can prove that the denominator is 0.
+
+.. code-block:: c
+
+  int vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+return n/size; // warn: Division by a tainted value, possibly zero
+  }
+
+  int not_vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+if (!size)
+  return 0;
+return n/size; // no warning
+  }
+
 .. _security-checkers:
 
 security
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index fb4114619ac3d3..5a9afa0f15f5a0 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1710,6 +1710,12 @@ def TaintedAllocChecker: Checker<"TaintedAlloc">,
   Dependencies<[DynamicMemoryModeling, TaintPropagationChecker]>,
   Documentation;
 
+def TaintedDivChecker: Checker<"TaintedDiv">,
+  HelpText<"Check for divisions, where the denominator "
+   "might be 0 as it is a tainted (attacker controlled) value.">,
+  Dependencies<[TaintPropagationChecker]>,
+  Documentation;
+
 } // end "optin.taint"
 
 
//===--===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h 
b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index ad25d18f280700..e47c0c310eb8e7 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -223,6 +223,11 @@ class CheckerManager {
 return static_cast(CheckerTags[tag]);
   }
 
+  template  bool isRegisteredChecker() {
+CheckerTag tag = getTag();
+return (CheckerTags.count(tag) != 0);
+  }
+
 
//===--===//
 // Functions for running checkers for AST traversing.
 
//===--===//
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5496f087447fbe..89aac24d7576e5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -26,8 +26,6 @@ using namespace taint;
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt > {
-  const BugType BT{this, "Division by zero"};
-  const BugType TaintBT{this, "Division by zero", categories::TaintedData};
   void reportBug(StringRef Msg, ProgramStateRef StateZero,
  CheckerContext &C) const;
   void reportTaintBug(StringRef Msg, ProgramStateRef StateZero,
@@ -35,6 +33,12 @@ class DivZeroChecker : public Checker< 
check::PreStmt > {
   llvm::ArrayRef TaintedSyms) const;
 
 public:
+  /// This checker class implements multiple user-facing checkers.
+  enum CheckKind { CK_DivZeroChecker, CK_TaintedDivChecker, CK_NumCheckKinds };
+  bool ChecksEnabled[CK_NumCheckKinds] = {false};
+  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  mutable std::unique_ptr BugTypes[CK_NumCheckKinds];
+
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
 };
 } // end anonymous namespace
@@ -48,8 +52,14 @@ static const Expr *getDenomExpr(co

[clang] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-09-26 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/106389

>From ccc5da054903568fbd317d5c773251ed84f8f087 Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Wed, 28 Aug 2024 15:32:35 +0200
Subject: [PATCH 1/3] Adding optin.taint.TaintedDiv checker

Tainted division operation is separated out from the core.DivideZero
checker into the optional optin.taint.TaintedDiv checker.
The checker warns when the denominator in a division operation
is an attacker controlled value.
---
 clang/docs/analyzer/checkers.rst  | 28 +++
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  6 +++
 .../StaticAnalyzer/Core/CheckerManager.h  |  5 ++
 .../Checkers/DivZeroChecker.cpp   | 46 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 clang/test/Analysis/taint-generic.c   |  3 ++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 89a1018e14c0e6..fab060302310bb 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1053,6 +1053,34 @@ by explicitly marking the ``size`` parameter as 
sanitized. See the
 delete[] ptr;
   }
 
+.. _optin-taint-TaintedDiv:
+
+optin.taint.TaintedDiv (C, C++, ObjC)
+"""""""""""""""""""""""""""""""""""""
+This checker warns when the denominator in a division
+operation is a tainted (potentially attacker controlled) value.
+If the attacker can set the denominator to 0, a runtime error can
+be triggered. The checker warns if the analyzer cannot prove
+that the denominator is not 0 and it is a tainted value.
+This warning is more pessimistic than the :ref:`core-DivideZero` checker
+which warns only when it can prove that the denominator is 0.
+
+.. code-block:: c
+
+  int vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+return n/size; // warn: Division by a tainted value, possibly zero
+  }
+
+  int not_vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+if (!size)
+  return 0;
+return n/size; // no warning
+  }
+
 .. _security-checkers:
 
 security
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index fb4114619ac3d3..5a9afa0f15f5a0 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1710,6 +1710,12 @@ def TaintedAllocChecker: Checker<"TaintedAlloc">,
   Dependencies<[DynamicMemoryModeling, TaintPropagationChecker]>,
   Documentation;
 
+def TaintedDivChecker: Checker<"TaintedDiv">,
+  HelpText<"Check for divisions, where the denominator "
+   "might be 0 as it is a tainted (attacker controlled) value.">,
+  Dependencies<[TaintPropagationChecker]>,
+  Documentation;
+
 } // end "optin.taint"
 
 
//===--===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h 
b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index ad25d18f280700..e47c0c310eb8e7 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -223,6 +223,11 @@ class CheckerManager {
 return static_cast(CheckerTags[tag]);
   }
 
+  template  bool isRegisteredChecker() {
+CheckerTag tag = getTag();
+return (CheckerTags.count(tag) != 0);
+  }
+
 
//===--===//
 // Functions for running checkers for AST traversing.
 
//===--===//
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5496f087447fbe..89aac24d7576e5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -26,8 +26,6 @@ using namespace taint;
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt > {
-  const BugType BT{this, "Division by zero"};
-  const BugType TaintBT{this, "Division by zero", categories::TaintedData};
   void reportBug(StringRef Msg, ProgramStateRef StateZero,
  CheckerContext &C) const;
   void reportTaintBug(StringRef Msg, ProgramStateRef StateZero,
@@ -35,6 +33,12 @@ class DivZeroChecker : public Checker< 
check::PreStmt > {
   llvm::ArrayRef TaintedSyms) const;
 
 public:
+  /// This checker class implements multiple user-facing checkers.
+  enum CheckKind { CK_DivZeroChecker, CK_TaintedDivChecker, CK_NumCheckKinds };
+  bool ChecksEnabled[CK_NumCheckKinds] = {false};
+  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  mutable std::unique_ptr BugTypes[CK_NumCheckKinds];
+
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
 };
 } // end anonymous namespace
@@ -48,8 +52,14 @@ static const Expr *getDenomExpr(co

[clang] [analyzer] Adding optin.taint.TaintedDiv checker (PR #106389)

2024-09-30 Thread Daniel Krupp via cfe-commits

https://github.com/dkrupp updated 
https://github.com/llvm/llvm-project/pull/106389

>From beb6f6787f4a92e8892ba8f19d0af924edd56e3b Mon Sep 17 00:00:00 2001
From: Daniel Krupp 
Date: Wed, 28 Aug 2024 15:32:35 +0200
Subject: [PATCH 1/4] Adding optin.taint.TaintedDiv checker

Tainted division operation is separated out from the core.DivideZero
checker into the optional optin.taint.TaintedDiv checker.
The checker warns when the denominator in a division operation
is an attacker controlled value.
---
 clang/docs/analyzer/checkers.rst  | 28 +++
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  6 +++
 .../StaticAnalyzer/Core/CheckerManager.h  |  5 ++
 .../Checkers/DivZeroChecker.cpp   | 46 +--
 .../test/Analysis/taint-diagnostic-visitor.c  |  2 +-
 clang/test/Analysis/taint-generic.c   |  3 ++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 47c6fc680deb1b..60760b8caa 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1288,6 +1288,34 @@ by explicitly marking the ``size`` parameter as 
sanitized. See the
 delete[] ptr;
   }
 
+.. _optin-taint-TaintedDiv:
+
+optin.taint.TaintedDiv (C, C++, ObjC)
+"""""""""""""""""""""""""""""""""""""
+This checker warns when the denominator in a division
+operation is a tainted (potentially attacker controlled) value.
+If the attacker can set the denominator to 0, a runtime error can
+be triggered. The checker warns if the analyzer cannot prove
+that the denominator is not 0 and it is a tainted value.
+This warning is more pessimistic than the :ref:`core-DivideZero` checker
+which warns only when it can prove that the denominator is 0.
+
+.. code-block:: c
+
+  int vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+return n/size; // warn: Division by a tainted value, possibly zero
+  }
+
+  int not_vulnerable(int n) {
+size_t size = 0;
+scanf("%zu", &size);
+if (!size)
+  return 0;
+return n/size; // no warning
+  }
+
 .. _security-checkers:
 
 security
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 7da0d0a87e8c0c..d6a38d57846df5 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1703,6 +1703,12 @@ def TaintedAllocChecker: Checker<"TaintedAlloc">,
   Dependencies<[DynamicMemoryModeling, TaintPropagationChecker]>,
   Documentation;
 
+def TaintedDivChecker: Checker<"TaintedDiv">,
+  HelpText<"Check for divisions, where the denominator "
+   "might be 0 as it is a tainted (attacker controlled) value.">,
+  Dependencies<[TaintPropagationChecker]>,
+  Documentation;
+
 } // end "optin.taint"
 
 
//===--===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h 
b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 24c5b66fd58220..8e0af1b6db9fa2 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -221,6 +221,11 @@ class CheckerManager {
 return static_cast(CheckerTags[tag]);
   }
 
+  template  bool isRegisteredChecker() {
+CheckerTag tag = getTag();
+return (CheckerTags.count(tag) != 0);
+  }
+
 
//===--===//
 // Functions for running checkers for AST traversing.
 
//===--===//
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 5496f087447fbe..89aac24d7576e5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -26,8 +26,6 @@ using namespace taint;
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt > {
-  const BugType BT{this, "Division by zero"};
-  const BugType TaintBT{this, "Division by zero", categories::TaintedData};
   void reportBug(StringRef Msg, ProgramStateRef StateZero,
  CheckerContext &C) const;
   void reportTaintBug(StringRef Msg, ProgramStateRef StateZero,
@@ -35,6 +33,12 @@ class DivZeroChecker : public Checker< 
check::PreStmt > {
   llvm::ArrayRef TaintedSyms) const;
 
 public:
+  /// This checker class implements multiple user-facing checkers.
+  enum CheckKind { CK_DivZeroChecker, CK_TaintedDivChecker, CK_NumCheckKinds };
+  bool ChecksEnabled[CK_NumCheckKinds] = {false};
+  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  mutable std::unique_ptr BugTypes[CK_NumCheckKinds];
+
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
 };
 } // end anonymous namespace
@@ -48,8 +52,14 @@ static const Expr *getDenomExpr(co

  1   2   >