https://github.com/dkrupp updated https://github.com/llvm/llvm-project/pull/178054
>From 14c0472a6fc98d07ad374d832f4bf24d6583e80e Mon Sep 17 00:00:00 2001 From: Daniel Krupp <[email protected]> Date: Mon, 26 Jan 2026 22:17:41 +0100 Subject: [PATCH 1/6] [clang][analyzer] Add taintedness to argv If the execution environment is untrusted, we assume that the argv of the main function is an attacker controlled value and set it as a taint analysis source. --- clang/docs/analyzer/checkers.rst | 9 +++- .../Checkers/GenericTaintChecker.cpp | 40 +++++++++++++- .../Analysis/taint-diagnostic-visitor-main.c | 54 +++++++++++++++++++ clang/test/Analysis/taint-generic.c | 13 +++++ 4 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main.c diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 31edf9e99dc7d..a3ac11935afd8 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1371,9 +1371,16 @@ For a more detailed description of configuration options, please see the **Configuration** -* `Config` Specifies the name of the YAML configuration file. The user can +* ``Config`` Specifies the name of the YAML configuration file. The user can define their own taint sources and sinks. +* If the analyzer option ``assume-controlled-environment`` is set to ``false``, + it is assumed that the command line arguments and the environment + variables of the program are attacker controlled. + In particular, the ``argv`` argument of the ``main`` function and + the return value of the ``getenv()`` function are assumed to + hold tainted values. 
+ **Related Guidelines** * `CWE Data Neutralization Issues diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index bea8f3f13ba21..9d88f85c88ad2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -63,6 +63,8 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs = constexpr llvm::StringLiteral MsgCustomSink = "Untrusted data is passed to a user-defined sink"; +const std::string MsgTaintOrigin = "Taint originated here"; + using ArgIdxTy = int; using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; @@ -159,7 +161,7 @@ const NoteTag *taintOriginTrackerTag(CheckerContext &C, return ""; } if (TaintedSymbols.empty()) - return "Taint originated here"; + return MsgTaintOrigin; for (auto Sym : TaintedSymbols) { BR.markInteresting(Sym); @@ -378,10 +380,12 @@ struct GenericTaintRuleParser { CheckerManager &Mgr; }; -class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { +class GenericTaintChecker + : public Checker<check::PreCall, check::PostCall, check::BeginFunction> { public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + void checkBeginFunction(CheckerContext &C) const; void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const override; @@ -827,8 +831,40 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { std::make_move_iterator(Rules.end())); } +// The incoming parameters of the main function get tainted +// if the program is called in an untrusted environment. 
+void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const { + if (!C.inTopFrame() || C.getAnalysisManager() + .getAnalyzerOptions() + .ShouldAssumeControlledEnvironment) + return; + + const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl()); + if (!FD || !FD->isMain() || FD->param_size() < 2) + return; + + ProgramStateRef State = C.getState(); + const MemRegion *ArgvReg = + State->getRegion(FD->parameters()[1], C.getLocationContext()); + SVal ArgvSval = State->getSVal(ArgvReg); + // Add taintedness to argv** + State = addTaint(State, ArgvSval); + + const NoteTag *OriginatingTag = + C.getNoteTag([ArgvSval](PathSensitiveBugReport &BR) -> std::string { + // We give diagnostics only for taint related reports + if (!BR.isInteresting(ArgvSval) || + BR.getBugType().getCategory() != categories::TaintedData) + return ""; + + return MsgTaintOrigin; + }); + C.addTransition(State, OriginatingTag); +} + void GenericTaintChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + initTaintRules(C); // FIXME: this should be much simpler. 
diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c new file mode 100644 index 0000000000000..315e352cb549f --- /dev/null +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c @@ -0,0 +1,54 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -DUNTRUSTED -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify=expected,untrusted %s +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=true -analyzer-output=text -verify=expected,trusted %s + +// This file is for testing enhanced diagnostics produced by the GenericTaintChecker + +typedef __typeof(sizeof(int)) size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + +int scanf(const char *restrict format, ...); +int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +char *strcat( char *dest, const char *src ); +char * strncat ( char * destination, const char * source, size_t num ); +char* strcpy( char* dest, const char* src ); +char * strncpy ( char * destination, const char * source, size_t num ); +void *malloc(size_t size ); +void free( void *ptr ); +char *fgets(char *str, int n, FILE *stream); +extern FILE *stdin; + + +#ifdef UNTRUSTED +// In an untrusted environment the cmd line arguments +// are assumed to be tainted. 
+int main(int argc, char * argv[]) {// untrusted-note {{Taint originated here}} + if (argc < 1)// untrusted-note {{'argc' is >= 1}} + // untrusted-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncpy(filename, argv[1], sizeof(filename)-1); // untrusted-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// untrusted-note {{Taint propagated to the 1st argument}} + system(cmd);// untrusted-warning {{Untrusted data is passed to a system call}} + // untrusted-note@-1 {{Untrusted data is passed to a system call}} + return 0; + } +#else +int main(int argc, char * argv[]) { + if (argc < 1)// trusted-note {{'argc' is >= 1}} + // trusted-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + scanf("%s", filename);// trusted-note {{Taint originated here}} + // trusted-note@-1 {{Taint propagated to the 2nd argument}} + strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// trusted-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// trusted-note {{Taint propagated to the 1st argument}} + system(cmd);// trusted-warning {{Untrusted data is passed to a system call}} + // trusted-note@-1 {{Untrusted data is passed to a system call}} + return 0; + } + #endif diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index 6017483f06b6d..93a5b9b3a53cd 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -116,6 +116,7 @@ char *stpcpy(char *restrict s1, const char *restrict s2); char *strncpy( char * destination, const char * source, size_t num ); char *strndup(const char *s, size_t n); char *strncat(char *restrict s1, const char *restrict s2, size_t n); +char *strcat( char *dest, const char *src ); void *malloc(size_t); void *calloc(size_t nmemb, size_t size); @@ -1396,3 +1397,15 @@ void 
testAcceptPropagates() { int acceptSocket = accept(listenSocket, 0, 0); clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}} } + +int main(int argc, char * argv[]) { + if (argc < 1) + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}} + strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1); + system(cmd);// expected-warning {{Untrusted data is passed to a system call}} + return 0; + } + \ No newline at end of file >From a2def04e84038ba7b6c4e699a8433f87dc354dfd Mon Sep 17 00:00:00 2001 From: Daniel Krupp <[email protected]> Date: Fri, 6 Feb 2026 20:42:32 +0100 Subject: [PATCH 2/6] Add tainted argc -argc will be also tainted -split the test files with main functions into more files -add c++ tests -add tests for invalid main function -add test for main with envp --- .../Checkers/GenericTaintChecker.cpp | 38 ++++++++++++----- .../Analysis/taint-diagnostic-visitor-main.c | 36 ++++------------ .../taint-diagnostic-visitor-main.cpp | 41 +++++++++++++++++++ .../taint-diagnostic-visitor-main_envp.c | 33 +++++++++++++++ .../taint-diagnostic-visitor-main_invalid.c | 31 ++++++++++++++ .../taint-diagnostic-visitor-main_trusted.c | 39 ++++++++++++++++++ clang/test/Analysis/taint-generic.c | 1 - 7 files changed, 180 insertions(+), 39 deletions(-) create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main.cpp create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main_envp.c create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 9d88f85c88ad2..60eb804e9b9d6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -63,8 +63,6 @@ constexpr 
llvm::StringLiteral MsgSanitizeSystemArgs = constexpr llvm::StringLiteral MsgCustomSink = "Untrusted data is passed to a user-defined sink"; -const std::string MsgTaintOrigin = "Taint originated here"; - using ArgIdxTy = int; using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; @@ -161,7 +159,7 @@ const NoteTag *taintOriginTrackerTag(CheckerContext &C, return ""; } if (TaintedSymbols.empty()) - return MsgTaintOrigin; + return "Taint originated here"; for (auto Sym : TaintedSymbols) { BR.markInteresting(Sym); @@ -846,18 +844,38 @@ void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const { ProgramStateRef State = C.getState(); const MemRegion *ArgvReg = State->getRegion(FD->parameters()[1], C.getLocationContext()); - SVal ArgvSval = State->getSVal(ArgvReg); - // Add taintedness to argv** - State = addTaint(State, ArgvSval); + SVal ArgvSVal = State->getSVal(ArgvReg); + State = addTaint(State, ArgvSVal); + StringRef ArgvName = FD->parameters()[1]->getName(); + + const MemRegion *ArgcReg = + State->getRegion(FD->parameters()[0], C.getLocationContext()); + SVal ArgcSVal = State->getSVal(ArgcReg); + State = addTaint(State, ArgcSVal); + StringRef ArgcName = FD->parameters()[0]->getName(); + if (auto N = ArgcSVal.getAs<NonLoc>()) { + ConstraintManager &CM = C.getConstraintManager(); + // The upper bound is the ARG_MAX on an arbitrary Linux + // to model that it is typically smaller than INT_MAX. 
+ State = CM.assumeInclusiveRange(State, *N, llvm::APSInt::getUnsigned(1), + llvm::APSInt::getUnsigned(2097152), true); + } const NoteTag *OriginatingTag = - C.getNoteTag([ArgvSval](PathSensitiveBugReport &BR) -> std::string { + C.getNoteTag([ArgvSVal, ArgcSVal, ArgcName, + ArgvName](PathSensitiveBugReport &BR) -> std::string { // We give diagnostics only for taint related reports - if (!BR.isInteresting(ArgvSval) || + if ((!BR.isInteresting(ArgcSVal) && !BR.isInteresting(ArgvSVal)) || BR.getBugType().getCategory() != categories::TaintedData) return ""; - - return MsgTaintOrigin; + std::string Message = "Taint originated in "; + if (BR.isInteresting(ArgvSVal) && BR.isInteresting(ArgcSVal)) + Message += "'" + ArgvName.str() + "' and '" + ArgcName.str() + "'"; + else if (BR.isInteresting(ArgvSVal)) + Message += "'" + ArgvName.str() + "'"; + else + Message += "'" + ArgcName.str() + "'"; + return Message; }); C.addTransition(State, OriginatingTag); } diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c index 315e352cb549f..3ee4d3da1d335 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c @@ -1,6 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -DUNTRUSTED -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify=expected,untrusted %s -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=true -analyzer-output=text -verify=expected,trusted %s - +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s // This file is for testing enhanced diagnostics produced by the GenericTaintChecker typedef __typeof(sizeof(int)) size_t; @@ -20,35 +18,17 @@ void free( void *ptr ); char *fgets(char *str, 
int n, FILE *stream); extern FILE *stdin; - -#ifdef UNTRUSTED // In an untrusted environment the cmd line arguments // are assumed to be tainted. -int main(int argc, char * argv[]) {// untrusted-note {{Taint originated here}} - if (argc < 1)// untrusted-note {{'argc' is >= 1}} - // untrusted-note@-1 {{Taking false branch}} - return 1; - char cmd[2048] = "/bin/cat "; - char filename[1024]; - strncpy(filename, argv[1], sizeof(filename)-1); // untrusted-note {{Taint propagated to the 1st argument}} - strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// untrusted-note {{Taint propagated to the 1st argument}} - system(cmd);// untrusted-warning {{Untrusted data is passed to a system call}} - // untrusted-note@-1 {{Untrusted data is passed to a system call}} - return 0; - } -#else -int main(int argc, char * argv[]) { - if (argc < 1)// trusted-note {{'argc' is >= 1}} - // trusted-note@-1 {{Taking false branch}} +int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv'}} + if (argc < 1)// expected-note {{'argc' is >= 1}} + // expected-note@-1 {{Taking false branch}} return 1; char cmd[2048] = "/bin/cat "; char filename[1024]; - scanf("%s", filename);// trusted-note {{Taint originated here}} - // trusted-note@-1 {{Taint propagated to the 2nd argument}} - strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// trusted-note {{Taint propagated to the 1st argument}} - strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// trusted-note {{Taint propagated to the 1st argument}} - system(cmd);// trusted-warning {{Untrusted data is passed to a system call}} - // trusted-note@-1 {{Untrusted data is passed to a system call}} + strncpy(filename, argv[1], sizeof(filename)-1); // expected-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// expected-note {{Taint propagated to the 1st argument}} + system(cmd);// expected-warning {{Untrusted data is passed to a system call}} + // expected-note@-1 
{{Untrusted data is passed to a system call}} return 0; } - #endif diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.cpp b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp new file mode 100644 index 0000000000000..ac2338ccf6b75 --- /dev/null +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s + +// This file is for testing enhanced diagnostics produced by the GenericTaintChecker + +typedef __typeof(sizeof(int)) size_t; + +int system(const char *command); +size_t strlen( const char* str ); +char * strncat ( char * destination, const char * source, size_t num ); +char * strncpy ( char * destination, const char * source, size_t num ); + +// In an untrusted environment the cmd line arguments +// are assumed to be tainted. +int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv'}} + if (argc < 1)// expected-note {{'argc' is >= 1}} + // expected-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncpy(filename, argv[1], sizeof(filename)-1); // expected-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// expected-note {{Taint propagated to the 1st argument}} + system(cmd);// expected-warning {{Untrusted data is passed to a system call}} + // expected-note@-1 {{Untrusted data is passed to a system call}} + return 0; +} + +//Arguments of main as a class member +//are note taint sources. 
+//no warning expected +class MyClass{ + int main(int argc, char * argv[]) { + if (argc < 1) + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncpy(filename, argv[1], sizeof(filename)-1); + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1); + system(cmd); + return 0; + } +}; \ No newline at end of file diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c new file mode 100644 index 0000000000000..382417bc628d3 --- /dev/null +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c @@ -0,0 +1,33 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s + +// This file is for testing enhanced diagnostics produced by the GenericTaintChecker + +typedef __typeof(sizeof(int)) size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + +int atoi( const char* str ); +int scanf(const char *restrict format, ...); +int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +char *strcat( char *dest, const char *src ); +char * strncat ( char * destination, const char * source, size_t num ); +char* strcpy( char* dest, const char* src ); +char * strncpy ( char * destination, const char * source, size_t num ); +void *malloc(size_t size ); +void free( void *ptr ); +char *fgets(char *str, int n, FILE *stream); +extern FILE *stdin; +#define PATH_MAX 4096 + +// In an untrusted environment the cmd line arguments +// are assumed to be tainted. 
+int main( int argc, char *argv[], char *envp[] ) {// expected-note {{Taint originated in 'argc'}} + if (argc < 1)// expected-note {{'argc' is >= 1}} + // expected-note@-1 {{Taking false branch}} + return 1; + int v[5]={1,2,3,4,5}; + return v[argc];// expected-warning {{Potential out of bound access to 'v' with tainted index}} + // expected-note@-1 {{Access of 'v' with a tainted index that may be too large}} + } diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c new file mode 100644 index 0000000000000..16a2b92d04b6c --- /dev/null +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c @@ -0,0 +1,31 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s +// This file is for testing enhanced diagnostics produced by the GenericTaintChecker + +typedef __typeof(sizeof(int)) size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + +int scanf(const char *restrict format, ...); +int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +char *strcat( char *dest, const char *src ); +char * strncat ( char * destination, const char * source, size_t num ); +char* strcpy( char* dest, const char* src ); +char * strncpy ( char * destination, const char * source, size_t num ); +void *malloc(size_t size ); +void free( void *ptr ); +char *fgets(char *str, int n, FILE *stream); +extern FILE *stdin; + + +// invalid main function +// expected-no-diagnostics + +int main(void) { + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1); + system(cmd); + return 0; +} diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c new file mode 100644 index 0000000000000..2761811a06e43 --- /dev/null +++ 
b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c @@ -0,0 +1,39 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=true -analyzer-output=text -verify %s + +// This file is for testing enhanced diagnostics produced by the GenericTaintChecker + +typedef __typeof(sizeof(int)) size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + +int scanf(const char *restrict format, ...); +int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +char *strcat( char *dest, const char *src ); +char * strncat ( char * destination, const char * source, size_t num ); +char* strcpy( char* dest, const char* src ); +char * strncpy ( char * destination, const char * source, size_t num ); +void *malloc(size_t size ); +void free( void *ptr ); +char *fgets(char *str, int n, FILE *stream); +extern FILE *stdin; + +// This is to test that in trusted env +// the diagnostics are constructed so +// that argc or argv are not marked as +// taint origin. 
+int main(int argc, char * argv[]) { + if (argc < 1)// expected-note {{'argc' is >= 1}} + // expected-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + scanf("%s", filename);// expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the 2nd argument}} + strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// expected-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// expected-note {{Taint propagated to the 1st argument}} + system(cmd);// expected-warning {{Untrusted data is passed to a system call}} + // expected-note@-1 {{Untrusted data is passed to a system call}} + return 0; + } diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index 93a5b9b3a53cd..62c524bf8e3e1 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -1402,7 +1402,6 @@ int main(int argc, char * argv[]) { if (argc < 1) return 1; char cmd[2048] = "/bin/cat "; - char filename[1024]; clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}} strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1); system(cmd);// expected-warning {{Untrusted data is passed to a system call}} >From 402edc60eade0e8b7efa55dbeff7dd8a20daf220 Mon Sep 17 00:00:00 2001 From: Daniel Krupp <[email protected]> Date: Mon, 9 Feb 2026 18:04:39 +0100 Subject: [PATCH 3/6] Removing superfluous declarations from the test files --- .../Analysis/taint-diagnostic-visitor-main.c | 12 +----------- .../taint-diagnostic-visitor-main.cpp | 1 - .../taint-diagnostic-visitor-main_envp.c | 19 ------------------- .../taint-diagnostic-visitor-main_invalid.c | 12 ------------ .../taint-diagnostic-visitor-main_trusted.c | 11 ----------- 5 files changed, 1 insertion(+), 54 deletions(-) diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c index 
3ee4d3da1d335..a9cc10324fa64 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c @@ -1,22 +1,12 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s // This file is for testing enhanced diagnostics produced by the GenericTaintChecker -typedef __typeof(sizeof(int)) size_t; -struct _IO_FILE; -typedef struct _IO_FILE FILE; -int scanf(const char *restrict format, ...); +typedef __typeof(sizeof(int)) size_t; int system(const char *command); -char* getenv( const char* env_var ); size_t strlen( const char* str ); -char *strcat( char *dest, const char *src ); char * strncat ( char * destination, const char * source, size_t num ); -char* strcpy( char* dest, const char* src ); char * strncpy ( char * destination, const char * source, size_t num ); -void *malloc(size_t size ); -void free( void *ptr ); -char *fgets(char *str, int n, FILE *stream); -extern FILE *stdin; // In an untrusted environment the cmd line arguments // are assumed to be tainted. 
diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.cpp b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp index ac2338ccf6b75..9588ae9407322 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main.cpp +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp @@ -3,7 +3,6 @@ // This file is for testing enhanced diagnostics produced by the GenericTaintChecker typedef __typeof(sizeof(int)) size_t; - int system(const char *command); size_t strlen( const char* str ); char * strncat ( char * destination, const char * source, size_t num ); diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c index 382417bc628d3..d3e2f8677e762 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c @@ -2,25 +2,6 @@ // This file is for testing enhanced diagnostics produced by the GenericTaintChecker -typedef __typeof(sizeof(int)) size_t; -struct _IO_FILE; -typedef struct _IO_FILE FILE; - -int atoi( const char* str ); -int scanf(const char *restrict format, ...); -int system(const char *command); -char* getenv( const char* env_var ); -size_t strlen( const char* str ); -char *strcat( char *dest, const char *src ); -char * strncat ( char * destination, const char * source, size_t num ); -char* strcpy( char* dest, const char* src ); -char * strncpy ( char * destination, const char * source, size_t num ); -void *malloc(size_t size ); -void free( void *ptr ); -char *fgets(char *str, int n, FILE *stream); -extern FILE *stdin; -#define PATH_MAX 4096 - // In an untrusted environment the cmd line arguments // are assumed to be tainted. 
int main( int argc, char *argv[], char *envp[] ) {// expected-note {{Taint originated in 'argc'}} diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c index 16a2b92d04b6c..53291fb6b1593 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c @@ -2,21 +2,9 @@ // This file is for testing enhanced diagnostics produced by the GenericTaintChecker typedef __typeof(sizeof(int)) size_t; -struct _IO_FILE; -typedef struct _IO_FILE FILE; - -int scanf(const char *restrict format, ...); int system(const char *command); -char* getenv( const char* env_var ); size_t strlen( const char* str ); -char *strcat( char *dest, const char *src ); char * strncat ( char * destination, const char * source, size_t num ); -char* strcpy( char* dest, const char* src ); -char * strncpy ( char * destination, const char * source, size_t num ); -void *malloc(size_t size ); -void free( void *ptr ); -char *fgets(char *str, int n, FILE *stream); -extern FILE *stdin; // invalid main function diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c index 2761811a06e43..f321d4c2a7d75 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c @@ -3,21 +3,10 @@ // This file is for testing enhanced diagnostics produced by the GenericTaintChecker typedef __typeof(sizeof(int)) size_t; -struct _IO_FILE; -typedef struct _IO_FILE FILE; - int scanf(const char *restrict format, ...); int system(const char *command); -char* getenv( const char* env_var ); size_t strlen( const char* str ); -char *strcat( char *dest, const char *src ); char * strncat ( char * destination, const char * source, size_t num ); -char* strcpy( char* dest, const char* src ); -char * strncpy ( char * destination, const char * 
source, size_t num ); -void *malloc(size_t size ); -void free( void *ptr ); -char *fgets(char *str, int n, FILE *stream); -extern FILE *stdin; // This is to test that in trusted env // the diagnostics are constructed so >From 421e2b95fa51f5639ea2b0f474204f04cc0c2415 Mon Sep 17 00:00:00 2001 From: Daniel Krupp <[email protected]> Date: Fri, 20 Feb 2026 15:49:10 +0100 Subject: [PATCH 4/6] fixup! --- .../test/Analysis/taint-diagnostic-visitor-main.c | 2 +- .../Analysis/taint-diagnostic-visitor-main.cpp | 15 ++++++++++----- .../taint-diagnostic-visitor-main_trusted.c | 6 ++---- clang/test/Analysis/taint-generic.c | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c index a9cc10324fa64..dbe5f2f5a73cc 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c @@ -11,7 +11,7 @@ char * strncpy ( char * destination, const char * source, size_t num ); // In an untrusted environment the cmd line arguments // are assumed to be tainted. int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv'}} - if (argc < 1)// expected-note {{'argc' is >= 1}} + if (argc < 2)// expected-note {{'argc' is >= 2}} // expected-note@-1 {{Taking false branch}} return 1; char cmd[2048] = "/bin/cat "; diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.cpp b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp index 9588ae9407322..e587fe10da779 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main.cpp +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp @@ -11,7 +11,7 @@ char * strncpy ( char * destination, const char * source, size_t num ); // In an untrusted environment the cmd line arguments // are assumed to be tainted. 
int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv'}} - if (argc < 1)// expected-note {{'argc' is >= 1}} + if (argc < 2)// expected-note {{'argc' is >= 2}} // expected-note@-1 {{Taking false branch}} return 1; char cmd[2048] = "/bin/cat "; @@ -23,12 +23,17 @@ int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv' return 0; } -//Arguments of main as a class member -//are note taint sources. -//no warning expected + +// Arguments of main as a class member +// are note taint sources. +// no warning expected +// A function declared inside a class or namespace may be named "main" but it +// cannot be _the_ `main` function that is executed at startup. Validate that +// in a case like this the arguments are not marked as tainted and no warning +// is produced. class MyClass{ int main(int argc, char * argv[]) { - if (argc < 1) + if (argc < 2) return 1; char cmd[2048] = "/bin/cat "; char filename[1024]; diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c index f321d4c2a7d75..f102c6aec1c90 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c @@ -8,10 +8,8 @@ int system(const char *command); size_t strlen( const char* str ); char * strncat ( char * destination, const char * source, size_t num ); -// This is to test that in trusted env -// the diagnostics are constructed so -// that argc or argv are not marked as -// taint origin. +// This is to test that in trusted env the diagnostics are constructed so that +// argc or argv are not marked as taint origin. 
int main(int argc, char * argv[]) { if (argc < 1)// expected-note {{'argc' is >= 1}} // expected-note@-1 {{Taking false branch}} diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index 62c524bf8e3e1..660909ed45ed3 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -1399,7 +1399,7 @@ void testAcceptPropagates() { } int main(int argc, char * argv[]) { - if (argc < 1) + if (argc < 2) return 1; char cmd[2048] = "/bin/cat "; clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}} >From 6a93184ec9788b4200b138a2ff175fbbc3e595ec Mon Sep 17 00:00:00 2001 From: Daniel Krupp <[email protected]> Date: Fri, 20 Feb 2026 17:09:18 +0100 Subject: [PATCH 5/6] Applying clang-format fixes to the test files --- .../Analysis/taint-diagnostic-visitor-main.c | 33 +++++++------ .../taint-diagnostic-visitor-main.cpp | 49 ++++++++++--------- .../taint-diagnostic-visitor-main_envp.c | 20 ++++---- .../taint-diagnostic-visitor-main_invalid.c | 24 ++++----- .../taint-diagnostic-visitor-main_trusted.c | 40 +++++++++------ 5 files changed, 90 insertions(+), 76 deletions(-) diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c index dbe5f2f5a73cc..afc90aa1bdb92 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c @@ -1,24 +1,25 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s -// This file is for testing enhanced diagnostics produced by the GenericTaintChecker +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound \ +// RUN: -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s +// This file is for testing enhanced diagnostics produced by the GenericTaintChecker typedef __typeof(sizeof(int)) 
size_t; int system(const char *command); -size_t strlen( const char* str ); -char * strncat ( char * destination, const char * source, size_t num ); -char * strncpy ( char * destination, const char * source, size_t num ); +size_t strlen(const char *str); +char *strncat(char *destination, const char *source, size_t num); +char *strncpy(char *destination, const char *source, size_t num); // In an untrusted environment the cmd line arguments // are assumed to be tainted. -int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv'}} - if (argc < 2)// expected-note {{'argc' is >= 2}} - // expected-note@-1 {{Taking false branch}} - return 1; - char cmd[2048] = "/bin/cat "; - char filename[1024]; - strncpy(filename, argv[1], sizeof(filename)-1); // expected-note {{Taint propagated to the 1st argument}} - strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// expected-note {{Taint propagated to the 1st argument}} - system(cmd);// expected-warning {{Untrusted data is passed to a system call}} +int main(int argc, char *argv[]) { // expected-note {{Taint originated in 'argv'}} + if (argc < 2) // expected-note {{'argc' is >= 2}} + // expected-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncpy(filename, argv[1], sizeof(filename) - 1); // expected-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); // expected-note {{Taint propagated to the 1st argument}} + system(cmd); // expected-warning {{Untrusted data is passed to a system call}} // expected-note@-1 {{Untrusted data is passed to a system call}} - return 0; - } + return 0; +} diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.cpp b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp index e587fe10da779..5fd961e793a27 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main.cpp +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.cpp @@ -1,29 +1,30 @@ -// RUN: 
%clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config \ +// RUN: assume-controlled-environment=false -analyzer-output=text -verify %s -// This file is for testing enhanced diagnostics produced by the GenericTaintChecker +// This file is for testing enhanced diagnostics produced by the +// GenericTaintChecker typedef __typeof(sizeof(int)) size_t; int system(const char *command); -size_t strlen( const char* str ); -char * strncat ( char * destination, const char * source, size_t num ); -char * strncpy ( char * destination, const char * source, size_t num ); +size_t strlen(const char *str); +char *strncat(char *destination, const char *source, size_t num); +char *strncpy(char *destination, const char *source, size_t num); // In an untrusted environment the cmd line arguments // are assumed to be tainted. 
-int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv'}} - if (argc < 2)// expected-note {{'argc' is >= 2}} - // expected-note@-1 {{Taking false branch}} - return 1; - char cmd[2048] = "/bin/cat "; - char filename[1024]; - strncpy(filename, argv[1], sizeof(filename)-1); // expected-note {{Taint propagated to the 1st argument}} - strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// expected-note {{Taint propagated to the 1st argument}} - system(cmd);// expected-warning {{Untrusted data is passed to a system call}} +int main(int argc, char *argv[]) { // expected-note {{Taint originated in 'argv'}} + if (argc < 2) // expected-note {{'argc' is >= 2}} + // expected-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncpy(filename, argv[1], sizeof(filename) - 1); // expected-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); // expected-note {{Taint propagated to the 1st argument}} + system(cmd); // expected-warning {{Untrusted data is passed to a system call}} // expected-note@-1 {{Untrusted data is passed to a system call}} - return 0; + return 0; } - // Arguments of main as a class member // are note taint sources. // no warning expected @@ -31,15 +32,15 @@ int main(int argc, char * argv[]) {// expected-note {{Taint originated in 'argv' // cannot be _the_ `main` function that is executed at startup. Validate that // in a case like this the arguments are not marked as tainted and no warning // is produced. 
-class MyClass{ - int main(int argc, char * argv[]) { - if (argc < 2) - return 1; +class MyClass { + int main(int argc, char *argv[]) { + if (argc < 2) + return 1; char cmd[2048] = "/bin/cat "; char filename[1024]; - strncpy(filename, argv[1], sizeof(filename)-1); - strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1); - system(cmd); + strncpy(filename, argv[1], sizeof(filename) - 1); + strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); + system(cmd); return 0; - } + } }; \ No newline at end of file diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c index d3e2f8677e762..a46166a910fce 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c @@ -1,14 +1,16 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound \ +// RUN: -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s -// This file is for testing enhanced diagnostics produced by the GenericTaintChecker +// This file is for testing enhanced diagnostics produced by the +// GenericTaintChecker // In an untrusted environment the cmd line arguments // are assumed to be tainted. 
-int main( int argc, char *argv[], char *envp[] ) {// expected-note {{Taint originated in 'argc'}} - if (argc < 1)// expected-note {{'argc' is >= 1}} - // expected-note@-1 {{Taking false branch}} - return 1; - int v[5]={1,2,3,4,5}; - return v[argc];// expected-warning {{Potential out of bound access to 'v' with tainted index}} +int main(int argc, char *argv[], char *envp[]) { // expected-note {{Taint originated in 'argc'}} + if (argc < 2) // expected-note {{'argc' is >= 2}} + // expected-note@-1 {{Taking false branch}} + return 1; + int v[5] = {1, 2, 3, 4, 5}; + return v[argc]; // expected-warning {{Potential out of bound access to 'v' with tainted index}} // expected-note@-1 {{Access of 'v' with a tainted index that may be too large}} - } +} diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c index 53291fb6b1593..fea5f8dd4e8e6 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c @@ -1,19 +1,21 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify %s -// This file is for testing enhanced diagnostics produced by the GenericTaintChecker +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config \ +// RUN: assume-controlled-environment=false -analyzer-output=text -verify %s + +// This file is for testing enhanced +// diagnostics produced by the GenericTaintChecker typedef __typeof(sizeof(int)) size_t; int system(const char *command); -size_t strlen( const char* str ); -char * strncat ( char * destination, const char * source, size_t num ); - +size_t strlen(const char *str); +char *strncat(char *destination, const char *source, size_t num); // invalid main function // expected-no-diagnostics -int main(void) { - char cmd[2048] = "/bin/cat "; - char 
filename[1024]; - strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1); - system(cmd); - return 0; +int main(void) { + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); + system(cmd); + return 0; } diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c index f102c6aec1c90..96ae049311e37 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_trusted.c @@ -1,26 +1,34 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=true -analyzer-output=text -verify %s +// RUN: %clang_analyze_cc1 +// -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config +// assume-controlled-environment=true -analyzer-output=text -verify %s -// This file is for testing enhanced diagnostics produced by the GenericTaintChecker +// This file is for testing enhanced diagnostics produced by the +// GenericTaintChecker typedef __typeof(sizeof(int)) size_t; int scanf(const char *restrict format, ...); int system(const char *command); -size_t strlen( const char* str ); -char * strncat ( char * destination, const char * source, size_t num ); +size_t strlen(const char *str); +char *strncat(char *destination, const char *source, size_t num); // This is to test that in trusted env the diagnostics are constructed so that // argc or argv are not marked as taint origin. 
-int main(int argc, char * argv[]) { - if (argc < 1)// expected-note {{'argc' is >= 1}} +int main(int argc, char *argv[]) { + if (argc < 1) // expected-note {{'argc' is >= 1}} // expected-note@-1 {{Taking false branch}} - return 1; - char cmd[2048] = "/bin/cat "; - char filename[1024]; - scanf("%s", filename);// expected-note {{Taint originated here}} - // expected-note@-1 {{Taint propagated to the 2nd argument}} - strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// expected-note {{Taint propagated to the 1st argument}} - strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// expected-note {{Taint propagated to the 1st argument}} - system(cmd);// expected-warning {{Untrusted data is passed to a system call}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + scanf("%s", + filename); // expected-note {{Taint originated here}} + // expected-note@-1 {{Taint propagated to the 2nd argument}} + strncat(filename, argv[1], + sizeof(filename) - -strlen(argv[1]) - + 1); // expected-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, + sizeof(cmd) - strlen(cmd) - + 1); // expected-note {{Taint propagated to the 1st argument}} + system(cmd); // expected-warning {{Untrusted data is passed to a system call}} // expected-note@-1 {{Untrusted data is passed to a system call}} - return 0; - } + return 0; +} >From 154170e739c6372337b363a28b789478c323718d Mon Sep 17 00:00:00 2001 From: Daniel Krupp <[email protected]> Date: Fri, 20 Feb 2026 20:09:33 +0100 Subject: [PATCH 6/6] Add taintedness to envp parameter of main too if available --- clang/docs/analyzer/checkers.rst | 8 +-- .../Checkers/GenericTaintChecker.cpp | 64 +++++++++++++++---- .../taint-diagnostic-visitor-main_envp.c | 26 +++++--- .../taint-diagnostic-visitor-main_invalid.c | 2 +- 4 files changed, 73 insertions(+), 27 deletions(-) diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index a3ac11935afd8..91e698e4c637a 100644 --- 
a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1374,12 +1374,12 @@ For a more detailed description of configuration options, please see the * ``Config`` Specifies the name of the YAML configuration file. The user can define their own taint sources and sinks. -* The if the analyzer option ``assume-controlled-environment`` is set to ``false``, +* If the analyzer option ``assume-controlled-environment`` is set to ``false``, it is assumed that the command line arguments and the environment variables of the program are attacker controlled. - In particular, the ``argv`` argument of the ``main`` function and - the return value of the ``getenv()`` function are assumed to - hold tainted values. + In particular, the ``argv``, ``argc`` and ``envp`` arguments of the + ``main`` function and the return value of the ``getenv()`` + function are assumed to hold tainted values. **Related Guidelines** diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 60eb804e9b9d6..88b957b41b020 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -829,6 +829,16 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { std::make_move_iterator(Rules.end())); } +bool isPointerToCharArray(const QualType &QT) { + if (!QT->isPointerType()) + return false; + QualType PointeeType = QT->getPointeeType(); + if (!PointeeType->isPointerType() || + !PointeeType->getPointeeType()->isCharType()) + return false; + return true; +} + // The incoming parameters of the main function get tainted // if the program called in an untrusted environment. 
void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const { @@ -841,12 +851,12 @@ void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const { if (!FD || !FD->isMain() || FD->param_size() < 2) return; + if (!FD->parameters()[0]->getType()->isIntegerType()) + return; + + if (!isPointerToCharArray(FD->parameters()[1]->getType())) + return; ProgramStateRef State = C.getState(); - const MemRegion *ArgvReg = - State->getRegion(FD->parameters()[1], C.getLocationContext()); - SVal ArgvSVal = State->getSVal(ArgvReg); - State = addTaint(State, ArgvSVal); - StringRef ArgvName = FD->parameters()[1]->getName(); const MemRegion *ArgcReg = State->getRegion(FD->parameters()[0], C.getLocationContext()); @@ -861,21 +871,47 @@ void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const { llvm::APSInt::getUnsigned(2097152), true); } + const MemRegion *ArgvReg = + State->getRegion(FD->parameters()[1], C.getLocationContext()); + SVal ArgvSVal = State->getSVal(ArgvReg); + State = addTaint(State, ArgvSVal); + StringRef ArgvName = FD->parameters()[1]->getName(); + + bool HaveEnvp = FD->param_size() > 2; + SVal EnvpSVal; + StringRef EnvpName; + if (HaveEnvp && !isPointerToCharArray(FD->parameters()[2]->getType())) + return; + if (HaveEnvp) { + const MemRegion *EnvPReg = + State->getRegion(FD->parameters()[2], C.getLocationContext()); + EnvpSVal = State->getSVal(EnvPReg); + EnvpName = FD->parameters()[2]->getName(); + State = addTaint(State, EnvpSVal); + } + const NoteTag *OriginatingTag = - C.getNoteTag([ArgvSVal, ArgcSVal, ArgcName, - ArgvName](PathSensitiveBugReport &BR) -> std::string { + C.getNoteTag([ArgvSVal, ArgcSVal, ArgcName, ArgvName, EnvpSVal, + EnvpName](PathSensitiveBugReport &BR) -> std::string { // We give diagnostics only for taint related reports - if ((!BR.isInteresting(ArgcSVal) && !BR.isInteresting(ArgvSVal)) || + if ((!BR.isInteresting(ArgcSVal) && !BR.isInteresting(ArgvSVal) && + !BR.isInteresting(EnvpSVal)) || 
BR.getBugType().getCategory() != categories::TaintedData) return ""; - std::string Message = "Taint originated in "; - if (BR.isInteresting(ArgvSVal) && BR.isInteresting(ArgcSVal)) - Message += "'" + ArgvName.str() + "' and '" + ArgcName.str() + "'"; - else if (BR.isInteresting(ArgvSVal)) + std::string Message = ""; + if (BR.isInteresting(ArgvSVal)) Message += "'" + ArgvName.str() + "'"; - else + if (BR.isInteresting(ArgcSVal)){ + if (Message.size()>0) + Message += ", "; Message += "'" + ArgcName.str() + "'"; - return Message; + } + if (BR.isInteresting(EnvpSVal)){ + if (Message.size()>0) + Message += ", "; + Message += "'" + EnvpName.str() + "'"; + } + return "Taint originated in " + Message; }); C.addTransition(State, OriginatingTag); } diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c index a46166a910fce..0f51aa97fd82c 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_envp.c @@ -4,13 +4,23 @@ // This file is for testing enhanced diagnostics produced by the // GenericTaintChecker -// In an untrusted environment the cmd line arguments -// are assumed to be tainted. -int main(int argc, char *argv[], char *envp[]) { // expected-note {{Taint originated in 'argc'}} - if (argc < 2) // expected-note {{'argc' is >= 2}} - // expected-note@-1 {{Taking false branch}} +typedef __typeof(sizeof(int)) size_t; +int system(const char *command); +size_t strlen(const char *str); +char *strncat(char *destination, const char *source, size_t num); +char *strncpy(char *destination, const char *source, size_t num); + +// In an untrusted environment the environment variables +// coming through the envp are also tainted. 
+int main(int argc, char *argv[], char *envp[]) { // expected-note {{Taint originated in 'envp'}} + char cmd[2048] = "/bin/cat "; + char filename[1024]; + if (!envp[0]) // expected-note {{Assuming the condition is false}} + // expected-note@-1 {{Taking false branch}} return 1; - int v[5] = {1, 2, 3, 4, 5}; - return v[argc]; // expected-warning {{Potential out of bound access to 'v' with tainted index}} - // expected-note@-1 {{Access of 'v' with a tainted index that may be too large}} + strncpy(filename, envp[0], sizeof(filename) - 1); // expected-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd) - 1); // expected-note {{Taint propagated to the 1st argument}} + system(cmd); // expected-warning {{Untrusted data is passed to a system call}} + // expected-note@-1 {{Untrusted data is passed to a system call}} + return 0; } diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c index fea5f8dd4e8e6..5e4e1cc5b1b30 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c +++ b/clang/test/Analysis/taint-diagnostic-visitor-main_invalid.c @@ -1,7 +1,7 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config \ // RUN: assume-controlled-environment=false -analyzer-output=text -verify %s -// This file is for testing enhanced +// This file is for testing enhanced // diagnostics produced by the GenericTaintChecker typedef __typeof(sizeof(int)) size_t; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
