https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/132765
From f0ac1f6c223b3bfce25ba0183ba1aa2825c455ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Mon, 24 Mar 2025 16:58:31 +0100 Subject: [PATCH 1/2] [NFC][analyzer] Add testcase to highlight GenericTaint bug Currently `optin.taint.GenericTaint` can produce false positives if a [format attribute](https://clang.llvm.org/docs/AttributeReference.html#format) is applied on a non-static method. This commit adds a testcase that highlights this buggy behavior. --- clang/test/Analysis/taint-generic.cpp | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp index 8836e1d3d2d98..41fbe313d2b44 100644 --- a/clang/test/Analysis/taint-generic.cpp +++ b/clang/test/Analysis/taint-generic.cpp @@ -161,3 +161,46 @@ void top() { clang_analyzer_isTainted(A.data); // expected-warning {{YES}} } } // namespace gh114270 + + +namespace format_attribute { +__attribute__((__format__ (__printf__, 1, 2))) +void log_nonmethod(const char *fmt, ...); + +void test_format_attribute_nonmethod() { + int n; + fscanf(stdin, "%d", &n); // Get a tainted value. + + log_nonmethod("This number is suspicious: %d\n", n); // no-warning +} + +struct Foo { + // When the format attribute is applied to a method, argumet '1' is the + // implicit `this`, so e.g. in this case argument '2' specifies `fmt`. + // Specifying '1' instead of '2' would produce a compilation error: + // "format attribute cannot specify the implicit this argument as the format string" + __attribute__((__format__ (__printf__, 2, 3))) + void log_method(const char *fmt, ...); + + void test_format_attribute_method() { + int n; + fscanf(stdin, "%d", &n); // Get a tainted value. + + // FIXME: The analyzer misinterprets the parameter indices in the format + // attribute when the format attribute is applied to a method. + log_method("This number is suspicious: %d\n", n); + // expected-warning@-1 {{Untrusted data is used as a format string}} + } + + __attribute__((__format__ (__printf__, 1, 2))) + static void log_static_method(const char *fmt, ...); + + void test_format_attribute_static_method() { + int n; + fscanf(stdin, "%d", &n); // Get a tainted value. + + log_static_method("This number is suspicious: %d\n", n); // no-warning + } +}; + +} // namespace format_attribute From a97854213898d42c76bebffc8a093558cfc7e89e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Mon, 24 Mar 2025 17:44:10 +0100 Subject: [PATCH 2/2] [analyzer] Fix format attribute handling in GenericTaintChecker Previously the `optin.taint.GenericTaint` checker misinterpreted the parameter indices in situations when the format attribute was applied to a (non-static) method. This commit fixes this bug. --- .../Checkers/GenericTaintChecker.cpp | 16 ++++++++++++++++ clang/test/Analysis/taint-generic.cpp | 5 ++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index b89a6e2588c98..1b61370a580d9 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -1080,7 +1080,23 @@ static bool getPrintfFormatArgumentNum(const CallEvent &Call, const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { + // The format attribute uses 1-based parameter indexing, for example + // plain `printf(const char *fmt, ...)` would be annotated with + // `__format__(__printf__, 1, 2)`, so we need to subtract 1 to get a + // 0-based index. (This checker uses 0-based parameter indices.) ArgNum = Format->getFormatIdx() - 1; + // The format attribute also counts the implicit `this` parameter of + // methods, so e.g. in `SomeClass::method(const char *fmt, ...)` could be + // annotated with `__format__(__printf__, 2, 3)`. This checker doesn't + // count the implicit `this` parameter, so in this case we need to subtract + // one again. + // FIXME: Apparently the implementation of the format attribute doesn't + // support methods with an explicit object parameter, so we cannot + // implement proper support for that rare case either. + const CXXMethodDecl *MDecl = dyn_cast<CXXMethodDecl>(FDecl); + if (MDecl && !MDecl->isStatic()) + ArgNum--; + if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum) return true; } diff --git a/clang/test/Analysis/taint-generic.cpp b/clang/test/Analysis/taint-generic.cpp index 41fbe313d2b44..3bf3ca3a75099 100644 --- a/clang/test/Analysis/taint-generic.cpp +++ b/clang/test/Analysis/taint-generic.cpp @@ -186,10 +186,9 @@ struct Foo { int n; fscanf(stdin, "%d", &n); // Get a tainted value. - // FIXME: The analyzer misinterprets the parameter indices in the format + // The analyzer used to misinterpret the parameter indices in the format // attribute when the format attribute is applied to a method. - log_method("This number is suspicious: %d\n", n); - // expected-warning@-1 {{Untrusted data is used as a format string}} + log_method("This number is suspicious: %d\n", n); // no-warning } __attribute__((__format__ (__printf__, 1, 2))) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits