ASDenysPetrov updated this revision to Diff 381533.
ASDenysPetrov added a comment.
Fixed the failing test in `array-struct-region.c`. Added examples in comments
as suggested.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D107339/new/
https://reviews.llvm.org/D107339
Files:
clang/lib/StaticAnalyzer/Core/RegionStore.cpp
clang/test/Analysis/initialization.cpp
Index: clang/test/Analysis/initialization.cpp
===================================================================
--- clang/test/Analysis/initialization.cpp
+++ clang/test/Analysis/initialization.cpp
@@ -146,3 +146,110 @@
void struct_arr_index1() {
clang_analyzer_eval(S2::arr_no_init[2]); // expected-warning{{UNKNOWN}}
}
+
+char const glob_arr6[5] = "123";
+void glob_array_index5() {
+ clang_analyzer_eval(glob_arr6[0] == '1'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr6[1] == '2'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr6[2] == '3'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr6[3] == '\0'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr6[4] == '\0'); // expected-warning{{TRUE}}
+}
+
+void glob_ptr_index3() {
+ char const *ptr = glob_arr6;
+ clang_analyzer_eval(ptr[-42] == '\0'); // expected-warning{{UNDEFINED}}
+ clang_analyzer_eval(ptr[0] == '1'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(ptr[1] == '2'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(ptr[2] == '3'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(ptr[3] == '\0'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(ptr[4] == '\0'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(ptr[5] == '\0'); // expected-warning{{UNDEFINED}}
+ clang_analyzer_eval(ptr[6] == '\0'); // expected-warning{{UNDEFINED}}
+}
+
+void glob_invalid_index7() {
+ int idx = -42;
+ auto x = glob_arr6[idx]; // expected-warning{{garbage or undefined}}
+}
+
+void glob_invalid_index8() {
+ const char *ptr = glob_arr6;
+ int idx = 42;
+ auto x = ptr[idx]; // expected-warning{{garbage or undefined}}
+}
+
+char const glob_arr7[5] = {"123"};
+void glob_array_index6() {
+ clang_analyzer_eval(glob_arr7[0] == '1'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr7[1] == '2'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr7[2] == '3'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr7[3] == '\0'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_arr7[4] == '\0'); // expected-warning{{TRUE}}
+}
+
+void glob_invalid_index9() {
+ int idx = -42;
+ auto x = glob_arr7[idx]; // expected-warning{{garbage or undefined}}
+}
+
+void glob_invalid_index10() {
+ const char *ptr = glob_arr7;
+ int idx = 42;
+ auto x = ptr[idx]; // expected-warning{{garbage or undefined}}
+}
+
+char const *const glob_ptr8 = "123";
+void glob_ptr_index4() {
+ clang_analyzer_eval(glob_ptr8[0] == '1'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr8[1] == '2'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr8[2] == '3'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr8[3] == '\0'); // expected-warning{{TRUE}}
+ // FIXME: Should be UNDEFINED.
+ // We should take into account a declaration in which the literal is used.
+ clang_analyzer_eval(glob_ptr8[4] == '\0'); // expected-warning{{TRUE}}
+}
+
+void glob_invalid_index11() {
+ int idx = -42;
+ auto x = glob_ptr8[idx]; // expected-warning{{garbage or undefined}}
+}
+
+void glob_invalid_index12() {
+ int idx = 42;
+ // FIXME: Should warn {{garbage or undefined}}
+ // We should take into account a declaration in which the literal is used.
+ auto x = glob_ptr8[idx]; // no-warning
+}
+
+const char16_t *const glob_ptr9 = u"абв";
+void glob_ptr_index5() {
+ clang_analyzer_eval(glob_ptr9[0] == u'а'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr9[1] == u'б'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr9[2] == u'в'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr9[3] == '\0'); // expected-warning{{TRUE}}
+}
+
+const char32_t *const glob_ptr10 = U"\U0001F607\U0001F608\U0001F609";
+void glob_ptr_index6() {
+ clang_analyzer_eval(glob_ptr10[0] == U'\U0001F607'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr10[1] == U'\U0001F608'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr10[2] == U'\U0001F609'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr10[3] == '\0'); // expected-warning{{TRUE}}
+}
+
+const wchar_t *const glob_ptr11 = L"\123\u0041\xFF";
+void glob_ptr_index7() {
+ clang_analyzer_eval(glob_ptr11[0] == L'\123'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr11[1] == L'\u0041'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr11[2] == L'\xFF'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr11[3] == L'\0'); // expected-warning{{TRUE}}
+}
+
+const char *const glob_ptr12 = u8"abc";
+void glob_ptr_index8() {
+ clang_analyzer_eval(glob_ptr12[0] == 'a'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr12[1] == 'b'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr12[2] == 'c'); // expected-warning{{TRUE}}
+ clang_analyzer_eval(glob_ptr12[3] == '\0'); // expected-warning{{TRUE}}
+}
Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -441,6 +441,8 @@
RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R);
Optional<SVal> getSValFromInitListExpr(const InitListExpr *ILE,
uint64_t Offset, QualType ElemT);
+ SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset,
+ QualType ElemT);
public: // Part of public interface to class.
@@ -1700,10 +1702,16 @@
// From here `Offset` is in the bounds.
// Handle InitListExpr.
+ // Example:
+ // const char arr[] = { 1, 2, 3 };
if (const auto *ILE = dyn_cast<InitListExpr>(Init))
return getSValFromInitListExpr(ILE, Offset, R->getElementType());
- // FIXME: Handle StringLiteral.
+ // Handle StringLiteral.
+ // Example:
+ // const char arr[] = "abc";
+ if (const auto *SL = dyn_cast<StringLiteral>(Init))
+ return getSValFromStringLiteral(SL, Offset, R->getElementType());
// FIXME: Handle CompoundLiteralExpr.
@@ -1715,6 +1723,15 @@
uint64_t Offset, QualType ElemT) {
assert(ILE && "InitListExpr should not be null");
+ // C++20 [dcl.init.string] 9.4.2.1:
+ // An array of ordinary character type [...] can be initialized by [...]
+ // an appropriately-typed string-literal enclosed in braces.
+ // Example:
+ // const char arr[] = { "abc" };
+ if (ILE->isStringLiteralInit())
+ if (const auto *SL = dyn_cast<StringLiteral>(ILE->getInit(0)))
+ return getSValFromStringLiteral(SL, Offset, ElemT);
+
// C++20 [expr.add] 9.4.17.5 (excerpt):
// i-th array element is value-initialized for each k < i ≤ n,
// where k is an expression-list size and n is an array extent.
@@ -1727,6 +1744,24 @@
return svalBuilder.getConstantVal(E);
}
+SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL,
+ uint64_t Offset,
+ QualType ElemT) {
+ assert(SL && "StringLiteral should not be null");
+ // C++20 [dcl.init.string] 9.4.2.3:
+ // If there are fewer initializers than there are array elements, each
+ // element not explicitly initialized shall be zero-initialized [dcl.init].
+ // NOTE: We return `0` for every offset >= the literal length for cases, like:
+ // char str[42] = "123"; // literal length is 4
+ // char c = str[41]; // offset is 41
+ // FIXME: Nevertheless, we can't do the same for cases, like:
+ // const char *str = "123"; // literal length is 4
+ // char c = str[41]; // offset is 41
+ // It should be properly handled before reaching this point.
+ uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(Offset);
+ return svalBuilder.makeIntVal(Code, ElemT);
+}
+
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
const ElementRegion* R) {
// Check if the region has a binding.
@@ -1738,26 +1773,18 @@
// Check if the region is an element region of a string literal.
if (const StringRegion *StrR = dyn_cast<StringRegion>(superR)) {
// FIXME: Handle loads from strings where the literal is treated as
- // an integer, e.g., *((unsigned int*)"hello")
+ // an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according
+ // to C++20 7.2.1.11 [basic.lval].
QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType();
if (!Ctx.hasSameUnqualifiedType(T, R->getElementType()))
return UnknownVal();
-
- const StringLiteral *Str = StrR->getStringLiteral();
- SVal Idx = R->getIndex();
- if (Optional<nonloc::ConcreteInt> CI = Idx.getAs<nonloc::ConcreteInt>()) {
- int64_t i = CI->getValue().getSExtValue();
- // Abort on string underrun. This can be possible by arbitrary
- // clients of getBindingForElement().
- if (i < 0)
+ if (const auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
+ const StringLiteral *SL = StrR->getStringLiteral();
+ const llvm::APSInt &Idx = CI->getValue();
+ const auto Offset = static_cast<uint64_t>(Idx.getExtValue());
+ if (Idx < 0)
return UndefinedVal();
- int64_t length = Str->getLength();
- // Technically, only i == length is guaranteed to be null.
- // However, such overflows should be caught before reaching this point;
- // the only time such an access would be made is if a string literal was
- // used to initialize a larger array.
- char c = (i >= length) ? '\0' : Str->getCodeUnit(i);
- return svalBuilder.makeIntVal(c, T);
+ return getSValFromStringLiteral(SL, Offset, T);
}
} else if (const VarRegion *VR = dyn_cast<VarRegion>(superR)) {
if (Optional<SVal> V = getConstantValFromConstArrayInitializer(B, VR, R))
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits