vabridgers created this revision. Herald added subscribers: steakhal, ASDenysPetrov, martong, Charusso, dkrupp, donat.nagy, Szelethus, mikhail.ramalho, a.sidorin, szepet, baloghadamsoftware, xazax.hun, whisperity. vabridgers requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
This change addresses the specific cases described in these cases, addresses another related escaped defect and adds covering LITs. https://bugs.llvm.org/show_bug.cgi?id=43364 https://bugs.llvm.org/show_bug.cgi?id=44114 This patch was co-authored with Balazs Benics (steakhal), and debug assistance from Gabor Marton (martong). Region store is taught to recognize punned concrete integers and extract the data in an endianess correct way for scalars and structures. The escaped defect was incorrect data extracted for big endian targets under analysis in a particular case. Co-authored-by: Balazs Benics <benicsbal...@gmail.com> Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D93595 Files: clang/lib/StaticAnalyzer/Core/RegionStore.cpp clang/test/Analysis/concrete-endian.cpp clang/test/Analysis/punning-structures.cpp
Index: clang/test/Analysis/punning-structures.cpp =================================================================== --- /dev/null +++ clang/test/Analysis/punning-structures.cpp @@ -0,0 +1,162 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -triple x86_64-unknown-linux-gnu -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -triple powerpc64-unknown-linux-gnu -verify %s + +void clang_analyzer_dump(int); +void clang_analyzer_eval(int); + +int printf(const char *__restrict, ...); + +typedef unsigned int uint32_t; +typedef __typeof(sizeof(int)) size_t; + +// case-38931-1 +typedef struct { + unsigned short var; +} A; + +typedef struct { + unsigned short var; +} B; + +typedef struct { + unsigned long var; +} C; + +typedef struct { + long a, b; +} X; + +void fn(char); + +extern int foo(char); + +// Cases to cover reproducer in https://bugs.llvm.org/show_bug.cgi?id=43364. +// Still TODO!!! +int blah(A *param, int *x) { + if (param->var != 0) // reg_$1<SymRegion{reg_$0<param>}->RetCode> + return ((B *)param)->var; // reg_$2{element{B, 0 S32b, SymRegion{reg_$0<param>}->RetCore} + *x = 1; + return 0; +} + +int foo(A *param) { + int x; + if (blah(param, &x) != 0) { + return 0; + } + // FIXME!! Should be no warning!! https://bugs.llvm.org/show_bug.cgi?id=43364 + return x; // expected-warning{{}} false positive: "returning garbage value". +} + +// case-38931-2 +int fee(int *a) { + int *b = a; + clang_analyzer_eval(*b == *a); // expected-warning{{TRUE}} + return 0; +} + +int foonew(A *s1) { + // s1->code = 0; // uncomment this to see the case pass. + B *s2 = (B *)s1; + + // FIXME!! Should be TRUE. https://bugs.llvm.org/show_bug.cgi?id=43364 + //clang_analyzer_eval(s2->var == s1->var); // e xpected-warning{{TRUE}} + //clang_analyzer_dump(s1->RetCode); // reg_$2<int SymRegion{reg_$0<A * s1>}.code> + //clang_analyzer_dump(s2->RetCode); // reg_$1<int Element{SymRegion{reg_$0<A * s1>},0 S64b,B}.code> + return 0; +} + +// Derivative reproducer for case https://bugs.llvm.org/show_bug.cgi?id=39032 +int structpunning(void) { + A localvar = {0x1122}; + + char *p = (char *)&localvar; + int x = 0; + if (p[0]) + x += 1; + if (p[1]) // Branch condition evaluates to a garbage value [core.uninitialized.Branch] + x += 1; + return x; +} + +// Derivative reproducer for case https://bugs.llvm.org/show_bug.cgi?id=39032 +int case15982(void) { + unsigned long myVar1 = 654321UL; + char *p1 = (char *)&myVar1; + foo(p1[0]); + foo(p1[1]); + + C myVar2 = {654321UL}; + char *p2 = (char *)&myVar2.var; + foo(p2[0]); + foo(p2[1]); + + C myVar3 = {654321UL}; + char *p3 = (char *)&myVar3; + foo(p3[0]); + foo(p3[1]); + + return 0; +} + +// Case to cover reproducer in https://bugs.llvm.org/show_bug.cgi?id=39032 +void fn2() { + X str = {0, 0}; + char *ptr = (char *)&str; + fn(ptr[1]); +} + +void fn3() { + long long xx = 0; + char *ptr = (char *)&xx; + fn(ptr[200]); +} + +// Case to cover reproducer in https://bugs.llvm.org/show_bug.cgi?id=44114 +struct device_registers { + uint32_t n1; + uint32_t n2; + uint32_t n3; + uint32_t n4; + uint32_t n5; +} __attribute__((packed)); + +static_assert(20 == sizeof(struct device_registers), "unexpected struct size"); + +static void CopyDeviceRegisters( + struct device_registers volatile &dest, + struct device_registers volatile *src) { + dest.n1 = src->n1; + dest.n2 = src->n2; + dest.n3 = src->n3; + dest.n4 = src->n4; + dest.n5 = src->n5; +} + +// this is a pretend process_bytes() from boost/crc.hpp +// (where it's a member function) +void process_bytes(void const *buffer, size_t byte_count) { + // "checksum creation" + unsigned char const *const b = static_cast<unsigned char const *>(buffer); + for (size_t i = 0; i < byte_count; i++) { + printf("%02X ", b[i]); + } + printf("\n"); +} + +int case44114() { + // in our code this is a uio mapping + struct device_registers mock {}; + struct device_registers volatile *mapped_regs = &mock; + + // purpose is to get a copy of the volatile mapping to compute a + // checksum + struct device_registers shadow; + + CopyDeviceRegisters(shadow, mapped_regs); + + // now we merely need to read from the (shadow) struct to mimick + // what the real process_bytes() would do ... + process_bytes(&shadow, sizeof(shadow)); + return 0; +} Index: clang/test/Analysis/concrete-endian.cpp =================================================================== --- /dev/null +++ clang/test/Analysis/concrete-endian.cpp @@ -0,0 +1,201 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -triple x86_64-unknown-linux-gnu -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -triple powerpc64-unknown-linux-gnu -verify %s + +void clang_analyzer_dump(int); +void clang_analyzer_eval(int); + +int testLocConcreteInts() { + static_assert(sizeof(char) == 1, "test assumes sizeof(char) is 1"); + static_assert(sizeof(short) == 2, "test assumes sizeof(short) is 2"); + static_assert(sizeof(int) == 4, "test assumes sizeof(int) is 4"); + static_assert(sizeof(long) == 8, "test assumes sizeof(long) is 8"); + static_assert(sizeof(long *) == 8, "test assumes sizeof(long *) is 8"); + long *p = (long *)0x11223344AA998877ULL; + unsigned char *ppb = (unsigned char *)&p; + unsigned short *pps = (unsigned short *)&p; + unsigned int *ppi = (unsigned int *)&p; + +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(ppb[0] == 0x77); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[1] == 0x88); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[2] == 0x99); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[3] == 0xaa); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[4] == 0x44); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[5] == 0x33); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[6] == 0x22); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[7] == 0x11); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(ppb[7] == 0x77); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[6] == 0x88); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[5] == 0x99); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[4] == 0xaa); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[3] == 0x44); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[2] == 0x33); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[1] == 0x22); // expected-warning{{TRUE}} + clang_analyzer_eval(ppb[0] == 0x11); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(ppb[8]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(ppb[-1]); // expected-warning{{UNKNOWN}} + +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(pps[0] == 0x8877); // expected-warning{{TRUE}} + clang_analyzer_eval(pps[1] == 0xaa99); // expected-warning{{TRUE}} + clang_analyzer_eval(pps[2] == 0x3344); // expected-warning{{TRUE}} + clang_analyzer_eval(pps[3] == 0x1122); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(pps[3] == 0x8877); // expected-warning{{TRUE}} + clang_analyzer_eval(pps[2] == 0xaa99); // expected-warning{{TRUE}} + clang_analyzer_eval(pps[1] == 0x3344); // expected-warning{{TRUE}} + clang_analyzer_eval(pps[0] == 0x1122); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(pps[4]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(pps[-1]); // expected-warning{{UNKNOWN}} + +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(ppi[0] == 0xaa998877); // expected-warning{{TRUE}} + clang_analyzer_eval(ppi[1] == 0x11223344); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(ppi[0] == 0x11223344); // expected-warning{{TRUE}} + clang_analyzer_eval(ppi[1] == 0xaa998877); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(ppi[2]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(ppi[-1]); // expected-warning{{UNKNOWN}} + + return 0; +} + +int testNonlocConcreteInts() { + static_assert(sizeof(char) == 1, "test assumes sizeof(char) is 1"); + static_assert(sizeof(short) == 2, "test assumes sizeof(short) is 2"); + static_assert(sizeof(int) == 4, "test assumes sizeof(int) is 4"); + static_assert(sizeof(long) == 8, "test assumes sizeof(long) is 8"); + static_assert(sizeof(long *) == 8, "test assumes sizeof(long *) is 8"); + + unsigned short sh = 0x1122; + unsigned char *pbsh = (unsigned char *)&sh; + + unsigned int i = 0x11223344; + unsigned char *pbi = (unsigned char *)&i; + unsigned short *psi = (unsigned short *)&i; + + unsigned long ll = 0x11223344AA998877ULL; + unsigned char *pbll = (unsigned char *)≪ + unsigned short *psll = (unsigned short *)≪ + unsigned int *pill = (unsigned int *)≪ + + // Uses built macro to determine endianess. + // Use "clang -dM -E -x c /dev/null" to dump macros. + // This test uses __LITTLE_ENDIAN__ and __BIG_ENDIAN__ + + // First, the short endianess test section +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(pbsh[0] == 0x22); // expected-warning{{TRUE}} + clang_analyzer_eval(pbsh[1] == 0x11); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(pbsh[0] == 0x11); // expected-warning{{TRUE}} + clang_analyzer_eval(pbsh[1] == 0x22); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(pbsh[2]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(pbsh[-1]); // expected-warning{{UNKNOWN}} + + // The "int" endianess test section +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(pbi[0] == 0x44); // expected-warning{{TRUE}} + clang_analyzer_eval(pbi[1] == 0x33); // expected-warning{{TRUE}} + clang_analyzer_eval(pbi[2] == 0x22); // expected-warning{{TRUE}} + clang_analyzer_eval(pbi[3] == 0x11); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(pbi[3] == 0x44); // expected-warning{{TRUE}} + clang_analyzer_eval(pbi[2] == 0x33); // expected-warning{{TRUE}} + clang_analyzer_eval(pbi[1] == 0x22); // expected-warning{{TRUE}} + clang_analyzer_eval(pbi[0] == 0x11); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(pbi[4]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(pbi[-1]); // expected-warning{{UNKNOWN}} + +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(psi[0] == 0x3344); // expected-warning{{TRUE}} + clang_analyzer_eval(psi[1] == 0x1122); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(psi[1] == 0x3344); // expected-warning{{TRUE}} + clang_analyzer_eval(psi[0] == 0x1122); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(psi[2]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(psi[-1]); // expected-warning{{UNKNOWN}} + + // The "long" endianess test section +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(pbll[0] == 0x77); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[1] == 0x88); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[2] == 0x99); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[3] == 0xaa); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[4] == 0x44); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[5] == 0x33); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[6] == 0x22); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[7] == 0x11); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(pbll[7] == 0x77); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[6] == 0x88); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[5] == 0x99); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[4] == 0xaa); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[3] == 0x44); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[2] == 0x33); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[1] == 0x22); // expected-warning{{TRUE}} + clang_analyzer_eval(pbll[0] == 0x11); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(pbll[8]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(pbll[-1]); // expected-warning{{UNKNOWN}} + +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(psll[0] == 0x8877); // expected-warning{{TRUE}} + clang_analyzer_eval(psll[1] == 0xaa99); // expected-warning{{TRUE}} + clang_analyzer_eval(psll[2] == 0x3344); // expected-warning{{TRUE}} + clang_analyzer_eval(psll[3] == 0x1122); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(psll[3] == 0x8877); // expected-warning{{TRUE}} + clang_analyzer_eval(psll[2] == 0xaa99); // expected-warning{{TRUE}} + clang_analyzer_eval(psll[1] == 0x3344); // expected-warning{{TRUE}} + clang_analyzer_eval(psll[0] == 0x1122); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(psll[4]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(psll[-1]); // expected-warning{{UNKNOWN}} + +#if defined(__LITTLE_ENDIAN__) + clang_analyzer_eval(pill[0] == 0xaa998877); // expected-warning{{TRUE}} + clang_analyzer_eval(pill[1] == 0x11223344); // expected-warning{{TRUE}} +#elif defined(__BIG_ENDIAN__) + clang_analyzer_eval(pill[0] == 0x11223344); // expected-warning{{TRUE}} + clang_analyzer_eval(pill[1] == 0xaa998877); // expected-warning{{TRUE}} +#else +#error "Don't recognize the endianess of this target!" +#endif + // Array out of bounds should yield UNKNOWN + clang_analyzer_eval(pill[2]); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(pill[-1]); // expected-warning{{UNKNOWN}} + + return 0; +} Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -1626,11 +1626,56 @@ return Result; } +static SVal getSValAsConcreteInt(SValBuilder &SVB, SVal V, QualType baseT, + QualType elemT, + const RegionRawOffset &ORegionRawOffs) { + ASTContext &Ctx = SVB.getContext(); + + const llvm::APSInt *ConcreteValue = [&V](SVal Val) { + if (auto Int = V.getAs<nonloc::ConcreteInt>()) + return &Int->getValue(); + return &V.getAs<loc::ConcreteInt>()->getValue(); + }(V); + assert(ConcreteValue); + + const unsigned bitPosition = [&]() { + unsigned bitPos; + if (Ctx.getTargetInfo().isBigEndian()) + bitPos = (Ctx.getTypeSizeInChars(baseT).getQuantity() - + Ctx.getTypeSizeInChars(elemT).getQuantity()) - + ORegionRawOffs.getOffset().getQuantity(); + else + bitPos = ORegionRawOffs.getOffset().getQuantity(); + return bitPos * Ctx.getCharWidth(); + }(); + const unsigned numBits = + Ctx.getCharWidth() * Ctx.getTypeSizeInChars(elemT).getQuantity(); + if (bitPosition < ConcreteValue->getBitWidth() && + (numBits + bitPosition) <= ConcreteValue->getBitWidth()) { + llvm::APInt bits = ConcreteValue->extractBits(numBits, bitPosition); + return SVB.makeIntVal(bits, true); + } + return UnknownVal(); +} + SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, const ElementRegion* R) { // Check if the region has a binding. - if (const Optional<SVal> &V = B.getDirectBinding(R)) + if (const Optional<SVal> &V = B.getDirectBinding(R)) { + if (V->isConstant()) { + const RegionRawOffset &O = R->getAsArrayOffset(); + if (const TypedValueRegion *baseR = + dyn_cast_or_null<TypedValueRegion>(O.getRegion())) { + QualType baseT = baseR->getValueType(); + if (baseT->isScalarType()) { + QualType elemT = R->getElementType(); + if (elemT->isScalarType()) + return getSValAsConcreteInt(svalBuilder, *V, baseT, elemT, O); + } + } + } return *V; + } const MemRegion* superR = R->getSuperRegion(); @@ -1711,7 +1756,7 @@ if (const TypedValueRegion *baseR = dyn_cast_or_null<TypedValueRegion>(O.getRegion())) { QualType baseT = baseR->getValueType(); - if (baseT->isScalarType()) { + if (baseT->isScalarType() || baseT->isStructureType()) { QualType elemT = R->getElementType(); if (elemT->isScalarType()) { if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) { @@ -1721,8 +1766,10 @@ if (V->isUnknownOrUndef()) return *V; - // Other cases: give up. We are indexing into a larger object - // that has some value, but we don't know how to handle that yet. + + if (V->isConstant()) + return getSValAsConcreteInt(svalBuilder, *V, baseT, elemT, O); + return UnknownVal(); } }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits