chrish_ericsson_atx created this revision.
Herald added subscribers: cfe-commits, arphaman.
Herald added a project: clang.
chrish_ericsson_atx requested review of this revision.

Check applied to unbounded (incomplete) arrays and pointers
to spot cases where the computed address is beyond the
largest possible addressable extent of the array, based
on the address space in which the array is delcared, or
which the pointer refers to.

Check helps to avoid cases of nonsense pointer math and
array indexing which could lead to linker failures or
runtime exceptions.  Of particular interest when building
for embedded systems with small address spaces.

Change-Id: I836042912f0f7ba4ee774ac03d2fb47d987709e2


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D86796

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/const-eval.c
  clang/test/Sema/unbounded-array-bounds.c
  clang/test/SemaCXX/constant-expression-cxx1y.cpp

Index: clang/test/SemaCXX/constant-expression-cxx1y.cpp
===================================================================
--- clang/test/SemaCXX/constant-expression-cxx1y.cpp
+++ clang/test/SemaCXX/constant-expression-cxx1y.cpp
@@ -1018,8 +1018,9 @@
 }
 
 constexpr void PR28739(int n) { // expected-error {{never produces a constant}}
-  int *p = &n;
+  int *p = &n;                  // expected-note {{declared here}}
   p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}}
+  // expected-warning@-1 {{refers past the last possible element}}
 }
 
 constexpr void Void(int n) {
Index: clang/test/Sema/unbounded-array-bounds.c
===================================================================
--- /dev/null
+++ clang/test/Sema/unbounded-array-bounds.c
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu   -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple avr-pc-linux-gnu    -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+
+struct S {
+  long long a;
+  char b;
+  long long c;
+  short d;
+};
+
+struct S s[];
+
+void f1() {
+  ++s[3].a;
+  ++s[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++s[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+long long ll[];
+
+void f2() {
+  ++ll[3];
+  ++ll[2705843009213693952];
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 2305843009213693952 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+  ++ll[847073650];
+  // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+}
+
+void f3(struct S p[]) {
+  ++p[3].a;
+  ++p[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++p[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+void f4(struct S *p) {
+  p += 3;
+  p += 7073650413200313099;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  p += 7073650;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
Index: clang/test/Sema/const-eval.c
===================================================================
--- clang/test/Sema/const-eval.c
+++ clang/test/Sema/const-eval.c
@@ -140,10 +140,10 @@
 
 // We evaluate these by providing 2s' complement semantics in constant
 // expressions, like we do for integers.
-void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;
-void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;
-__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c;
-void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];
+void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;                  // expected-warning {{refers past the last possible element}}
+void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;                  // expected-warning {{refers past the last possible element}}
+__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}}
+void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];                // expected-warning {{refers past the last possible element}}
 
 struct PR35214_X {
   int k;
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -13941,11 +13941,9 @@
   const ConstantArrayType *ArrayTy =
       Context.getAsConstantArrayType(BaseExpr->getType());
 
-  if (!ArrayTy)
-    return;
-
-  const Type *BaseType = ArrayTy->getElementType().getTypePtr();
-  if (EffectiveType->isDependentType() || BaseType->isDependentType())
+  const Type *BaseType = ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr();
+  bool isUnboundedArray = (BaseType == nullptr);
+  if (EffectiveType->isDependentType() || ( !isUnboundedArray && BaseType->isDependentType()))
     return;
 
   Expr::EvalResult Result;
@@ -13963,77 +13961,122 @@
     ND = ME->getMemberDecl();
 
   if (index.isUnsigned() || !index.isNegative()) {
-    // It is possible that the type of the base expression after
-    // IgnoreParenCasts is incomplete, even though the type of the base
-    // expression before IgnoreParenCasts is complete (see PR39746 for an
-    // example). In this case we have no information about whether the array
-    // access exceeds the array bounds. However we can still diagnose an array
-    // access which precedes the array bounds.
-    if (BaseType->isIncompleteType())
-      return;
-
-    llvm::APInt size = ArrayTy->getSize();
-    if (!size.isStrictlyPositive())
-      return;
+    if (isUnboundedArray) {
+      const auto &ASTC = getASTContext();
+      unsigned AddrBits =
+        ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace(
+              EffectiveType->getCanonicalTypeInternal()));
+      unsigned ElemBits = ASTC.getTypeSize(EffectiveType);
+
+      if (0 == ElemBits) // assume 1 bit for type==void
+        ElemBits = 1;
+      uint32_t apsbits = 128;
+      if (index.getBitWidth() < apsbits)
+        index = index.extend(apsbits); // sign-extend to 128 bits, if necessary
+      else
+        apsbits = index.getBitWidth();
 
-    if (BaseType != EffectiveType) {
-      // Make sure we're comparing apples to apples when comparing index to size
-      uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
-      uint64_t array_typesize = Context.getTypeSize(BaseType);
-      // Handle ptrarith_typesize being zero, such as when casting to void*
-      if (!ptrarith_typesize) ptrarith_typesize = 1;
-      if (ptrarith_typesize != array_typesize) {
-        // There's a cast to a different size type involved
-        uint64_t ratio = array_typesize / ptrarith_typesize;
-        // TODO: Be smarter about handling cases where array_typesize is not a
-        // multiple of ptrarith_typesize
-        if (ptrarith_typesize * ratio == array_typesize)
-          size *= llvm::APInt(size.getBitWidth(), ratio);
+      llvm::APSInt ElemBytes(apsbits, true);
+      unsigned CharBits = ASTC.getCharWidth();
+      for (unsigned bits = 0; bits < ElemBits; bits += CharBits) {
+        ++ElemBytes;
       }
+      llvm::APSInt MaxElems(apsbits, true);
+      MaxElems = 1UL;
+      MaxElems <<= AddrBits;
+      MaxElems /= ElemBytes;
+
+      if (llvm::APSInt::compareValues(index, MaxElems) <= 0)
+        return;
+
+      unsigned DiagID = diag::warn_ptr_arith_exceeds_max_addressable_bounds;
+      if (ASE)
+        DiagID = diag::warn_array_index_exceeds_max_addressable_bounds;
+
+      DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
+          PDiag(DiagID)
+          << index.toString(10, true) << AddrBits
+          << ElemBits << ElemBytes.toString(10, true)
+          << MaxElems.toString(10, true)
+          << (unsigned)MaxElems.getLimitedValue(~0U)
+          << IndexExpr->getSourceRange());
     }
+    else {
+      // It is possible that the type of the base expression after
+      // IgnoreParenCasts is incomplete, even though the type of the base
+      // expression before IgnoreParenCasts is complete (see PR39746 for an
+      // example). In this case we have no information about whether the array
+      // access exceeds the array bounds. However we can still diagnose an array
+      // access which precedes the array bounds.
+      if (BaseType->isIncompleteType())
+        return;
 
-    if (size.getBitWidth() > index.getBitWidth())
-      index = index.zext(size.getBitWidth());
-    else if (size.getBitWidth() < index.getBitWidth())
-      size = size.zext(index.getBitWidth());
+      llvm::APInt size = ArrayTy->getSize();
+      if (!size.isStrictlyPositive())
+        return;
 
-    // For array subscripting the index must be less than size, but for pointer
-    // arithmetic also allow the index (offset) to be equal to size since
-    // computing the next address after the end of the array is legal and
-    // commonly done e.g. in C++ iterators and range-based for loops.
-    if (AllowOnePastEnd ? index.ule(size) : index.ult(size))
-      return;
+      if (BaseType != EffectiveType) {
+        // Make sure we're comparing apples to apples when comparing index to size
+        uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
+        uint64_t array_typesize = Context.getTypeSize(BaseType);
+        // Handle ptrarith_typesize being zero, such as when casting to void*
+        if (!ptrarith_typesize) ptrarith_typesize = 1;
+        if (ptrarith_typesize != array_typesize) {
+          // There's a cast to a different size type involved
+          uint64_t ratio = array_typesize / ptrarith_typesize;
+          // TODO: Be smarter about handling cases where array_typesize is not a
+          // multiple of ptrarith_typesize
+          if (ptrarith_typesize * ratio == array_typesize)
+            size *= llvm::APInt(size.getBitWidth(), ratio);
+        }
+      }
 
-    // Also don't warn for arrays of size 1 which are members of some
-    // structure. These are often used to approximate flexible arrays in C89
-    // code.
-    if (IsTailPaddedMemberArray(*this, size, ND))
-      return;
+      if (size.getBitWidth() > index.getBitWidth())
+        index = index.zext(size.getBitWidth());
+      else if (size.getBitWidth() < index.getBitWidth())
+        size = size.zext(index.getBitWidth());
 
-    // Suppress the warning if the subscript expression (as identified by the
-    // ']' location) and the index expression are both from macro expansions
-    // within a system header.
-    if (ASE) {
-      SourceLocation RBracketLoc = SourceMgr.getSpellingLoc(
-          ASE->getRBracketLoc());
-      if (SourceMgr.isInSystemHeader(RBracketLoc)) {
-        SourceLocation IndexLoc =
-            SourceMgr.getSpellingLoc(IndexExpr->getBeginLoc());
-        if (SourceMgr.isWrittenInSameFile(RBracketLoc, IndexLoc))
-          return;
+      // For array subscripting the index must be less than size, but for pointer
+      // arithmetic also allow the index (offset) to be equal to size since
+      // computing the next address after the end of the array is legal and
+      // commonly done e.g. in C++ iterators and range-based for loops.
+      if (AllowOnePastEnd ? index.ule(size) : index.ult(size))
+        return;
+
+      // Also don't warn for arrays of size 1 which are members of some
+      // structure. These are often used to approximate flexible arrays in C89
+      // code.
+      if (IsTailPaddedMemberArray(*this, size, ND))
+        return;
+
+      // Suppress the warning if the subscript expression (as identified by the
+      // ']' location) and the index expression are both from macro expansions
+      // within a system header.
+      if (ASE) {
+        SourceLocation RBracketLoc = SourceMgr.getSpellingLoc(
+            ASE->getRBracketLoc());
+        if (SourceMgr.isInSystemHeader(RBracketLoc)) {
+          SourceLocation IndexLoc =
+              SourceMgr.getSpellingLoc(IndexExpr->getBeginLoc());
+          if (SourceMgr.isWrittenInSameFile(RBracketLoc, IndexLoc))
+            return;
+        }
       }
-    }
 
-    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
-    if (ASE)
-      DiagID = diag::warn_array_index_exceeds_bounds;
+      unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
+      if (ASE)
+        DiagID = diag::warn_array_index_exceeds_bounds;
 
-    DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
-                        PDiag(DiagID) << index.toString(10, true)
-                                      << size.toString(10, true)
-                                      << (unsigned)size.getLimitedValue(~0U)
-                                      << IndexExpr->getSourceRange());
+      DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
+                          PDiag(DiagID) << index.toString(10, true)
+                                        << size.toString(10, true)
+                                        << (unsigned)size.getLimitedValue(~0U)
+                                        << IndexExpr->getSourceRange());
+    }
   } else {
+    if (isUnboundedArray)
+      return;
+
     unsigned DiagID = diag::warn_array_index_precedes_bounds;
     if (!ASE) {
       DiagID = diag::warn_ptr_arith_precedes_bounds;
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8831,6 +8831,14 @@
 def warn_array_index_exceeds_bounds : Warning<
   "array index %0 is past the end of the array (which contains %1 "
   "element%s2)">, InGroup<ArrayBounds>;
+def warn_ptr_arith_exceeds_max_addressable_bounds : Warning<
+  "the pointer incremented by %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
+def warn_array_index_exceeds_max_addressable_bounds : Warning<
+  "array index %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
 def note_array_declared_here : Note<
   "array %0 declared here">;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to