[PATCH] D14274: Add alloc_size attribute to clang

George Burgess IV via cfe-commits Mon, 02 Nov 2015 18:25:57 -0800

george.burgess.iv created this revision.
george.burgess.iv added a reviewer: rsmith.
george.burgess.iv added a subscriber: cfe-commits.


This is the first in a two-patch series aimed at adding the alloc_size 
attribute to clang. This patch adds the alloc_size attribute, docs, and all of 
the logic needed to handle it in ExprConstant. The only thing that's _really_ 
missing is our ability to handle non-const locals, because that's proving to be 
a moderately interesting challenge. (Spoiler alert: that's basically the only 
thing in patch #2.)

There's also a restructuring of TryEvaluateBuiltinObjectSize thrown in, because 
I felt like it was getting overly subtle. Hopefully the new version is 
considerably easier to reason about. :)

— Implementation notes —
- With the restructuring of TryEvaluateBuiltinObjectSize, I was able to make 
EM_DesignatorFold (now EM_OffsetFold) the only EvalMode we use in evaluating 
__builtin_object_size.

- The InvalidBase functionality of LValues was further (ab)used to get 
information to TryEvaluateBuiltinObjectSize. We know an LValue has been 
initialized by an alloc_size function if the base is invalid and the base 
itself is a CallExpr. This won’t conflict with current behavior, because (prior 
to this patch) all invalid bases were MemberExprs.

- As said in the ExprConstant code, I’d love a less hand-wavy approach to 
writing tryEvaluateLValueAsAllocSize.

http://reviews.llvm.org/D14274

Files:
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/DiagnosticSemaKinds.td
  lib/AST/ExprConstant.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/CodeGen/alloc-size.c
  test/Sema/alloc-size.c

Index: test/Sema/alloc-size.c
===================================================================
--- /dev/null
+++ test/Sema/alloc-size.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 %s -verify
+
+void *fail1(int a) __attribute__((alloc_size)); //expected-error{{'alloc_size' attribute takes at least 1 argument}}
+void *fail2(int a) __attribute__((alloc_size())); //expected-error{{'alloc_size' attribute takes at least 1 argument}}
+
+void *fail3(int a) __attribute__((alloc_size(0))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds: can only be 1, since there is one parameter}}
+void *fail4(int a) __attribute__((alloc_size(2))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds: can only be 1, since there is one parameter}}
+
+void *fail5(int a, int b) __attribute__((alloc_size(0, 1))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds: must be between 1 and 2}}
+void *fail6(int a, int b) __attribute__((alloc_size(3, 1))); //expected-error{{'alloc_size' attribute parameter 1 is out of bounds: must be between 1 and 2}}
+
+void *fail7(int a, int b) __attribute__((alloc_size(1, 0))); //expected-error{{'alloc_size' attribute parameter 2 is out of bounds: must be between 1 and 2}}
+void *fail8(int a, int b) __attribute__((alloc_size(1, 3))); //expected-error{{'alloc_size' attribute parameter 2 is out of bounds: must be between 1 and 2}}
+
+int fail9(int a) __attribute__((alloc_size(1))); //expected-warning{{'alloc_size' attribute only applies to return values that are pointers}}
+
+int fail10 __attribute__((alloc_size(1))); //expected-warning{{'alloc_size' attribute only applies to functions}}
+
+void *fail11(void *a) __attribute__((alloc_size(1))); //expected-error{{'alloc_size' attribute only applies to integer arguments}}
+
+void *fail12(int a) __attribute__((alloc_size("abc"))); //expected-error{{'alloc_size' attribute requires parameter 1 to be an integer constant}}
+void *fail12(int a) __attribute__((alloc_size(1, "abc"))); //expected-error{{'alloc_size' attribute requires parameter 2 to be an integer constant}}
+void *fail13(int a) __attribute__((alloc_size(1U<<31))); //expected-error{{integer constant expression evaluates to value 2147483648 that cannot be represented in a 32-bit signed integer type}}
Index: test/CodeGen/alloc-size.c
===================================================================
--- /dev/null
+++ test/CodeGen/alloc-size.c
@@ -0,0 +1,263 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - 2>&1 | FileCheck %s
+
+#define NULL ((void *)0)
+
+int gi;
+
+void *my_malloc(int size) __attribute__((alloc_size(1)));
+void *my_calloc(int elem_size, int num_elems) __attribute__((alloc_size(1, 2)));
+
+// CHECK-LABEL: @test1
+void test1() {
+  void *const vp = my_malloc(100);
+  // CHECK: store i32 100
+  gi = __builtin_object_size(vp, 0);
+  // CHECK: store i32 100
+  gi = __builtin_object_size(vp, 1);
+  // CHECK: store i32 100
+  gi = __builtin_object_size(vp, 2);
+  // CHECK: store i32 100
+  gi = __builtin_object_size(vp, 3);
+
+  void *const arr = my_calloc(100, 5);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(arr, 0);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(arr, 1);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(arr, 2);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(arr, 3);
+
+  // CHECK: store i32 100
+  gi = __builtin_object_size(my_malloc(100), 0);
+  // CHECK: store i32 100
+  gi = __builtin_object_size(my_malloc(100), 1);
+  // CHECK: store i32 100
+  gi = __builtin_object_size(my_malloc(100), 2);
+  // CHECK: store i32 100
+  gi = __builtin_object_size(my_malloc(100), 3);
+
+  // CHECK: store i32 500
+  gi = __builtin_object_size(my_calloc(100, 5), 0);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(my_calloc(100, 5), 1);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(my_calloc(100, 5), 2);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(my_calloc(100, 5), 3);
+}
+
+// CHECK-LABEL: @test2
+void test2() {
+  void *const vp = my_malloc(gi);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(vp, 0);
+
+  void *const arr1 = my_calloc(gi, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(arr1, 0);
+
+  void *const arr2 = my_calloc(1, gi);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(arr2, 0);
+}
+
+// CHECK-LABEL: @test3
+void test3() {
+  char *const buf = (char *)my_calloc(100, 5);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(buf, 0);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(buf, 1);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(buf, 2);
+  // CHECK: store i32 500
+  gi = __builtin_object_size(buf, 3);
+}
+
+struct Data {
+  int a;
+  int t[10];
+  char pad[3];
+  char end[1];
+};
+
+// CHECK-LABEL: @test5
+void test5() {
+  struct Data *const data = my_malloc(sizeof(*data));
+  // CHECK: store i32 48
+  gi = __builtin_object_size(data, 0);
+  // CHECK: store i32 48
+  gi = __builtin_object_size(data, 1);
+  // CHECK: store i32 48
+  gi = __builtin_object_size(data, 2);
+  // CHECK: store i32 48
+  gi = __builtin_object_size(data, 3);
+
+  // CHECK: store i32 40
+  gi = __builtin_object_size(&data->t[1], 0);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(&data->t[1], 1);
+  // CHECK: store i32 40
+  gi = __builtin_object_size(&data->t[1], 2);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(&data->t[1], 3);
+
+  struct Data *const arr = my_calloc(sizeof(*data), 2);
+  // CHECK: store i32 96
+  gi = __builtin_object_size(arr, 0);
+  // CHECK: store i32 96
+  gi = __builtin_object_size(arr, 1);
+  // CHECK: store i32 96
+  gi = __builtin_object_size(arr, 2);
+  // CHECK: store i32 96
+  gi = __builtin_object_size(arr, 3);
+
+  // CHECK: store i32 88
+  gi = __builtin_object_size(&arr->t[1], 0);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(&arr->t[1], 1);
+  // CHECK: store i32 88
+  gi = __builtin_object_size(&arr->t[1], 2);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(&arr->t[1], 3);
+}
+
+// CHECK-LABEL: @test6
+void test6() {
+  // Things that would normally trigger conservative estimates don't need to do
+  // so when we know the source of the allocation.
+  struct Data *const data = my_malloc(sizeof(*data) + 10);
+  // CHECK: store i32 11
+  gi = __builtin_object_size(data->end, 0);
+  // CHECK: store i32 11
+  gi = __builtin_object_size(data->end, 1);
+  // CHECK: store i32 11
+  gi = __builtin_object_size(data->end, 2);
+  // CHECK: store i32 11
+  gi = __builtin_object_size(data->end, 3);
+
+  struct Data *const arr = my_calloc(sizeof(*arr) + 5, 3);
+  // It's unclear what the best path is here.
+  //
+  // First, if we're writing off the end and we're in an array, then the user's
+  // intent gets a lot less clear. Consider:
+  //   struct Data { char end[1]; };
+  //   struct Data *D = calloc(sizeof(*D) + 10, 2);
+  //   __builtin_object_size(D[0].end, 1);
+  // We can't reasonably determine whether the user will consider the 0th
+  // element of D to be at ((char*)D + sizeof(*D)), or ((char*)D + sizeof(*D)
+  // + 10).
+  //
+  // Next, many C users see this:
+  //   struct Foo { /* [snip] */ };
+  //   void *p = malloc(sizeof(struct Foo) * 100);
+  //   memset(p, '\0', sizeof(struct Foo) * 100);
+  //
+  // ...As identical to this:
+  //   struct Foo { /* [snip] */ };
+  //   void *p = calloc(sizeof(struct Foo), 100);
+  //
+  // ...So acting differently in these cases may be confusing.
+  //
+  // Therefore, I think the best path is to treat them as identical, since we're
+  // guaranteed that the bytes, at the very least, exist, and there's no clear
+  // object boundaries to speak of. Note that all of this only applies if we've
+  // hit the special "we're trying to write off the end of an object" logic.
+
+  // CHECK: store i32 112
+  gi = __builtin_object_size(arr[0].end, 0);
+  // CHECK: store i32 112
+  gi = __builtin_object_size(arr[0].end, 1);
+  // CHECK: store i32 112
+  gi = __builtin_object_size(arr[0].end, 2);
+  // CHECK: store i32 112
+  gi = __builtin_object_size(arr[0].end, 3);
+
+  // CHECK: store i32 64
+  gi = __builtin_object_size(arr[1].end, 0);
+  // CHECK: store i32 64
+  gi = __builtin_object_size(arr[1].end, 1);
+  // CHECK: store i32 64
+  gi = __builtin_object_size(arr[1].end, 2);
+  // CHECK: store i32 64
+  gi = __builtin_object_size(arr[1].end, 3);
+
+  // CHECK: store i32 16
+  gi = __builtin_object_size(arr[2].end, 0);
+  // CHECK: store i32 16
+  gi = __builtin_object_size(arr[2].end, 1);
+  // CHECK: store i32 16
+  gi = __builtin_object_size(arr[2].end, 2);
+  // CHECK: store i32 16
+  gi = __builtin_object_size(arr[2].end, 3);
+}
+
+// CHECK-LABEL: @test7
+void test7() {
+  // Note: sizeof(struct Data) - 2*sizeof(int) == 40
+
+  // Chop the char arrays + final int of t[10] off the end.
+  struct Data *const data = my_malloc(sizeof(*data)-2*sizeof(int));
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&data->t[1], 0);
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&data->t[1], 1);
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&data->t[1], 2);
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&data->t[1], 3);
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size(data->pad, 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(data->pad, 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(data->pad, 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(data->pad, 3);
+
+  struct Data *const arr = my_calloc(sizeof(*data)-sizeof(int), 2);
+  // CHECK: store i32 80
+  gi = __builtin_object_size(&arr[0].t[1], 0);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(&arr[0].t[1], 1);
+  // CHECK: store i32 80
+  gi = __builtin_object_size(&arr[0].t[1], 2);
+  // CHECK: store i32 36
+  gi = __builtin_object_size(&arr[0].t[1], 3);
+
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&arr[1].t[1], 0);
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&arr[1].t[1], 1);
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&arr[1].t[1], 2);
+  // CHECK: store i32 32
+  gi = __builtin_object_size(&arr[1].t[1], 3);
+
+  // CHECK: store i32 0
+  gi = __builtin_object_size(arr[1].pad, 0);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(arr[1].pad, 1);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(arr[1].pad, 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(arr[1].pad, 3);
+}
+
+// CHECK-LABEL: @test8
+void test8() {
+  // Non-const pointers aren't currently supported. They (hopefully) will be in
+  // the near future.
+  void *buf = my_calloc(100, 5);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(buf, 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false)
+  gi = __builtin_object_size(buf, 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true)
+  gi = __builtin_object_size(buf, 2);
+  // CHECK: store i32 0
+  gi = __builtin_object_size(buf, 3);
+}
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -705,6 +705,98 @@
                                      Attr.getAttributeSpellingListIndex()));
 }
 
+/// Checks to be sure that the given parameter number is in bounds, and is of
+/// some integral type. Will emit appropriate diagnostics if this returns false.
+///
+/// FuncParamNo is expected to be from the user, so is base-1. AttrArgNo is used
+/// to actually retrieve the argument, so it's base-0.
+static bool checkParamIsIntegerType(Sema &S, const FunctionProtoType *FPT,
+                                    const AttributeList &Attr,
+                                    unsigned FuncParamNo, unsigned AttrArgNo) {
+  assert(Attr.getArg(AttrArgNo).is<Expr *>());
+  // FuncParamNo is base-1
+  if (FuncParamNo < 1 || FuncParamNo > FPT->getNumParams()) {
+    auto SrcLoc = Attr.getArgAsExpr(AttrArgNo)->getLocStart();
+    auto UserArgNo = AttrArgNo + 1;
+    S.Diag(SrcLoc, diag::err_attribute_argument_out_of_range)
+        << Attr.getName() << UserArgNo << FPT->getNumParams();
+    return false;
+  }
+
+  if (!FPT->getParamType(FuncParamNo - 1)->isIntegerType()) {
+    auto SrcLoc = Attr.getArgAsExpr(AttrArgNo)->getLocStart();
+    S.Diag(SrcLoc, diag::err_attribute_integers_only) << Attr.getName();
+    return false;
+  }
+  return true;
+}
+
+/// \brief Wrapper around checkUInt32Argument, with an extra check to be sure
+/// that the result will fit into a regular (signed) int. All args have the same
+/// purpose as they do in checkUInt32Argument.
+static bool checkPositiveIntArgument(Sema &S, const AttributeList &Attr,
+                                     const Expr *Expr, int &Val,
+                                     unsigned Idx = UINT_MAX) {
+  uint32_t UVal;
+  if (!checkUInt32Argument(S, Attr, Expr, UVal, Idx))
+    return false;
+
+  if (UVal > std::numeric_limits<int>::max()) {
+    llvm::APSInt I(32); // for toString
+    I = UVal;
+    S.Diag(Expr->getExprLoc(), diag::err_ice_too_large)
+        << I.toString(10, false) << 32 << /* Unsigned */ 0;
+    return false;
+  }
+
+  Val = (int)UVal;
+  return true;
+}
+
+static void handleAllocSizeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+  if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
+      !checkAttributeAtMostNumArgs(S, Attr, 2))
+    return;
+
+  auto *FD = dyn_cast<FunctionDecl>(D);
+  if (FD == nullptr || !FD->hasPrototype()) {
+    S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
+        << Attr.getName() << 4;
+    return;
+  }
+
+  auto *FPT = FD->getType()->castAs<FunctionProtoType>();
+  if (!FPT->getReturnType()->isPointerType()) {
+    S.Diag(Attr.getLoc(), diag::warn_attribute_return_pointers_only)
+        << Attr.getName();
+    return;
+  }
+
+  auto *SizeExpr = Attr.getArgAsExpr(0);
+  int SizeArgNo;
+  // Parameter indices are 1-based, hence Index=1
+  if (!checkPositiveIntArgument(S, Attr, SizeExpr, SizeArgNo, /*Index=*/1))
+    return;
+
+  if (!checkParamIsIntegerType(S, FPT, Attr, SizeArgNo, /*AttrArgNo=*/0))
+    return;
+
+  // Args are 1-based, so 0 implies that the arg was not present
+  int NumberArgNo = 0;
+  if (Attr.getNumArgs() == 2) {
+    auto *NumberExpr = Attr.getArgAsExpr(1);
+    // Parameter indices are 1-based, hence Index=2
+    if (!checkPositiveIntArgument(S, Attr, NumberExpr, NumberArgNo,
+                                  /*Index=*/2))
+      return;
+    if (!checkParamIsIntegerType(S, FPT, Attr, NumberArgNo, /*AttrArgNo=*/1))
+      return;
+  }
+
+  D->addAttr(::new (S.Context) AllocSizeAttr(
+      Attr.getRange(), S.Context, SizeArgNo, NumberArgNo,
+      Attr.getAttributeSpellingListIndex()));
+}
 
 static bool checkTryLockFunAttrCommon(Sema &S, Decl *D,
                                       const AttributeList &Attr,
@@ -4753,6 +4845,9 @@
   case AttributeList::AT_AlignValue:
     handleAlignValueAttr(S, D, Attr);
     break;
+  case AttributeList::AT_AllocSize:
+    handleAllocSizeAttr(S, D, Attr);
+    break;
   case AttributeList::AT_AlwaysInline:
     handleAlwaysInlineAttr(S, D, Attr);
     break;
Index: lib/AST/ExprConstant.cpp
===================================================================
--- lib/AST/ExprConstant.cpp
+++ lib/AST/ExprConstant.cpp
@@ -513,9 +513,14 @@
       /// gets a chance to look at it.
       EM_PotentialConstantExpressionUnevaluated,
 
-      /// Evaluate as a constant expression. Continue evaluating if we find a
-      /// MemberExpr with a base that can't be evaluated.
-      EM_DesignatorFold,
+      /// Evaluate as a constant expression. Continue evaluating if either:
+      /// - We find a MemberExpr with a base that can't be evaluated.
+      /// - We find a variable initialized with a call to a function that has
+      ///   the alloc_size attribute on it.
+      /// In either case, the LValue returned shall have an invalid base; in the
+      /// former, the base will be the invalid MemberExpr, in the latter, the
+      /// base will be the CallExpr.
+      EM_OffsetFold,
     } EvalMode;
 
     /// Are we checking whether the expression is a potential constant
@@ -618,7 +623,7 @@
           case EM_PotentialConstantExpression:
           case EM_ConstantExpressionUnevaluated:
           case EM_PotentialConstantExpressionUnevaluated:
-          case EM_DesignatorFold:
+          case EM_OffsetFold:
             HasActiveDiagnostic = false;
             return OptionalDiagnostic();
           }
@@ -698,7 +703,7 @@
       case EM_ConstantExpression:
       case EM_ConstantExpressionUnevaluated:
       case EM_ConstantFold:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return false;
       }
       llvm_unreachable("Missed EvalMode case");
@@ -727,14 +732,14 @@
       case EM_ConstantExpressionUnevaluated:
       case EM_ConstantFold:
       case EM_IgnoreSideEffects:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return false;
       }
       llvm_unreachable("Missed EvalMode case");
     }
 
     bool allowInvalidBaseExpr() const {
-      return EvalMode == EM_DesignatorFold;
+      return EvalMode == EM_OffsetFold;
     }
   };
 
@@ -771,11 +776,10 @@
   struct FoldOffsetRAII {
     EvalInfo &Info;
     EvalInfo::EvaluationMode OldMode;
-    explicit FoldOffsetRAII(EvalInfo &Info, bool Subobject)
+    explicit FoldOffsetRAII(EvalInfo &Info)
         : Info(Info), OldMode(Info.EvalMode) {
       if (!Info.checkingPotentialConstantExpression())
-        Info.EvalMode = Subobject ? EvalInfo::EM_DesignatorFold
-                                  : EvalInfo::EM_ConstantFold;
+        Info.EvalMode = EvalInfo::EM_OffsetFold;
     }
 
     ~FoldOffsetRAII() { Info.EvalMode = OldMode; }
@@ -4824,6 +4828,129 @@
 // Pointer Evaluation
 //===----------------------------------------------------------------------===//
 
+static bool isCallToAllocSizeFunction(const CallExpr *E) {
+  auto *Callee = E->getDirectCallee();
+  return Callee != nullptr && Callee->hasAttr<AllocSizeAttr>();
+}
+
+static bool isBaseAnAllocSizeCall(const LValue &LVal) {
+  auto *Base = LVal.getLValueBase().dyn_cast<const Expr *>();
+  auto *CE = dyn_cast_or_null<CallExpr>(Base);
+  return CE != nullptr && isCallToAllocSizeFunction(CE);
+}
+
+static bool getNumBytesReturnedByAllocSizeFunction(const ASTContext &Ctx,
+                                                   const LValue &LVal,
+                                                   CharUnits &Result) {
+  assert(isBaseAnAllocSizeCall(LVal));
+  auto *Call = cast<CallExpr>(LVal.getLValueBase().get<const Expr *>());
+  auto *AllocSize = Call->getCalleeDecl()->getAttr<AllocSizeAttr>();
+
+  // alloc_size args are base-1, 0 means not present.
+  assert(AllocSize->getElemSizeParam() != 0);
+  unsigned SizeArgNo = AllocSize->getElemSizeParam() - 1;
+  unsigned BitsInSizeT = Ctx.getTypeSize(Ctx.getSizeType());
+  if (Call->getNumArgs() <= SizeArgNo)
+    return false;
+
+  APSInt SizeOfElem(/*BitWidth=*/BitsInSizeT, /*IsUnsigned=*/true);
+  if (!Call->getArg(SizeArgNo)->EvaluateAsInt(
+          SizeOfElem, Ctx, /*AllowSideEffects=*/Expr::SE_AllowSideEffects))
+    return false;
+
+  bool WasNumArgSpecified = AllocSize->getNumElemsParam() != 0;
+  if (!WasNumArgSpecified) {
+    Result = CharUnits::fromQuantity(SizeOfElem.getZExtValue());
+    return true;
+  }
+
+  APSInt NumberOfElems(/*BitWidth=*/BitsInSizeT, /*IsUnsigned=*/true);
+  // Argument numbers start at 1
+  unsigned NumArgNo = AllocSize->getNumElemsParam() - 1;
+  if (!Call->getArg(NumArgNo)->EvaluateAsInt(
+          NumberOfElems, Ctx, /*AllowSideEffects=*/Expr::SE_AllowSideEffects))
+    return false;
+
+  bool Overflow = false;
+  auto BytesAvailable = SizeOfElem.umul_ov(NumberOfElems, Overflow);
+  if (Overflow)
+    return false;
+
+  Result = CharUnits::fromQuantity(BytesAvailable.getZExtValue());
+  return true;
+}
+
+// Attempts to evaluate the given LValue as the result of a call to a function
+// with the alloc_size attribute on it. This is an extremely fun case, as it is
+// very different from many things we try to handle in ExprConstant (e.g. "we
+// know that this is a block of memory, but can make no guarantees about its
+// value").  Also, it requires a bit of whole-function static analysis, because
+// code like the following needs to be correctly handled:
+//
+// void foo() {
+//   void *ptr = malloc(sizeof(int));
+//   for (int i = 0; i < 2; i++) {
+//     printf("%ld\n", __builtin_object_size(ptr, 0));
+//     free(ptr);
+//     ptr = malloc(sizeof(int) * 2);
+//   }
+// }
+//
+// Where "correct", in our current implementation, means "fails".
+static bool tryEvaluateLValueAsAllocSize(const Expr *E, LValue &Result,
+                                         EvalInfo &Info) {
+  auto ExtractCallExpr = [&Info](const Expr *E) -> const CallExpr *{
+    // A *very* common idiom with e.g. malloc is:
+    // struct Foo *f = (struct Foo *)malloc(sizeof(*f));
+    // We want to succeed on the common case, so we ignore parens/implicit
+    // casts, and, if the function returns void*, a single cast of any kind.
+    auto *MaybeCast = E->IgnoreParenImpCasts();
+    // Ignore one explicit cast
+    auto *Explicit = dyn_cast<CastExpr>(MaybeCast);
+    if (Explicit == nullptr)
+      return dyn_cast<CallExpr>(MaybeCast);
+
+    const Expr *MaybeCall = Explicit->getSubExpr()->IgnoreParenImpCasts();
+    if (auto *Call = dyn_cast<CallExpr>(MaybeCall))
+      if (Call->getCallReturnType(Info.Ctx)->isVoidPointerType())
+        return Call;
+    return nullptr;
+  };
+
+  assert(E->isGLValue());
+  if (!Info.allowInvalidBaseExpr())
+    return false;
+
+  // We're specifically looking for a variable initialized by a function with
+  // the alloc_size attribute. This requires a lot of guesswork and hand-waving.
+  // FIXME: Code-review note: I would *love* suggestions for a better way to do
+  // this.
+  auto *DRE = dyn_cast<DeclRefExpr>(E);
+  if (DRE == nullptr)
+    return false;
+
+  auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
+  // Don't support parameters (they can be assigned without us knowing) or
+  // static variables (use-before-assign isn't UB)
+  if (VD == nullptr || !VD->isLocalVarDecl())
+    return false;
+
+  // We currently don't support non-constant variables.
+  if (!VD->getType().isConstQualified())
+    return false;
+
+  const Expr *Init = VD->getAnyInitializer();
+  if (Init == nullptr)
+    return false;
+
+  const CallExpr *Call = ExtractCallExpr(Init);
+  if (Call == nullptr || !isCallToAllocSizeFunction(Call))
+    return false;
+
+  Result.setInvalid(Call);
+  return true;
+}
+
 namespace {
 class PointerExprEvaluator
   : public ExprEvaluatorBase<PointerExprEvaluator> {
@@ -5009,6 +5136,19 @@
 
   case CK_FunctionToPointerDecay:
     return EvaluateLValue(SubExpr, Result, Info);
+
+  case CK_LValueToRValue: {
+    LValue LVal;
+    if (!EvaluateLValue(E->getSubExpr(), LVal, Info))
+      return tryEvaluateLValueAsAllocSize(E->getSubExpr(), Result, Info);
+
+    APValue RVal;
+    // Note, we use the subexpression's type in order to retain cv-qualifiers.
+    if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(),
+                                        LVal, RVal))
+      return tryEvaluateLValueAsAllocSize(E->getSubExpr(), Result, Info);
+    return Success(RVal, E);
+  }
   }
 
   return ExprEvaluatorBaseTy::VisitCastExpr(E);
@@ -5119,7 +5259,13 @@
     return true;
   }
   default:
-    return ExprEvaluatorBaseTy::VisitCallExpr(E);
+    if (ExprEvaluatorBaseTy::VisitCallExpr(E))
+      return true;
+    if (isCallToAllocSizeFunction(E)) {
+      Result.setInvalid(E);
+      return true;
+    }
+    return false;
   }
 }
 
@@ -6372,9 +6518,119 @@
     return true;
 
   auto *E = LVal.Base.dyn_cast<const Expr *>();
-  (void)E;
-  assert(E != nullptr && isa<MemberExpr>(E));
-  return false;
+  return E == nullptr || !isa<MemberExpr>(E);
+}
+
+// Helper for TryEvaluateBuiltinObjectSize -- Given an LValue, this will
+// determine how many bytes exist from the beginning of the object to either the
+// end of the current subobject, or the end of the object itself, depending on
+// what the LValue looks like + the value of Type.
+static bool determineEndOffset(EvalInfo &Info, const CallExpr *E, unsigned Type,
+                               const LValue &Base, CharUnits &Result) {
+  // In the case where we're not dealing with a subobject in the expression, we
+  // ignore the subobject bit.
+  bool SubobjectOnly = (Type & 1) != 0 && !refersToCompleteObject(Base);
+
+  CharUnits AllocSizeBytes;
+  bool IsAllocSizeBase = isBaseAnAllocSizeCall(Base);
+  if (IsAllocSizeBase)
+    if (!getNumBytesReturnedByAllocSizeFunction(Info.Ctx, Base, AllocSizeBytes))
+      return false;
+
+  // We want to evaluate the size of the entire object
+  if (!SubobjectOnly || Base.Designator.Invalid) {
+    if (IsAllocSizeBase) {
+      Result = AllocSizeBytes;
+      return true;
+    }
+
+    // If we can't find the designator and we need to give a lower-bound, then
+    // we can't do anything. If we need to give an upper-bound (i.e. Type==1),
+    // we can fall back to evaluating the full object later.
+    if (Base.Designator.Invalid && Type == 3) {
+      assert(SubobjectOnly);
+      return false;
+    }
+
+    if (Base.InvalidBase)
+      return false;
+
+    QualType BaseTy = getObjectType(Base.getLValueBase());
+    if (BaseTy.isNull())
+      return false;
+
+    return HandleSizeof(Info, E->getExprLoc(), BaseTy, Result);
+  }
+
+  // We want to evaluate the size of a subobject.
+  const auto &Designator = Base.Designator;
+
+  // The following is a moderately common idiom in C:
+  //
+  // struct Foo { int a; char c[1]; };
+  // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar));
+  // strcpy(&F->c[0], Bar);
+  //
+  // So, if we see that we're examining a 1-length (or 0-length) array at the
+  // end of a struct with an unknown base, we'll either pretend we're
+  // evaluating a full object (this is fine because the designator is at the
+  // end), or we'll give up. If we can't determine how many bytes are available,
+  // then we need to give up if Type==1, because we can't correctly provide an
+  // upper bound. We can, however, always provide a lower bound of 1 for
+  // Type==3.
+  //
+  // Note that this may make our behavior a bit wonky when interacting with
+  // calloc and type=3. Consider:
+  //   struct Data { int i; char chars[1]; };
+  //   struct Data *D = calloc(sizeof(*D), 2);
+  //   __builtin_object_size(D->chars, 3) == 9;
+  // There's a full argument for why this is the least-bad of all alternatives
+  // in test/CodeGen/alloc-size.c (test #6). I just wanted to note this locally.
+  if (Base.InvalidBase &&
+      Designator.Entries.size() == Designator.MostDerivedPathLength &&
+      Designator.MostDerivedIsArrayElement &&
+      Designator.MostDerivedArraySize < 2 &&
+      isDesignatorAtObjectEnd(Info.Ctx, Base)) {
+    if (IsAllocSizeBase) {
+      Result = AllocSizeBytes;
+      return true;
+    }
+    if (Type == 1)
+      return false;
+    // Type==3 uses the standard behavior if we don't have alloc_size.
+  }
+
+  CharUnits BytesPerElem;
+  if (!HandleSizeof(Info, E->getExprLoc(), Designator.MostDerivedType,
+                    BytesPerElem))
+    return false;
+
+  // According to the GCC documentation, we want the size of the subobject
+  // denoted by the pointer. But that's not quite right -- what we actually
+  // want is the size of the immediately-enclosing array, if there is one.
+  int64_t ElemsRemaining = 1;
+  if (Designator.MostDerivedIsArrayElement &&
+      Designator.Entries.size() == Designator.MostDerivedPathLength)
+    ElemsRemaining =
+        Designator.MostDerivedArraySize - Designator.Entries.back().ArrayIndex;
+  else if (Designator.isOnePastTheEnd())
+    ElemsRemaining = 0;
+
+  if (ElemsRemaining <= 0) {
+    // We've gone past the end -- pretend there were zero bytes.
+    Result = CharUnits::Zero();
+    return true;
+  }
+
+  CharUnits BytesRemainingInArray = BytesPerElem * ElemsRemaining;
+  CharUnits SubobjectBytes = Base.Offset + BytesRemainingInArray;
+  // We need to be correct even if the user allocates fewer bytes than the type
+  // requires.
+  if (IsAllocSizeBase)
+    Result = std::min(SubobjectBytes, AllocSizeBytes);
+  else
+    Result = SubobjectBytes;
+  return true;
 }
 
 bool IntExprEvaluator::TryEvaluateBuiltinObjectSize(const CallExpr *E,
@@ -6386,7 +6642,7 @@
     // If there are any, but we can determine the pointed-to object anyway, then
     // ignore the side-effects.
     SpeculativeEvaluationRAII SpeculativeEval(Info);
-    FoldOffsetRAII Fold(Info, Type & 1);
+    FoldOffsetRAII Fold(Info);
     const Expr *Ptr = ignorePointerCastsAndParens(E->getArg(0));
     if (!EvaluatePointer(Ptr, Base, Info))
       return false;
@@ -6398,87 +6654,8 @@
   if (BaseOffset.isNegative())
     return Success(0, E);
 
-  // In the case where we're not dealing with a subobject, we discard the
-  // subobject bit.
-  bool SubobjectOnly = (Type & 1) != 0 && !refersToCompleteObject(Base);
-
-  // If Type & 1 is 0, we need to be able to statically guarantee that the bytes
-  // exist. If we can't verify the base, then we can't do that.
-  //
-  // As a special case, we produce a valid object size for an unknown object
-  // with a known designator if Type & 1 is 1. For instance:
-  //
-  //   extern struct X { char buff[32]; int a, b, c; } *p;
-  //   int a = __builtin_object_size(p->buff + 4, 3); // returns 28
-  //   int b = __builtin_object_size(p->buff + 4, 2); // returns 0, not 40
-  //
-  // This matches GCC's behavior.
-  if (Base.InvalidBase && !SubobjectOnly)
-    return Error(E);
-
-  // If we're not examining only the subobject, then we reset to a complete
-  // object designator
-  //
-  // If Type is 1 and we've lost track of the subobject, just find the complete
-  // object instead. (If Type is 3, that's not correct behavior and we should
-  // return 0 instead.)
-  LValue End = Base;
-  if (!SubobjectOnly || (End.Designator.Invalid && Type == 1)) {
-    QualType T = getObjectType(End.getLValueBase());
-    if (T.isNull())
-      End.Designator.setInvalid();
-    else {
-      End.Designator = SubobjectDesignator(T);
-      End.Offset = CharUnits::Zero();
-    }
-  }
-
-  // If it is not possible to determine which objects ptr points to at compile
-  // time, __builtin_object_size should return (size_t) -1 for type 0 or 1
-  // and (size_t) 0 for type 2 or 3.
-  if (End.Designator.Invalid)
-    return false;
-
-  // According to the GCC documentation, we want the size of the subobject
-  // denoted by the pointer. But that's not quite right -- what we actually
-  // want is the size of the immediately-enclosing array, if there is one.
-  int64_t AmountToAdd = 1;
-  if (End.Designator.MostDerivedIsArrayElement &&
-      End.Designator.Entries.size() == End.Designator.MostDerivedPathLength) {
-    // We got a pointer to an array. Step to its end.
-    AmountToAdd = End.Designator.MostDerivedArraySize -
-      End.Designator.Entries.back().ArrayIndex;
-  } else if (End.Designator.isOnePastTheEnd()) {
-    // We're already pointing at the end of the object.
-    AmountToAdd = 0;
-  }
-
-  QualType PointeeType = End.Designator.MostDerivedType;
-  assert(!PointeeType.isNull());
-  if (PointeeType->isIncompleteType() || PointeeType->isFunctionType())
-    return Error(E);
-
-  if (!HandleLValueArrayAdjustment(Info, E, End, End.Designator.MostDerivedType,
-                                   AmountToAdd))
-    return false;
-
-  auto EndOffset = End.getLValueOffset();
-
-  // The following is a moderately common idiom in C:
-  //
-  // struct Foo { int a; char c[1]; };
-  // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar));
-  // strcpy(&F->c[0], Bar);
-  //
-  // So, if we see that we're examining a 1-length (or 0-length) array at the
-  // end of a struct with an unknown base, we give up instead of breaking code
-  // that behaves this way. Note that we only do this when Type=1, because
-  // Type=3 is a lower bound, so answering conservatively is fine.
-  if (End.InvalidBase && SubobjectOnly && Type == 1 &&
-      End.Designator.Entries.size() == End.Designator.MostDerivedPathLength &&
-      End.Designator.MostDerivedIsArrayElement &&
-      End.Designator.MostDerivedArraySize < 2 &&
-      isDesignatorAtObjectEnd(Info.Ctx, End))
+  CharUnits EndOffset;
+  if (!determineEndOffset(Info, E, Type, Base, EndOffset))
     return false;
 
   if (BaseOffset > EndOffset)
@@ -6517,7 +6694,7 @@
     case EvalInfo::EM_ConstantFold:
     case EvalInfo::EM_EvaluateForOverflow:
     case EvalInfo::EM_IgnoreSideEffects:
-    case EvalInfo::EM_DesignatorFold:
+    case EvalInfo::EM_OffsetFold:
       // Leave it to IR generation.
       return Error(E);
     case EvalInfo::EM_ConstantExpressionUnevaluated:
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -2060,6 +2060,10 @@
   "%0 attribute only applies to pointer arguments">,
   InGroup<IgnoredAttributes>;
 def err_attribute_pointers_only : Error<warn_attribute_pointers_only.Text>;
+def warn_attribute_integers_only : Warning<
+  "%0 attribute only applies to integer arguments">,
+  InGroup<IgnoredAttributes>;
+def err_attribute_integers_only : Error<warn_attribute_integers_only.Text>;
 def warn_attribute_return_pointers_only : Warning<
   "%0 attribute only applies to return values that are pointers">,
   InGroup<IgnoredAttributes>;
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -178,6 +178,41 @@
   }];
 }
 
+def AllocSizeDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+.. Note:: This attribute is not yet fully functional. Specifically, it doesn't
+  yet work with non-const pointers. It is planned to support these in the near
+  future.
+
+The ``alloc_size`` attribute can be placed on functions that return pointers in
+order to hint to the compiler how many bytes of memory will be available at the
+returned pointer. ``alloc_size`` takes one or two arguments.
+
+- ``alloc_size(N)`` implies that argument number N equals the number of
+  available bytes at the returned pointer.
+- ``alloc_size(N, M)`` implies that the product of argument number N and
+  argument number M equals the number of available bytes at the returned
+  pointer.
+
+Argument numbers are 1-based.
+
+An example of how to use ``alloc_size``:
+
+.. code-block:: c
+
+  void *my_malloc(int a) __attribute__((alloc_size(1)));
+  void *my_calloc(int a, int b) __attribute__((alloc_size(1, 2)));
+
+  int main() {
+    void *const p = my_malloc(100);
+    assert(__builtin_object_size(p, 0) == 100);
+    void *const a = my_calloc(20, 5);
+    assert(__builtin_object_size(a, 0) == 100);
+  }
+  }];
+}
+
 def EnableIfDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -709,6 +709,14 @@
   let Documentation = [Undocumented];
 }
 
+def AllocSize : InheritableAttr {
+  let Spellings = [GCC<"alloc_size">];
+  let Subjects = SubjectList<[Function]>;
+  let Args = [IntArgument<"ElemSizeParam">, IntArgument<"NumElemsParam", 1>];
+  let TemplateDependent = 1;
+  let Documentation = [AllocSizeDocs];
+}
+
 def EnableIf : InheritableAttr {
   let Spellings = [GNU<"enable_if">];
   let Subjects = SubjectList<[Function]>;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D14274: Add alloc_size attribute to clang

Reply via email to