Author: Oskar Wirga
Date: 2023-09-29T15:34:31-07:00
New Revision: 832b3b2462c1bb8e2b41ef96fe0ffd3791df0e12

URL: 
https://github.com/llvm/llvm-project/commit/832b3b2462c1bb8e2b41ef96fe0ffd3791df0e12
DIFF: 
https://github.com/llvm/llvm-project/commit/832b3b2462c1bb8e2b41ef96fe0ffd3791df0e12.diff

LOG: Modify BoundsSan to improve debuggability (#65972)

Context
BoundsSanitizer is a mitigation that is part of UBSAN. It can be enabled
in "trap" mode to crash on OOB array accesses.

Problem
BoundsSan has zero false positives meaning every crash is a OOB array
access, unfortunately optimizations cause these crashes in production
builds to be a bit useless because we only know which function is
crashing but not which line of code.

Godbolt example of the optimization: https://godbolt.org/z/6qjax9z1b

This Diff
I wanted to provide a way to know exactly which LOC is responsible for
the crash. What we do here is use the size of the basic block as an
iterator to an immediate value for the ubsan trap.

Previous discussion: https://reviews.llvm.org/D148654

Added: 
    llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
    llvm/test/MC/AArch64/local-bounds-single-trap.ll

Modified: 
    clang/lib/CodeGen/CGExpr.cpp
    clang/test/CodeGen/bounds-checking.c
    llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3123e278985f210..7b4389c874b3f90 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -51,6 +51,12 @@
 using namespace clang;
 using namespace CodeGen;
 
+// Experiment to make sanitizers easier to debug
+static llvm::cl::opt<bool> ClSanitizeDebugDeoptimization(
+    "ubsan-unique-traps", llvm::cl::Optional,
+    llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"),
+    llvm::cl::init(false));
+
 //===--------------------------------------------------------------------===//
 //                        Miscellaneous Helper Methods
 //===--------------------------------------------------------------------===//
@@ -3553,17 +3559,28 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value 
*Checked,
   // check-type per function to save on code size.
   if (TrapBBs.size() <= CheckHandlerID)
     TrapBBs.resize(CheckHandlerID + 1);
+
   llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID];
 
-  if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB ||
-      (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+  if (!ClSanitizeDebugDeoptimization &&
+      CGM.getCodeGenOpts().OptimizationLevel && TrapBB &&
+      (!CurCodeDecl || !CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+    auto Call = TrapBB->begin();
+    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
+
+    Call->applyMergedLocation(Call->getDebugLoc(),
+                              Builder.getCurrentDebugLocation());
+    Builder.CreateCondBr(Checked, Cont, TrapBB);
+  } else {
     TrapBB = createBasicBlock("trap");
     Builder.CreateCondBr(Checked, Cont, TrapBB);
     EmitBlock(TrapBB);
 
-    llvm::CallInst *TrapCall =
-        Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
-                           llvm::ConstantInt::get(CGM.Int8Ty, CheckHandlerID));
+    llvm::CallInst *TrapCall = Builder.CreateCall(
+        CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
+        llvm::ConstantInt::get(CGM.Int8Ty, ClSanitizeDebugDeoptimization
+                                               ? TrapBB->getParent()->size()
+                                               : CheckHandlerID));
 
     if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
       auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
@@ -3573,13 +3590,6 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     Builder.CreateUnreachable();
-  } else {
-    auto Call = TrapBB->begin();
-    assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
-
-    Call->applyMergedLocation(Call->getDebugLoc(),
-                              Builder.getCurrentDebugLocation());
-    Builder.CreateCondBr(Checked, Cont, TrapBB);
   }
 
   EmitBlock(Cont);

diff  --git a/clang/test/CodeGen/bounds-checking.c 
b/clang/test/CodeGen/bounds-checking.c
index 1b9e28915e5d9af..636d4f289e24786 100644
--- a/clang/test/CodeGen/bounds-checking.c
+++ b/clang/test/CodeGen/bounds-checking.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fsanitize=local-bounds -emit-llvm -triple 
x86_64-apple-darwin10 %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fsanitize=array-bounds -O -fsanitize-trap=array-bounds 
-emit-llvm -triple x86_64-apple-darwin10 -DNO_DYNAMIC %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=local-bounds -fsanitize-trap=local-bounds -O3 
-mllvm -bounds-checking-unique-traps -emit-llvm -triple x86_64-apple-darwin10 
%s -o - | FileCheck %s --check-prefixes=NOOPTLOCAL
+// RUN: %clang_cc1 -fsanitize=array-bounds -fsanitize-trap=array-bounds -O3 
-mllvm -ubsan-unique-traps -emit-llvm -triple x86_64-apple-darwin10 %s -o - | 
FileCheck %s --check-prefixes=NOOPTARRAY
 //
 // REQUIRES: x86-registered-target
 
@@ -66,3 +68,17 @@ int f7(union U *u, int i) {
   // CHECK-NOT: @llvm.ubsantrap
   return u->c[i];
 }
+
+
+char B[10];
+char B2[10];
+// CHECK-LABEL: @f8
+void f8(int i, int k) {
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 3)
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 2)
+  B[i] = '\0';
+
+  // NOOPTLOCAL: call void @llvm.ubsantrap(i8 5)
+  // NOOPTARRAY: call void @llvm.ubsantrap(i8 4)
+  B2[k] = '\0';
+}

diff  --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp 
b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 709095184af5df5..ee5b819604170eb 100644
--- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -37,6 +37,9 @@ using namespace llvm;
 static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
                                   cl::desc("Use one trap block per function"));
 
+static cl::opt<bool> DebugTrapBB("bounds-checking-unique-traps",
+                                 cl::desc("Always use one trap per check"));
+
 STATISTIC(ChecksAdded, "Bounds checks added");
 STATISTIC(ChecksSkipped, "Bounds checks skipped");
 STATISTIC(ChecksUnable, "Bounds checks unable to add");
@@ -180,19 +183,27 @@ static bool addBoundsChecking(Function &F, 
TargetLibraryInfo &TLI,
   // will create a fresh block every time it is called.
   BasicBlock *TrapBB = nullptr;
   auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
-    if (TrapBB && SingleTrapBB)
-      return TrapBB;
-
     Function *Fn = IRB.GetInsertBlock()->getParent();
-    // FIXME: This debug location doesn't make a lot of sense in the
-    // `SingleTrapBB` case.
     auto DebugLoc = IRB.getCurrentDebugLocation();
     IRBuilder<>::InsertPointGuard Guard(IRB);
+
+    if (TrapBB && SingleTrapBB && !DebugTrapBB)
+      return TrapBB;
+
     TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
     IRB.SetInsertPoint(TrapBB);
 
-    auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
-    CallInst *TrapCall = IRB.CreateCall(F, {});
+    Intrinsic::ID IntrID = DebugTrapBB ? Intrinsic::ubsantrap : 
Intrinsic::trap;
+    auto *F = Intrinsic::getDeclaration(Fn->getParent(), IntrID);
+
+    CallInst *TrapCall;
+    if (DebugTrapBB) {
+      TrapCall =
+          IRB.CreateCall(F, ConstantInt::get(IRB.getInt8Ty(), Fn->size()));
+    } else {
+      TrapCall = IRB.CreateCall(F, {});
+    }
+
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     TrapCall->setDebugLoc(DebugLoc);

diff  --git a/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll 
b/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
new file mode 100644
index 000000000000000..a3f34007e9b09f8
--- /dev/null
+++ b/llvm/test/Instrumentation/BoundsChecking/ubsan-unique-traps.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=bounds-checking -bounds-checking-unique-traps -S | 
FileCheck %s
+target datalayout = 
"e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare noalias ptr @malloc(i64) nounwind allocsize(0)
+
+define void @f() nounwind {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
+; CHECK-NEXT:    br label [[TRAP:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    store i32 3, ptr [[IDX]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    br label [[TRAP1:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    store i32 3, ptr [[IDX2]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call ptr @malloc(i64 32)
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 8
+; CHECK-NEXT:    br label [[TRAP2:%.*]]
+; CHECK:       6:
+; CHECK-NEXT:    store i32 3, ptr [[IDX3]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 3) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       trap1:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 5) #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       trap2:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 7) #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+;
+  %1 = tail call ptr @malloc(i64 32)
+  %idx = getelementptr inbounds i32, ptr %1, i64 8
+  store i32 3, ptr %idx, align 4
+  %2 = tail call ptr @malloc(i64 32)
+  %idx2 = getelementptr inbounds i32, ptr %2, i64 8
+  store i32 3, ptr %idx2, align 4
+  %3 = tail call ptr @malloc(i64 32)
+  %idx3 = getelementptr inbounds i32, ptr %3, i64 8
+  store i32 3, ptr %idx3, align 4
+  ret void
+}

diff  --git a/llvm/test/MC/AArch64/local-bounds-single-trap.ll 
b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
new file mode 100644
index 000000000000000..53a0e010537f096
--- /dev/null
+++ b/llvm/test/MC/AArch64/local-bounds-single-trap.ll
@@ -0,0 +1,83 @@
+; RUN: llc -O3 -mtriple arm64-linux -filetype asm -o - %s | FileCheck %s 
-check-prefix CHECK-ASM
+; What this test does is check that even with nomerge, the functions still get 
merged in
+; compiled code as the ubsantrap call gets lowered to a single instruction: 
brk.
+
+
+@B = dso_local global [10 x i8] zeroinitializer, align 1
+@B2 = dso_local global [10 x i8] zeroinitializer, align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
+entry:
+; CHECK-ASM:   cmp     x8, #10
+; CHECK-ASM:   b.hi    .LBB0_5
+; CHECK-ASM: // %bb.1:                               // %entry
+; CHECK-ASM:   mov     w9, #10                         // =0xa
+; CHECK-ASM:   sub     x9, x9, x8
+; CHECK-ASM:   cbz     x9, .LBB0_5
+; CHECK-ASM: // %bb.2:
+; CHECK-ASM:   ldrsw   x9, [sp, #8]
+; CHECK-ASM:   adrp    x10, B
+; CHECK-ASM:   add     x10, x10, :lo12:B
+; CHECK-ASM:   strb    wzr, [x10, x8]
+; CHECK-ASM:   cmp     x9, #10
+; CHECK-ASM:   b.hi    .LBB0_5
+; CHECK-ASM: // %bb.3:
+; CHECK-ASM:   mov     w8, #10                         // =0xa
+; CHECK-ASM:   sub     x8, x8, x9
+; CHECK-ASM:   cbz     x8, .LBB0_5
+; CHECK-ASM: // %bb.4:
+; CHECK-ASM:   adrp    x8, B2
+; CHECK-ASM:   add     x8, x8, :lo12:B2
+; CHECK-ASM:   strb    wzr, [x8, x9]
+; CHECK-ASM:   add     sp, sp, #16
+; CHECK-ASM:   .cfi_def_cfa_offset 0
+; CHECK-ASM:   ret
+; CHECK-ASM: .LBB0_5:                                // %trap3
+; CHECK-ASM:   .cfi_restore_state
+; CHECK-ASM:   brk     #0x1
+  %i.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  store i32 %i, ptr %i.addr, align 4
+  store i32 %k, ptr %k.addr, align 4
+  %0 = load i32, ptr %i.addr, align 4
+  %idxprom = sext i32 %0 to i64
+  %1 = add i64 0, %idxprom
+  %arrayidx = getelementptr inbounds [10 x i8], ptr @B, i64 0, i64 %idxprom
+  %2 = sub i64 10, %1
+  %3 = icmp ult i64 10, %1
+  %4 = icmp ult i64 %2, 1
+  %5 = or i1 %3, %4
+  br i1 %5, label %trap, label %6
+
+6:                                                ; preds = %entry
+  store i8 0, ptr %arrayidx, align 1
+  %7 = load i32, ptr %k.addr, align 4
+  %idxprom1 = sext i32 %7 to i64
+  %8 = add i64 0, %idxprom1
+  %arrayidx2 = getelementptr inbounds [10 x i8], ptr @B2, i64 0, i64 %idxprom1
+  %9 = sub i64 10, %8
+  %10 = icmp ult i64 10, %8
+  %11 = icmp ult i64 %9, 1
+  %12 = or i1 %10, %11
+  br i1 %12, label %trap3, label %13
+
+13:                                               ; preds = %6
+  store i8 0, ptr %arrayidx2, align 1
+  ret void
+
+trap:                                             ; preds = %entry
+  call void @llvm.trap() #2
+  unreachable
+
+trap3:                                            ; preds = %6
+  call void @llvm.trap() #2
+  unreachable
+}
+
+; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
+declare void @llvm.trap() #1
+
+attributes #0 = { noinline nounwind uwtable }
+attributes #1 = { cold noreturn nounwind memory(inaccessiblemem: write) }
+attributes #2 = { noreturn nounwind nomerge }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to