https://llvm.org/bugs/show_bug.cgi?id=26642
Bug ID: 26642 Summary: Miscompilation caused by stack adjustment code clobbering used registers Product: libraries Version: trunk Hardware: PC OS: Linux Status: NEW Severity: normal Priority: P Component: Backend: AArch64 Assignee: unassignedb...@nondot.org Reporter: andrew.b.ad...@gmail.com CC: llvm-bugs@lists.llvm.org Classification: Unclassified Some time on Friday one of the Halide tests started returning wrong values on the arm64 buildbot: http://buildbot.halide-lang.org:8010/builders/arm64-linux-64-trunk/builds/69 (Halide is a language built on LLVM. Our buildbots pull and test against trunk llvm every four hours or so.) The cause seems to be stack adjustment code that clobbers an in-use register. Below is the .ll from llvm 3.7 and the asm it produces, followed by the .ll from trunk llvm and the asm it produces. Pay attention to x9. It's the address of one of the outputs (the one that's coming out as wrong). In the working code, x9 is used as a temporary to adjust the stack downwards, and then is loaded from [x1], and then used as the address to write the output to near the end of the function (str w11, [x9]). In the broken code, x9 is loaded from the argument, *then* used as a temporary to adjust the stack downward (clobbering it), and then used as the address to write to by the same store instruction. This now becomes a useless store to the stack instead of actually writing the output value. 
Good ll and asm: ; ModuleID = 'halide_module_f10' target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnueabihf" %struct.halide_filter_argument_t = type { i8*, i32, i32, i32, i32, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t* } %struct.halide_scalar_value_t = type { %union.anon } %union.anon = type { double } %struct.halide_filter_metadata_t = type { i32, i32, %struct.halide_filter_argument_t*, i8*, i8* } %struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [2 x i8] } @str = private constant [6 x i8] c"f10.0\00", align 32 @str.2 = private constant [6 x i8] c"f10.1\00", align 32 @0 = private constant [2 x %struct.halide_filter_argument_t] [%struct.halide_filter_argument_t { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str, i32 0, i32 0), i32 2, i32 0, i32 0, i32 32, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null }, %struct.halide_filter_argument_t { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str.2, i32 0, i32 0), i32 2, i32 0, i32 0, i32 32, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null }] @str.3 = private constant [35 x i8] c"arm-64-linux-no_asserts-no_runtime\00", align 32 @str.4 = private constant [4 x i8] c"f10\00", align 32 @f10_metadata = constant %struct.halide_filter_metadata_t { i32 0, i32 2, %struct.halide_filter_argument_t* getelementptr inbounds ([2 x %struct.halide_filter_argument_t], [2 x %struct.halide_filter_argument_t]* @0, i32 0, i32 0), i8* getelementptr inbounds ([35 x i8], [35 x i8]* @str.3, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str.4, i32 0, i32 0) } ; Function Attrs: nounwind define i32 @__f10(%struct.buffer_t* noalias nocapture %f10.0.buffer, %struct.buffer_t* noalias nocapture %f10.1.buffer) #0 { entry: %f8.0.host59 = alloca [13 x <8 x i32>], align 32 %f8.1.host60 = 
alloca [13 x <8 x i32>], align 32 %buf_host = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 1 %f10.0.host = load i8*, i8** %buf_host, align 8 %buf_dev = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 0 %f10.0.dev = load i64, i64* %buf_dev, align 8 %0 = icmp eq i64 %f10.0.dev, 0 %1 = icmp eq i8* %f10.0.host, null %f10.0.host_and_dev_are_null = and i1 %1, %0 %buf_host10 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 1 %f10.1.host = load i8*, i8** %buf_host10, align 8 %buf_dev11 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 0 %f10.1.dev = load i64, i64* %buf_dev11, align 8 %2 = icmp eq i64 %f10.1.dev, 0 %3 = icmp eq i8* %f10.1.host, null %f10.1.host_and_dev_are_null = and i1 %3, %2 br i1 %f10.0.host_and_dev_are_null, label %true_bb, label %after_bb true_bb: ; preds = %entry %buf_elem_size27 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 5 store i32 4, i32* %buf_elem_size27, align 4 %buf_extent29 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 2, i64 0 %4 = bitcast i32* %buf_extent29 to i8* call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 48, i32 4, i1 false) br label %after_bb after_bb: ; preds = %entry, %true_bb br i1 %f10.1.host_and_dev_are_null, label %after_bb42.thread, label %after_bb42 after_bb42.thread: ; preds = %after_bb %buf_elem_size43 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 5 store i32 4, i32* %buf_elem_size43, align 4 %buf_extent45 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 2, i64 0 %5 = bitcast i32* %buf_extent45 to i8* call void @llvm.memset.p0i8.i64(i8* %5, i8 0, i64 48, i32 4, i1 false) br label %destructor_block after_bb42: ; preds = %after_bb br i1 %f10.0.host_and_dev_are_null, label %destructor_block, label %"for f8.s0.v0" 
"for f8.s0.v0": ; preds = %after_bb42, %"for f8.s0.v0" %indvars.iv61 = phi i64 [ %9, %"for f8.s0.v0" ], [ -1, %after_bb42 ] %f8.s0.v0 = phi i32 [ %14, %"for f8.s0.v0" ], [ -1, %after_bb42 ] %6 = sub nsw i32 100, %f8.s0.v0 %7 = sext i32 %6 to i64 %8 = mul nsw i64 %7, %indvars.iv61 %9 = add nsw i64 %indvars.iv61, 1 %10 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.0.host59, i64 0, i64 0, i64 %9 %11 = trunc i64 %8 to i32 store i32 %11, i32* %10, align 4, !tbaa !4 %12 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.1.host60, i64 0, i64 0, i64 %9 %13 = trunc i64 %indvars.iv61 to i32 store i32 %13, i32* %12, align 4, !tbaa !6 %14 = add nsw i32 %f8.s0.v0, 1 %exitcond63 = icmp eq i64 %9, 100 br i1 %exitcond63, label %"for f8.s1.r30.x$r.preheader", label %"for f8.s0.v0" "for f8.s1.r30.x$r.preheader": ; preds = %"for f8.s0.v0" %15 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.0.host59, i64 0, i64 0, i64 0 %16 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.1.host60, i64 0, i64 0, i64 0 %t24.pre = load i32, i32* %15, align 32, !tbaa !7 %.pre = load i32, i32* %16, align 32, !tbaa !18 br label %"for f8.s1.r30.x$r" "for f8.s1.r30.x$r": ; preds = %"for f8.s1.r30.x$r.preheader", %"for f8.s1.r30.x$r" %17 = phi i32 [ %.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.1.value, %"for f8.s1.r30.x$r" ] %t24 = phi i32 [ %t24.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.0.value, %"for f8.s1.r30.x$r" ] %indvars.iv = phi i64 [ 0, %"for f8.s1.r30.x$r.preheader" ], [ %18, %"for f8.s1.r30.x$r" ] %18 = add nuw nsw i64 %indvars.iv, 1 %19 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.0.host59, i64 0, i64 0, i64 %18 %t25 = load i32, i32* %19, align 4, !tbaa !4 %20 = icmp slt i32 %t24, %t25 %f8.0.value = select i1 %20, i32 %t25, i32 %t24 %21 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.1.host60, i64 0, i64 0, i64 %18 %22 = load i32, i32* %21, align 4, !tbaa !6 %f8.1.value = select i1 %20, i32 
%22, i32 %17 store i32 %f8.0.value, i32* %15, align 32, !tbaa !7 store i32 %f8.1.value, i32* %16, align 32, !tbaa !18 %exitcond = icmp eq i64 %18, 100 br i1 %exitcond, label %"consume f8", label %"for f8.s1.r30.x$r" "consume f8": ; preds = %"for f8.s1.r30.x$r" %23 = bitcast i8* %f10.0.host to i32* store i32 %f8.0.value, i32* %23, align 4, !tbaa !29 %24 = bitcast i8* %f10.1.host to i32* store i32 %f8.1.value, i32* %24, align 4, !tbaa !41 br label %destructor_block destructor_block: ; preds = %after_bb42.thread, %"consume f8", %after_bb42 ret i32 0 } ; Function Attrs: nounwind define i32 @f10(%struct.buffer_t* noalias nocapture %f10.0.buffer, %struct.buffer_t* noalias nocapture %f10.1.buffer) #0 { entry: %__f10_result = tail call i32 @__f10(%struct.buffer_t* %f10.0.buffer, %struct.buffer_t* %f10.1.buffer) #0 ret i32 0 } ; Function Attrs: nounwind define i32 @f10_argv(i8** nocapture readonly) #0 { entry: %1 = bitcast i8** %0 to %struct.buffer_t** %2 = load %struct.buffer_t*, %struct.buffer_t** %1, align 8 %3 = getelementptr i8*, i8** %0, i64 1 %4 = bitcast i8** %3 to %struct.buffer_t** %5 = load %struct.buffer_t*, %struct.buffer_t** %4, align 8 %6 = tail call i32 @f10(%struct.buffer_t* %2, %struct.buffer_t* %5) ret i32 0 } ; Function Attrs: nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0 attributes #0 = { nounwind } !llvm.ident = !{!0, !0} !llvm.module.flags = !{!1, !2, !3} !0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final 251413)"} !1 = !{i32 2, !"halide_use_soft_float_abi", i32 0} !2 = !{i32 2, !"halide_mcpu", !"generic"} !3 = !{i32 2, !"halide_mattrs", !""} !4 = !{!"f8.0", !5} !5 = !{!"Halide buffer"} !6 = !{!"f8.1", !5} !7 = !{!"f8.0.width1.base0", !8} !8 = !{!"f8.0.width2.base0", !9} !9 = !{!"f8.0.width4.base0", !10} !10 = !{!"f8.0.width8.base0", !11} !11 = !{!"f8.0.width16.base0", !12} !12 = !{!"f8.0.width32.base0", !13} !13 = !{!"f8.0.width64.base0", !14} !14 = !{!"f8.0.width128.base0", !15} !15 = !{!"f8.0.width256.base0", 
!16} !16 = !{!"f8.0.width512.base0", !17} !17 = !{!"f8.0.width1024.base0", !4} !18 = !{!"f8.1.width1.base0", !19} !19 = !{!"f8.1.width2.base0", !20} !20 = !{!"f8.1.width4.base0", !21} !21 = !{!"f8.1.width8.base0", !22} !22 = !{!"f8.1.width16.base0", !23} !23 = !{!"f8.1.width32.base0", !24} !24 = !{!"f8.1.width64.base0", !25} !25 = !{!"f8.1.width128.base0", !26} !26 = !{!"f8.1.width256.base0", !27} !27 = !{!"f8.1.width512.base0", !28} !28 = !{!"f8.1.width1024.base0", !6} !29 = !{!"f10.0.width1.base0", !30} !30 = !{!"f10.0.width2.base0", !31} !31 = !{!"f10.0.width4.base0", !32} !32 = !{!"f10.0.width8.base0", !33} !33 = !{!"f10.0.width16.base0", !34} !34 = !{!"f10.0.width32.base0", !35} !35 = !{!"f10.0.width64.base0", !36} !36 = !{!"f10.0.width128.base0", !37} !37 = !{!"f10.0.width256.base0", !38} !38 = !{!"f10.0.width512.base0", !39} !39 = !{!"f10.0.width1024.base0", !40} !40 = !{!"f10.0", !5} !41 = !{!"f10.1.width1.base0", !42} !42 = !{!"f10.1.width2.base0", !43} !43 = !{!"f10.1.width4.base0", !44} !44 = !{!"f10.1.width8.base0", !45} !45 = !{!"f10.1.width16.base0", !46} !46 = !{!"f10.1.width32.base0", !47} !47 = !{!"f10.1.width64.base0", !48} !48 = !{!"f10.1.width128.base0", !49} !49 = !{!"f10.1.width256.base0", !50} !50 = !{!"f10.1.width512.base0", !51} !51 = !{!"f10.1.width1024.base0", !52} !52 = !{!"f10.1", !5} .text .file "halide_module_f10" .section .text.__f10,"ax",@progbits .globl __f10 .align 2 .type __f10,@function __f10: // @__f10 // BB#0: // %entry stp x28, x27, [sp, #-32]! 
stp x29, x30, [sp, #16] add x29, sp, #16 // =16 sub x9, sp, #832 // =832 and sp, x9, #0xffffffffffffffe0 ldp x10, x8, [x0] ldp x11, x9, [x1] orr x12, x8, x10 orr x10, x9, x11 cmp x12, #0 // =0 cset w11, eq cmp x10, #0 // =0 cset w10, eq cbnz x12, .LBB0_2 // BB#1: // %true_bb orr w12, wzr, #0x4 stp xzr, xzr, [x0, #48] stp xzr, xzr, [x0, #32] stp xzr, xzr, [x0, #16] str w12, [x0, #64] .LBB0_2: // %after_bb cbz w10, .LBB0_4 // BB#3: // %after_bb42.thread orr w8, wzr, #0x4 stp xzr, xzr, [x1, #48] stp xzr, xzr, [x1, #32] stp xzr, xzr, [x1, #16] str w8, [x1, #64] b .LBB0_10 .LBB0_4: // %after_bb42 movz w10, #0x65 tbnz w11, #0, .LBB0_10 // BB#5: movn w11, #0 movn x12, #0 mov x13, sp add x14, sp, #416 // =416 .LBB0_6: // %for f8.s0.v0 // =>This Inner Loop Header: Depth=1 mul w15, w10, w12 add x12, x12, #1 // =1 str w11, [x13], #4 add w11, w11, #1 // =1 str w15, [x14], #4 sub x10, x10, #1 // =1 cbnz x10, .LBB0_6 // BB#7: // %for f8.s1.r30.x$r.preheader ldr w10, [sp, #416] ldr w11, [sp] mov x12, sp orr x12, x12, #0x4 add x13, sp, #416 // =416 orr x13, x13, #0x4 movz w14, #0x64 .LBB0_8: // %for f8.s1.r30.x$r // =>This Inner Loop Header: Depth=1 ldr w15, [x13], #4 ldr w16, [x12], #4 cmp w10, w15 csel w10, w15, w10, lt csel w11, w16, w11, lt str w10, [sp, #416] str w11, [sp] sub x14, x14, #1 // =1 cbnz x14, .LBB0_8 // BB#9: // %consume f8 str w10, [x8] str w11, [x9] .LBB0_10: // %destructor_block mov w0, wzr sub sp, x29, #16 // =16 ldp x29, x30, [sp, #16] ldp x28, x27, [sp], #32 ret .Lfunc_end0: .size __f10, .Lfunc_end0-__f10 .section .text.f10,"ax",@progbits .globl f10 .align 2 .type f10,@function f10: // @f10 // BB#0: // %entry stp x29, x30, [sp, #-16]! mov x29, sp bl __f10 mov w0, wzr ldp x29, x30, [sp], #16 ret .Lfunc_end1: .size f10, .Lfunc_end1-f10 .section .text.f10_argv,"ax",@progbits .globl f10_argv .align 2 .type f10_argv,@function f10_argv: // @f10_argv // BB#0: // %entry stp x29, x30, [sp, #-16]! 
mov x29, sp ldp x8, x1, [x0] mov x0, x8 bl f10 mov w0, wzr ldp x29, x30, [sp], #16 ret .Lfunc_end2: .size f10_argv, .Lfunc_end2-f10_argv .type .Lstr,@object // @str .section .rodata,"a",@progbits .align 5 .Lstr: .asciz "f10.0" .size .Lstr, 6 .type .Lstr.2,@object // @str.2 .align 5 .Lstr.2: .asciz "f10.1" .size .Lstr.2, 6 .type .L__unnamed_1,@object // @0 .section .data.rel.ro.local,"aw",@progbits .align 4 .L__unnamed_1: .xword .Lstr .word 2 // 0x2 .word 0 // 0x0 .word 0 // 0x0 .word 32 // 0x20 .xword 0 .xword 0 .xword 0 .xword .Lstr.2 .word 2 // 0x2 .word 0 // 0x0 .word 0 // 0x0 .word 32 // 0x20 .xword 0 .xword 0 .xword 0 .size .L__unnamed_1, 96 .type .Lstr.3,@object // @str.3 .section .rodata,"a",@progbits .align 5 .Lstr.3: .asciz "arm-64-linux-no_asserts-no_runtime" .size .Lstr.3, 35 .type .Lstr.4,@object // @str.4 .align 5 .Lstr.4: .asciz "f10" .size .Lstr.4, 4 .type f10_metadata,@object // @f10_metadata .section .data.rel.ro.local,"aw",@progbits .globl f10_metadata .align 4 f10_metadata: .word 0 // 0x0 .word 2 // 0x2 .xword .L__unnamed_1 .xword .Lstr.3 .xword .Lstr.4 .size f10_metadata, 32 .ident "clang version 3.7.0 (tags/RELEASE_370/final 251413)" .ident "clang version 3.7.0 (tags/RELEASE_370/final 251413)" .section ".note.GNU-stack","",@progbits Bad ll and asm: ; ModuleID = 'halide_module_f10' target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnueabihf" %struct.halide_filter_argument_t = type { i8*, i32, i32, i32, i32, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t*, %struct.halide_scalar_value_t* } %struct.halide_scalar_value_t = type { %union.anon } %union.anon = type { double } %struct.halide_filter_metadata_t = type { i32, i32, %struct.halide_filter_argument_t*, i8*, i8* } %struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [2 x i8] } @str = private constant [6 x i8] c"f10.0\00", align 32 @str.2 = private constant [6 x i8] c"f10.1\00", align 32 @0 = private constant [2 x 
%struct.halide_filter_argument_t] [%struct.halide_filter_argument_t { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str, i32 0, i32 0), i32 2, i32 0, i32 0, i32 32, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null }, %struct.halide_filter_argument_t { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str.2, i32 0, i32 0), i32 2, i32 0, i32 0, i32 32, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null, %struct.halide_scalar_value_t* null }] @str.3 = private constant [35 x i8] c"arm-64-linux-no_asserts-no_runtime\00", align 32 @str.4 = private constant [4 x i8] c"f10\00", align 32 @f10_metadata = constant %struct.halide_filter_metadata_t { i32 0, i32 2, %struct.halide_filter_argument_t* getelementptr inbounds ([2 x %struct.halide_filter_argument_t], [2 x %struct.halide_filter_argument_t]* @0, i32 0, i32 0), i8* getelementptr inbounds ([35 x i8], [35 x i8]* @str.3, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str.4, i32 0, i32 0) } ; Function Attrs: norecurse nounwind define i32 @__f10(%struct.buffer_t* noalias nocapture %f10.0.buffer, %struct.buffer_t* noalias nocapture %f10.1.buffer) #0 { entry: %f8.0.host59 = alloca [13 x <8 x i32>], align 32 %f8.1.host60 = alloca [13 x <8 x i32>], align 32 %buf_host = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 1 %f10.0.host = load i8*, i8** %buf_host, align 8 %buf_dev = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 0 %f10.0.dev = load i64, i64* %buf_dev, align 8 %0 = icmp eq i64 %f10.0.dev, 0 %1 = icmp eq i8* %f10.0.host, null %f10.0.host_and_dev_are_null = and i1 %1, %0 %buf_host10 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 1 %f10.1.host = load i8*, i8** %buf_host10, align 8 %buf_dev11 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 0 %f10.1.dev = load i64, i64* 
%buf_dev11, align 8 %2 = icmp eq i64 %f10.1.dev, 0 %3 = icmp eq i8* %f10.1.host, null %f10.1.host_and_dev_are_null = and i1 %3, %2 br i1 %f10.0.host_and_dev_are_null, label %true_bb, label %after_bb true_bb: ; preds = %entry %buf_elem_size27 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 5 store i32 4, i32* %buf_elem_size27, align 4 %buf_extent29 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.0.buffer, i64 0, i32 2, i64 0 %4 = bitcast i32* %buf_extent29 to i8* call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 48, i32 4, i1 false) br label %after_bb after_bb: ; preds = %entry, %true_bb br i1 %f10.1.host_and_dev_are_null, label %after_bb42.thread, label %after_bb42 after_bb42.thread: ; preds = %after_bb %buf_elem_size43 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 5 store i32 4, i32* %buf_elem_size43, align 4 %buf_extent45 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f10.1.buffer, i64 0, i32 2, i64 0 %5 = bitcast i32* %buf_extent45 to i8* call void @llvm.memset.p0i8.i64(i8* %5, i8 0, i64 48, i32 4, i1 false) br label %destructor_block after_bb42: ; preds = %after_bb br i1 %f10.0.host_and_dev_are_null, label %destructor_block, label %"for f8.s0.v0" "for f8.s0.v0": ; preds = %after_bb42, %"for f8.s0.v0" %indvars.iv61 = phi i64 [ %9, %"for f8.s0.v0" ], [ -1, %after_bb42 ] %f8.s0.v0 = phi i32 [ %14, %"for f8.s0.v0" ], [ -1, %after_bb42 ] %6 = sub nsw i32 100, %f8.s0.v0 %7 = sext i32 %6 to i64 %8 = mul nsw i64 %7, %indvars.iv61 %9 = add nsw i64 %indvars.iv61, 1 %10 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.0.host59, i64 0, i64 0, i64 %9 %11 = trunc i64 %8 to i32 store i32 %11, i32* %10, align 4, !tbaa !4 %12 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.1.host60, i64 0, i64 0, i64 %9 %13 = trunc i64 %indvars.iv61 to i32 store i32 %13, i32* %12, align 4, !tbaa !6 %14 = add nsw i32 %f8.s0.v0, 1 %15 = icmp eq i64 %9, 
100 br i1 %15, label %"for f8.s1.r30.x$r.preheader", label %"for f8.s0.v0" "for f8.s1.r30.x$r.preheader": ; preds = %"for f8.s0.v0" %16 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.0.host59, i64 0, i64 0, i64 0 %17 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.1.host60, i64 0, i64 0, i64 0 %t24.pre = load i32, i32* %16, align 32, !tbaa !7 %.pre = load i32, i32* %17, align 32, !tbaa !18 br label %"for f8.s1.r30.x$r" "for f8.s1.r30.x$r": ; preds = %"for f8.s1.r30.x$r.preheader", %"for f8.s1.r30.x$r" %18 = phi i32 [ %.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.1.value, %"for f8.s1.r30.x$r" ] %t24 = phi i32 [ %t24.pre, %"for f8.s1.r30.x$r.preheader" ], [ %f8.0.value, %"for f8.s1.r30.x$r" ] %indvars.iv = phi i64 [ 0, %"for f8.s1.r30.x$r.preheader" ], [ %19, %"for f8.s1.r30.x$r" ] %19 = add nuw nsw i64 %indvars.iv, 1 %20 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.0.host59, i64 0, i64 0, i64 %19 %t25 = load i32, i32* %20, align 4, !tbaa !4 %21 = icmp slt i32 %t24, %t25 %f8.0.value = select i1 %21, i32 %t25, i32 %t24 %22 = getelementptr inbounds [13 x <8 x i32>], [13 x <8 x i32>]* %f8.1.host60, i64 0, i64 0, i64 %19 %23 = load i32, i32* %22, align 4, !tbaa !6 %f8.1.value = select i1 %21, i32 %23, i32 %18 store i32 %f8.0.value, i32* %16, align 32, !tbaa !7 store i32 %f8.1.value, i32* %17, align 32, !tbaa !18 %24 = icmp eq i64 %19, 100 br i1 %24, label %"consume f8", label %"for f8.s1.r30.x$r" "consume f8": ; preds = %"for f8.s1.r30.x$r" %25 = bitcast i8* %f10.0.host to i32* store i32 %f8.0.value, i32* %25, align 4, !tbaa !29 %26 = bitcast i8* %f10.1.host to i32* store i32 %f8.1.value, i32* %26, align 4, !tbaa !41 br label %destructor_block destructor_block: ; preds = %after_bb42.thread, %"consume f8", %after_bb42 ret i32 0 } ; Function Attrs: norecurse nounwind define i32 @f10(%struct.buffer_t* noalias nocapture %f10.0.buffer, %struct.buffer_t* noalias nocapture %f10.1.buffer) #0 { entry: %__f10_result = tail 
call i32 @__f10(%struct.buffer_t* %f10.0.buffer, %struct.buffer_t* %f10.1.buffer) #2 ret i32 0 } ; Function Attrs: norecurse nounwind define i32 @f10_argv(i8** nocapture readonly) #0 { entry: %1 = bitcast i8** %0 to %struct.buffer_t** %2 = load %struct.buffer_t*, %struct.buffer_t** %1, align 8 %3 = getelementptr i8*, i8** %0, i64 1 %4 = bitcast i8** %3 to %struct.buffer_t** %5 = load %struct.buffer_t*, %struct.buffer_t** %4, align 8 %6 = tail call i32 @f10(%struct.buffer_t* %2, %struct.buffer_t* %5) ret i32 0 } ; Function Attrs: argmemonly nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1 attributes #0 = { norecurse nounwind } attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind } !llvm.ident = !{!0, !0} !llvm.module.flags = !{!1, !2, !3} !0 = !{!"clang version 3.9.0 (trunk 260979)"} !1 = !{i32 2, !"halide_use_soft_float_abi", i32 0} !2 = !{i32 2, !"halide_mcpu", !"generic"} !3 = !{i32 2, !"halide_mattrs", !""} !4 = !{!"f8.0", !5} !5 = !{!"Halide buffer"} !6 = !{!"f8.1", !5} !7 = !{!"f8.0.width1.base0", !8} !8 = !{!"f8.0.width2.base0", !9} !9 = !{!"f8.0.width4.base0", !10} !10 = !{!"f8.0.width8.base0", !11} !11 = !{!"f8.0.width16.base0", !12} !12 = !{!"f8.0.width32.base0", !13} !13 = !{!"f8.0.width64.base0", !14} !14 = !{!"f8.0.width128.base0", !15} !15 = !{!"f8.0.width256.base0", !16} !16 = !{!"f8.0.width512.base0", !17} !17 = !{!"f8.0.width1024.base0", !4} !18 = !{!"f8.1.width1.base0", !19} !19 = !{!"f8.1.width2.base0", !20} !20 = !{!"f8.1.width4.base0", !21} !21 = !{!"f8.1.width8.base0", !22} !22 = !{!"f8.1.width16.base0", !23} !23 = !{!"f8.1.width32.base0", !24} !24 = !{!"f8.1.width64.base0", !25} !25 = !{!"f8.1.width128.base0", !26} !26 = !{!"f8.1.width256.base0", !27} !27 = !{!"f8.1.width512.base0", !28} !28 = !{!"f8.1.width1024.base0", !6} !29 = !{!"f10.0.width1.base0", !30} !30 = !{!"f10.0.width2.base0", !31} !31 = !{!"f10.0.width4.base0", !32} !32 = !{!"f10.0.width8.base0", !33} !33 = !{!"f10.0.width16.base0", 
!34} !34 = !{!"f10.0.width32.base0", !35} !35 = !{!"f10.0.width64.base0", !36} !36 = !{!"f10.0.width128.base0", !37} !37 = !{!"f10.0.width256.base0", !38} !38 = !{!"f10.0.width512.base0", !39} !39 = !{!"f10.0.width1024.base0", !40} !40 = !{!"f10.0", !5} !41 = !{!"f10.1.width1.base0", !42} !42 = !{!"f10.1.width2.base0", !43} !43 = !{!"f10.1.width4.base0", !44} !44 = !{!"f10.1.width8.base0", !45} !45 = !{!"f10.1.width16.base0", !46} !46 = !{!"f10.1.width32.base0", !47} !47 = !{!"f10.1.width64.base0", !48} !48 = !{!"f10.1.width128.base0", !49} !49 = !{!"f10.1.width256.base0", !50} !50 = !{!"f10.1.width512.base0", !51} !51 = !{!"f10.1.width1024.base0", !52} !52 = !{!"f10.1", !5} .text .file "halide_module_f10" .section .text.__f10,"ax",@progbits .globl __f10 .p2align 2 .type __f10,@function __f10: // @__f10 // BB#0: // %entry ldp x10, x8, [x0] ldp x11, x9, [x1] orr x12, x8, x10 orr x10, x9, x11 cmp x12, #0 // =0 cset w11, eq cmp x10, #0 // =0 cset w10, eq cbnz x12, .LBB0_2 // BB#1: // %true_bb orr w12, wzr, #0x4 stp xzr, xzr, [x0, #48] stp xzr, xzr, [x0, #32] stp xzr, xzr, [x0, #16] str w12, [x0, #64] .LBB0_2: // %after_bb cbz w10, .LBB0_4 // BB#3: // %after_bb42.thread orr w8, wzr, #0x4 mov w0, wzr stp xzr, xzr, [x1, #48] stp xzr, xzr, [x1, #32] stp xzr, xzr, [x1, #16] str w8, [x1, #64] ret .LBB0_4: // %after_bb42 movz w10, #0x65 tbnz w11, #0, .LBB0_10 // BB#5: str x28, [sp, #-32]! 
sub x9, sp, #832 // =832 stp x29, x30, [sp, #16] add x29, sp, #16 // =16 and sp, x9, #0xffffffffffffffe0 movn x13, #0 mov x11, sp add x12, sp, #416 // =416 .LBB0_6: // %"for f8.s0.v0" // =>This Inner Loop Header: Depth=1 mul w14, w10, w13 str w13, [x11], #4 add x13, x13, #1 // =1 sub x10, x10, #1 // =1 str w14, [x12], #4 cmp x13, #100 // =100 b.ne .LBB0_6 // BB#7: // %"for f8.s1.r30.x$r.preheader" ldr w10, [sp, #416] ldr w11, [sp] orr w12, wzr, #0x4 add x13, sp, #416 // =416 mov x14, sp .LBB0_8: // %"for f8.s1.r30.x$r" // =>This Inner Loop Header: Depth=1 ldr w15, [x13, x12] ldr w16, [x14, x12] add x12, x12, #4 // =4 cmp w10, w15 csel w10, w15, w10, lt csel w11, w16, w11, lt str w10, [sp, #416] str w11, [sp] cmp x12, #404 // =404 b.ne .LBB0_8 // BB#9: // %"consume f8" str w10, [x8] str w11, [x9] sub sp, x29, #16 // =16 ldp x29, x30, [sp, #16] ldr x28, [sp], #32 .LBB0_10: // %destructor_block mov w0, wzr ret .Lfunc_end0: .size __f10, .Lfunc_end0-__f10 .section .text.f10,"ax",@progbits .globl f10 .p2align 2 .type f10,@function f10: // @f10 // BB#0: // %entry stp x29, x30, [sp, #-16]! mov x29, sp bl __f10 mov w0, wzr ldp x29, x30, [sp], #16 ret .Lfunc_end1: .size f10, .Lfunc_end1-f10 .section .text.f10_argv,"ax",@progbits .globl f10_argv .p2align 2 .type f10_argv,@function f10_argv: // @f10_argv // BB#0: // %entry stp x29, x30, [sp, #-16]! 
ldp x8, x1, [x0] mov x29, sp mov x0, x8 bl f10 mov w0, wzr ldp x29, x30, [sp], #16 ret .Lfunc_end2: .size f10_argv, .Lfunc_end2-f10_argv .type .Lstr,@object // @str .section .rodata,"a",@progbits .p2align 5 .Lstr: .asciz "f10.0" .size .Lstr, 6 .type .Lstr.2,@object // @str.2 .p2align 5 .Lstr.2: .asciz "f10.1" .size .Lstr.2, 6 .type .L__unnamed_1,@object // @0 .section .data.rel.ro,"aw",@progbits .p2align 4 .L__unnamed_1: .xword .Lstr .word 2 // 0x2 .word 0 // 0x0 .word 0 // 0x0 .word 32 // 0x20 .xword 0 .xword 0 .xword 0 .xword .Lstr.2 .word 2 // 0x2 .word 0 // 0x0 .word 0 // 0x0 .word 32 // 0x20 .xword 0 .xword 0 .xword 0 .size .L__unnamed_1, 96 .type .Lstr.3,@object // @str.3 .section .rodata,"a",@progbits .p2align 5 .Lstr.3: .asciz "arm-64-linux-no_asserts-no_runtime" .size .Lstr.3, 35 .type .Lstr.4,@object // @str.4 .p2align 5 .Lstr.4: .asciz "f10" .size .Lstr.4, 4 .type f10_metadata,@object // @f10_metadata .section .data.rel.ro,"aw",@progbits .globl f10_metadata .p2align 4 f10_metadata: .word 0 // 0x0 .word 2 // 0x2 .xword .L__unnamed_1 .xword .Lstr.3 .xword .Lstr.4 .size f10_metadata, 32 .ident "clang version 3.9.0 (trunk 260979)" .ident "clang version 3.9.0 (trunk 260979)" .section ".note.GNU-stack","",@progbits -- You are receiving this mail because: You are on the CC list for the bug.
_______________________________________________ llvm-bugs mailing list llvm-bugs@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs