https://llvm.org/bugs/show_bug.cgi?id=28515
Bug ID: 28515 Summary: loop vectorizer miscompiles n-ary array concatenation w/ avx2 Product: libraries Version: trunk Hardware: PC OS: Linux Status: NEW Severity: normal Priority: P Component: Loop Optimizer Assignee: unassignedb...@nondot.org Reporter: david.majne...@gmail.com CC: elena.demikhov...@intel.com, llvm-bugs@lists.llvm.org, mku...@google.com, mssim...@codeaurora.org, w...@google.com Classification: Unclassified consider: target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @0 = private constant [2 x i32] [i32 0, i32 1] @1 = private constant [19 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60] @2 = private constant [16 x i32] [i32 -200, i32 -199, i32 -198, i32 -197, i32 -196, i32 -195, i32 -194, i32 -193, i32 -192, i32 -191, i32 -190, i32 -189, i32 -188, i32 -187, i32 -186, i32 -185] @3 = private constant [1 x i32] [i32 5555] define void @concat(i8* nocapture %retval) local_unnamed_addr #0 { entry: %0 = bitcast i8* %retval to [38 x i32]* br label %loop_body.dim.0 loop_body.dim.0: ; preds = %entry, %loop_header.dim.0 %invar_address.dim.0.01 = phi i64 [ 0, %entry ], [ %19, %loop_header.dim.0 ] %1 = icmp ult i64 %invar_address.dim.0.01, 2 br i1 %1, label %2, label %5 ; <label>:2: ; preds = %loop_body.dim.0 %3 = getelementptr inbounds [2 x i32], [2 x i32]* @0, i64 0, i64 %invar_address.dim.0.01 %4 = load i32, i32* %3, align 4 br label %loop_header.dim.0 ; <label>:5: ; preds = %loop_body.dim.0 %6 = add nsw i64 %invar_address.dim.0.01, -2 %7 = icmp ult i64 %6, 19 br i1 %7, label %8, label %11 ; <label>:8: ; preds = %5 %9 = getelementptr inbounds [19 x i32], [19 x i32]* @1, i64 0, i64 %6 %10 = load i32, i32* %9, align 4 br label %loop_header.dim.0 ; <label>:11: ; preds = %5 %12 = add nsw i64 %invar_address.dim.0.01, -21 %13 = icmp ult i64 %12, 16 br i1 %13, label %14, label %loop_header.dim.0 ; 
<label>:14: ; preds = %11 %15 = getelementptr inbounds [16 x i32], [16 x i32]* @2, i64 0, i64 %12 %16 = load i32, i32* %15, align 4 br label %loop_header.dim.0 loop_header.dim.0: ; preds = %11, %14, %8, %2 %17 = phi i32 [ %4, %2 ], [ %10, %8 ], [ %16, %14 ], [ 5555, %11 ] %18 = getelementptr inbounds [38 x i32], [38 x i32]* %0, i64 0, i64 %invar_address.dim.0.01 store i32 %17, i32* %18, align 4 %19 = add nuw nsw i64 %invar_address.dim.0.01, 1 %exitcond = icmp eq i64 %19, 38 br i1 %exitcond, label %loop_exit.dim.0, label %loop_body.dim.0 loop_exit.dim.0: ; preds = %loop_header.dim.0 ret void } attributes #0 = { "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" } This IR is quite simple: it logically concatenates @0, @1, @2 and @3 (2 + 19 + 16 + 1 = 38 elements) by loading from @0, @1 and @2 (the single element of @3 appears directly as the constant 5555 in the phi) and storing each element to the argument pointer "retval". However, the IR generated after optimization (-O3) is dramatically altered and no longer correct. Two snippets from the output: (1) a branch on an undef condition: br i1 undef, label %31, label %loop_header.dim.0.epil (2) a masked load whose address indexes far outside @1: %wide.masked.load8 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* bitcast (i32* getelementptr ([19 x i32], [19 x i32]* @1, i64 242720316759336205, i64 7) to <8 x i32>*) This is likely due to the loop vectorizer. -- You are receiving this mail because: You are on the CC list for the bug.
_______________________________________________ llvm-bugs mailing list llvm-bugs@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs