https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/131985
>From 889f40c5570af8a02e301c2bf3c6382f69210140 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga <kasuga.ryot...@fujitsu.com> Date: Mon, 17 Mar 2025 11:24:47 +0000 Subject: [PATCH 1/3] [LoopUtils] Fix metadata generated by makeFollowupLoopID When multiple pragmas for loop transformations are specified, such as: ```c #pragma clang loop vectorize(enable) unroll_count(8) for (...) { } ``` The generated metadata would look like this: ``` !0 = distinct !{!0, !1, !2} !1 = !{!"llvm.loop.vectorize.enable", i1 true} !2 = !{!"llvm.loop.vectorize.followup_all", !3} !3 = distinct !{!3, !4, !5} !4 = !{!"llvm.loop.isvectorized"} !5 = !{!"llvm.loop.unroll.count", i32 8} ``` For a loop with `!0` as its LoopID, the new LoopID after vectorization should look like the following, so that we know that this loop is already vectorized and should be unrolled with the specified count: ``` !6 = distinct !{!6, !4, !5} ``` However, the current implementation creates a new LoopID like: ``` !7 = distinct !{!7, !3} ``` Therefore, subsequent passes like LoopUnroll fail to recognize the attributes of this loop correctly. This patch fixes `makeFollowupLoopID`, which creates a new LoopID after each transformation. If the follow-up metadata (`!3` in the above case) is a LoopID, the new LoopID will contain its operands (`!4` and `!5`) instead of the metadata itself. 
--- llvm/lib/Transforms/Utils/LoopUtils.cpp | 29 +++++ .../LoopVectorize/make-followup-loop-id.ll | 102 ++++++++++++++---- 2 files changed, 111 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 84c08556f8a25..4a6105add953f 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -317,6 +317,35 @@ std::optional<MDNode *> llvm::makeFollowupLoopID( HasAnyFollowup = true; for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { + // The followup metadata typically forms as follows: + // + // !0 = distinct !{!0, !1, !2} + // !1 = !{!"llvm.loop.distribute.enable", i1 true} + // !2 = !{!"llvm.loop.distribute.followup_all", !3} + // !3 = distinct !{!3, !4} + // !4 = !{!"llvm.loop.vectorize.enable", i1 true} + // + // If we push Option (!3 in this case) in MDs, the new metadata looks + // something like: + // + // !5 = distinct !{!5, !3} + // + // This doesn't contain !4, so the vectorization pass doesn't recognize + // this loop as vectorization enabled. To make the new metadata contain !4 + // instead of !3, traverse all of Option's operands and push them into + // MDs if Option seems to be a LoopID. + if (auto *MDN = dyn_cast<MDNode>(Option)) { + // TODO: Is there a proper way to detect LoopID? + if (MDN->getNumOperands() > 1 && MDN->getOperand(0) == MDN) { + for (const MDOperand &NestedOption : drop_begin(MDN->operands())) { + MDs.push_back(NestedOption.get()); + Changed = true; + } + continue; + } + } + + // If Option does't seem to be a LoopID, push it as it is. 
MDs.push_back(Option.get()); Changed = true; } diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll index fa5c206547a07..41f508e0a7641 100644 --- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll +++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll @@ -11,10 +11,6 @@ ; a[i] *= x; ; } ; } -; -; FIXME: Currently unrolling is not applied. This is because the new Loop ID -; created after vectorization does not directly contain unroll metadata. -; Unexpected nests have been created. define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-LABEL: define void @f( ; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) { @@ -25,14 +21,47 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]] -; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 
[[INDEX_NEXT_6]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]] +; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]] +; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]] +; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]] +; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP16]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]] +; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]] +; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]] +; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]] +; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]] +; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]] +; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 
[[INDEX_NEXT_6]], 24 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]] +; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]] +; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]] +; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]] +; CHECK-NEXT: store <4 x float> [[TMP18]], ptr [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -41,14 +70,49 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]] ; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: 
[[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]] +; CHECK-NEXT: store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]] +; CHECK-NEXT: store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]] +; CHECK-NEXT: store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]] +; CHECK-NEXT: store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5 +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]] +; CHECK-NEXT: [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]] +; CHECK-NEXT: store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6 +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]] +; CHECK-NEXT: [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], 
align 4 +; CHECK-NEXT: [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]] +; CHECK-NEXT: store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4 +; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7 +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]] +; CHECK-NEXT: [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]] +; CHECK-NEXT: store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 +; CHECK-NEXT: [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024 +; CHECK-NEXT: br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -78,10 +142,8 @@ exit: !4 = !{!"llvm.loop.isvectorized"} !5 = !{!"llvm.loop.unroll.count", i32 8} ;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]} -; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} -; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"} -; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8} -; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ;. >From b1d69e95eb7a75d9809cf77c11367f7bda66ff34 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga <kasuga.ryot...@fujitsu.com> Date: Tue, 25 Mar 2025 06:43:31 +0000 Subject: [PATCH 2/3] Revert "[LoopUtils] Fix metadata generated by makeFollowupLoopID" This reverts commit 889f40c5570af8a02e301c2bf3c6382f69210140. 
--- llvm/lib/Transforms/Utils/LoopUtils.cpp | 29 ----- .../LoopVectorize/make-followup-loop-id.ll | 102 ++++-------------- 2 files changed, 20 insertions(+), 111 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 4a6105add953f..84c08556f8a25 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -317,35 +317,6 @@ std::optional<MDNode *> llvm::makeFollowupLoopID( HasAnyFollowup = true; for (const MDOperand &Option : drop_begin(FollowupNode->operands())) { - // The followup metadata typically forms as follows: - // - // !0 = distinct !{!0, !1, !2} - // !1 = !{!"llvm.loop.distribute.enable", i1 true} - // !2 = !{!"llvm.loop.distribute.followup_all", !3} - // !3 = distinct !{!3, !4} - // !4 = !{!"llvm.loop.vectorize.enable", i1 true} - // - // If we push Option (!3 in this case) in MDs, the new metadata looks - // something like: - // - // !5 = distinct !{!5, !3} - // - // This doesn't contain !4, so the vectorization pass doesn't recognize - // this loop as vectorization enabled. To make the new metadata contain !4 - // instead of !3, traverse all of Option's operands and push them into - // MDs if Option seems to be a LoopID. - if (auto *MDN = dyn_cast<MDNode>(Option)) { - // TODO: Is there a proper way to detect LoopID? - if (MDN->getNumOperands() > 1 && MDN->getOperand(0) == MDN) { - for (const MDOperand &NestedOption : drop_begin(MDN->operands())) { - MDs.push_back(NestedOption.get()); - Changed = true; - } - continue; - } - } - - // If Option does't seem to be a LoopID, push it as it is. 
MDs.push_back(Option.get()); Changed = true; } diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll index 41f508e0a7641..fa5c206547a07 100644 --- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll +++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll @@ -11,6 +11,10 @@ ; a[i] *= x; ; } ; } +; +; FIXME: Currently unrolling is not applied. This is because the new Loop ID +; created after vectorization does not directly contain unroll metadata. +; Unexpected nests have been created. define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-LABEL: define void @f( ; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) { @@ -21,47 +25,14 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]] -; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]] -; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP14]], align 4 -; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]] -; 
CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]] -; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]] -; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]] -; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP16]], align 4 -; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]] -; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]] -; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]] -; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]] -; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]] -; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]] -; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]] -; 
CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]] -; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP12]], align 4 -; CHECK-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]] -; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]] -; CHECK-NEXT: store <4 x float> [[TMP18]], ptr [[TMP17]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32 +; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -70,49 +41,14 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]] ; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] -; CHECK-NEXT: [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: 
[[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]] -; CHECK-NEXT: store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]] -; CHECK-NEXT: [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 -; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]] -; CHECK-NEXT: store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4 -; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]] -; CHECK-NEXT: [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 -; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]] -; CHECK-NEXT: store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4 -; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]] -; CHECK-NEXT: [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]] -; CHECK-NEXT: store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]] -; CHECK-NEXT: [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4 -; CHECK-NEXT: [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]] -; CHECK-NEXT: store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4 -; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]] -; CHECK-NEXT: [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4 -; CHECK-NEXT: [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]] -; CHECK-NEXT: store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4 -; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = 
getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]] -; CHECK-NEXT: [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4 -; CHECK-NEXT: [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]] -; CHECK-NEXT: store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4 -; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 -; CHECK-NEXT: [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024 -; CHECK-NEXT: br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -142,8 +78,10 @@ exit: !4 = !{!"llvm.loop.isvectorized"} !5 = !{!"llvm.loop.unroll.count", i32 8} ;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]} +; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"} +; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8} +; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} ;. >From ce8febb0befe41694b9d83c14dcfb831a82489ff Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga <kasuga.ryot...@fujitsu.com> Date: Wed, 26 Mar 2025 07:29:18 +0000 Subject: [PATCH 3/3] [clang][CodeGen] Generate follow-up metadata for loops in correct format When pragma of loop transformations is specified, follow-up metadata for loops is generated after each transformation. 
On the LLVM side, follow-up metadata is expected to be a list of properties, such as the following: ``` !followup = !{!"llvm.loop.vectorize.followup_all", !mp, !isvectorized} !mp = !{!"llvm.loop.mustprogress"} !isvectorized = !{!"llvm.loop.isvectorized"} ``` However, on the clang side, the generated metadata contains an MDNode that has those properties, as shown below: ``` !followup = !{!"llvm.loop.vectorize.followup_all", !loop_id} !loop_id = distinct !{!loop_id, !mp, !isvectorized} !mp = !{!"llvm.loop.mustprogress"} !isvectorized = !{!"llvm.loop.isvectorized"} ``` According to the LangRef, the LLVM side is correct. (ref: https://llvm.org/docs/TransformMetadata.html#transformation-metadata-structure). Due to this inconsistency, follow-up metadata was not interpreted correctly, e.g., only one transformation is applied when multiple pragmas are used. This patch fixes the clang side to emit follow-up metadata in the correct format. --- clang/lib/CodeGen/CGLoopInfo.cpp | 133 ++++++++---------- clang/lib/CodeGen/CGLoopInfo.h | 43 +++--- .../test/CodeGenCXX/pragma-followup_inner.cpp | 9 +- .../test/CodeGenCXX/pragma-followup_outer.cpp | 12 +- clang/test/CodeGenCXX/pragma-loop.cpp | 25 +--- .../LoopVectorize/make-followup-loop-id.ll | 108 +++++++++++--- 6 files changed, 180 insertions(+), 150 deletions(-) diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index 448571221ef81..2b7d7881ab990 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -22,20 +22,20 @@ using namespace clang::CodeGen; using namespace llvm; MDNode * -LoopInfo::createLoopPropertiesMetadata(ArrayRef<Metadata *> LoopProperties) { +LoopInfo::createFollowupMetadata(const char *FollowupName, + ArrayRef<llvm::Metadata *> LoopProperties) { LLVMContext &Ctx = Header->getContext(); - SmallVector<Metadata *, 4> NewLoopProperties; - NewLoopProperties.push_back(nullptr); - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - MDNode *LoopID 
= MDNode::getDistinct(Ctx, NewLoopProperties); - LoopID->replaceOperandWith(0, LoopID); - return LoopID; + SmallVector<Metadata *, 4> Args; + Args.push_back(MDString::get(Ctx, FollowupName)); + Args.append(LoopProperties.begin(), LoopProperties.end()); + return MDNode::get(Ctx, Args); } -MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, - ArrayRef<Metadata *> LoopProperties, - bool &HasUserTransforms) { +SmallVector<Metadata *, 4> +LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { LLVMContext &Ctx = Header->getContext(); std::optional<bool> Enabled; @@ -44,23 +44,19 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, else if (Attrs.PipelineInitiationInterval != 0) Enabled = true; + SmallVector<Metadata *, 4> Args; + Args.append(LoopProperties.begin(), LoopProperties.end()); + if (Enabled != true) { - SmallVector<Metadata *, 4> NewLoopProperties; if (Enabled == false) { - NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - NewLoopProperties.push_back( + Args.push_back( MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.disable"), ConstantAsMetadata::get(ConstantInt::get( llvm::Type::getInt1Ty(Ctx), 1))})); - LoopProperties = NewLoopProperties; } - return createLoopPropertiesMetadata(LoopProperties); + return Args; } - SmallVector<Metadata *, 4> Args; - Args.push_back(nullptr); - Args.append(LoopProperties.begin(), LoopProperties.end()); - if (Attrs.PipelineInitiationInterval > 0) { Metadata *Vals[] = { MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), @@ -71,13 +67,11 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, // No follow-up: This is the last transformation. 
- MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); HasUserTransforms = true; - return LoopID; + return Args; } -MDNode * +SmallVector<Metadata *, 4> LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, ArrayRef<Metadata *> LoopProperties, bool &HasUserTransforms) { @@ -108,11 +102,10 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); bool FollowupHasTransforms = false; - MDNode *Followup = createPipeliningMetadata(Attrs, FollowupLoopProperties, - FollowupHasTransforms); + SmallVector<Metadata *, 4> Followup = createPipeliningMetadata( + Attrs, FollowupLoopProperties, FollowupHasTransforms); SmallVector<Metadata *, 4> Args; - Args.push_back(nullptr); Args.append(LoopProperties.begin(), LoopProperties.end()); // Setting unroll.count @@ -130,16 +123,14 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, } if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll.followup_all"), Followup})); + Args.push_back( + createFollowupMetadata("llvm.loop.unroll.followup_all", Followup)); - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); HasUserTransforms = true; - return LoopID; + return Args; } -MDNode * +SmallVector<Metadata *, 4> LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, ArrayRef<Metadata *> LoopProperties, bool &HasUserTransforms) { @@ -170,11 +161,10 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); bool FollowupHasTransforms = false; - MDNode *Followup = createPartialUnrollMetadata(Attrs, FollowupLoopProperties, - FollowupHasTransforms); + SmallVector<Metadata *, 4> Followup = createPartialUnrollMetadata( + Attrs, FollowupLoopProperties, FollowupHasTransforms); SmallVector<Metadata *, 4> Args; - 
Args.push_back(nullptr); Args.append(LoopProperties.begin(), LoopProperties.end()); // Setting unroll_and_jam.count @@ -192,22 +182,18 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, } if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_outer"), - Followup})); + Args.push_back(createFollowupMetadata( + "llvm.loop.unroll_and_jam.followup_outer", Followup)); - if (UnrollAndJamInnerFollowup) - Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_inner"), - UnrollAndJamInnerFollowup})); + if (UnrollAndJamInnerFollowup.has_value()) + Args.push_back(createFollowupMetadata( + "llvm.loop.unroll_and_jam.followup_inner", *UnrollAndJamInnerFollowup)); - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); HasUserTransforms = true; - return LoopID; + return Args; } -MDNode * +SmallVector<Metadata *, 4> LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, ArrayRef<Metadata *> LoopProperties, bool &HasUserTransforms) { @@ -244,11 +230,10 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); bool FollowupHasTransforms = false; - MDNode *Followup = createUnrollAndJamMetadata(Attrs, FollowupLoopProperties, - FollowupHasTransforms); + SmallVector<Metadata *, 4> Followup = createUnrollAndJamMetadata( + Attrs, FollowupLoopProperties, FollowupHasTransforms); SmallVector<Metadata *, 4> Args; - Args.push_back(nullptr); Args.append(LoopProperties.begin(), LoopProperties.end()); // Setting vectorize.predicate when it has been specified and vectorization @@ -315,17 +300,14 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, } if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, - {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup})); + Args.push_back( + 
createFollowupMetadata("llvm.loop.vectorize.followup_all", Followup)); - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); HasUserTransforms = true; - return LoopID; + return Args; } -MDNode * +SmallVector<Metadata *, 4> LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, ArrayRef<Metadata *> LoopProperties, bool &HasUserTransforms) { @@ -352,11 +334,10 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, } bool FollowupHasTransforms = false; - MDNode *Followup = + SmallVector<Metadata *, 4> Followup = createLoopVectorizeMetadata(Attrs, LoopProperties, FollowupHasTransforms); SmallVector<Metadata *, 4> Args; - Args.push_back(nullptr); Args.append(LoopProperties.begin(), LoopProperties.end()); Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), @@ -366,19 +347,17 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, Args.push_back(MDNode::get(Ctx, Vals)); if (FollowupHasTransforms) - Args.push_back(MDNode::get( - Ctx, - {MDString::get(Ctx, "llvm.loop.distribute.followup_all"), Followup})); + Args.push_back( + createFollowupMetadata("llvm.loop.distribute.followup_all", Followup)); - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); HasUserTransforms = true; - return LoopID; + return Args; } -MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, - ArrayRef<Metadata *> LoopProperties, - bool &HasUserTransforms) { +SmallVector<Metadata *, 4> +LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { LLVMContext &Ctx = Header->getContext(); std::optional<bool> Enabled; @@ -400,20 +379,17 @@ MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, } SmallVector<Metadata *, 4> Args; - Args.push_back(nullptr); Args.append(LoopProperties.begin(), LoopProperties.end()); Args.push_back(MDNode::get(Ctx, MDString::get(Ctx, 
"llvm.loop.unroll.full"))); // No follow-up: there is no loop after full unrolling. // TODO: Warn if there are transformations after full unrolling. - MDNode *LoopID = MDNode::getDistinct(Ctx, Args); - LoopID->replaceOperandWith(0, LoopID); HasUserTransforms = true; - return LoopID; + return Args; } -MDNode *LoopInfo::createMetadata( +SmallVector<Metadata *, 4> LoopInfo::createMetadata( const LoopAttributes &Attrs, llvm::ArrayRef<llvm::Metadata *> AdditionalLoopProperties, bool &HasUserTransforms) { @@ -579,8 +555,8 @@ void LoopInfo::finish() { MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); bool InnerFollowupHasTransform = false; - MDNode *InnerFollowup = createMetadata(AfterJam, BeforeLoopProperties, - InnerFollowupHasTransform); + SmallVector<Metadata *, 4> InnerFollowup = createMetadata( + AfterJam, BeforeLoopProperties, InnerFollowupHasTransform); if (InnerFollowupHasTransform) Parent->UnrollAndJamInnerFollowup = InnerFollowup; } @@ -589,7 +565,14 @@ void LoopInfo::finish() { } bool HasUserTransforms = false; - LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms); + SmallVector<Metadata *, 4> Properties = + createMetadata(CurLoopAttr, {}, HasUserTransforms); + SmallVector<Metadata *, 4> Args; + Args.push_back(nullptr); + Args.append(Properties.begin(), Properties.end()); + LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + TempLoopID->replaceAllUsesWith(LoopID); } diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index 0fe33b2891306..3c57124f4137c 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -132,17 +132,19 @@ class LoopInfo { /// If this loop has unroll-and-jam metadata, this can be set by the inner /// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner /// metadata. 
- llvm::MDNode *UnrollAndJamInnerFollowup = nullptr; + std::optional<llvm::SmallVector<llvm::Metadata *, 4>> + UnrollAndJamInnerFollowup; - /// Create a LoopID without any transformations. + /// Create a followup MDNode that has @p LoopProperties as its attributes. llvm::MDNode * - createLoopPropertiesMetadata(llvm::ArrayRef<llvm::Metadata *> LoopProperties); + createFollowupMetadata(const char *FollowupName, + llvm::ArrayRef<llvm::Metadata *> LoopProperties); - /// Create a LoopID for transformations. + /// Create a metadata list for transformations. /// /// The methods call each other in case multiple transformations are applied - /// to a loop. The transformation first to be applied will use LoopID of the - /// next transformation in its followup attribute. + /// to a loop. The transformation first to be applied will use metadata list + /// of the next transformation in its followup attribute. /// /// @param Attrs The loop's transformations. /// @param LoopProperties Non-transformation properties such as debug @@ -152,36 +154,37 @@ class LoopInfo { /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes /// at least one transformation. /// - /// @return A LoopID (metadata node) that can be used for the llvm.loop - /// annotation or followup-attribute. + /// @return A metadata list that can be used for the llvm.loop annotation or + /// followup-attribute. 
/// @{ - llvm::MDNode * + llvm::SmallVector<llvm::Metadata *, 4> createPipeliningMetadata(const LoopAttributes &Attrs, llvm::ArrayRef<llvm::Metadata *> LoopProperties, bool &HasUserTransforms); - llvm::MDNode * + llvm::SmallVector<llvm::Metadata *, 4> createPartialUnrollMetadata(const LoopAttributes &Attrs, llvm::ArrayRef<llvm::Metadata *> LoopProperties, bool &HasUserTransforms); - llvm::MDNode * + llvm::SmallVector<llvm::Metadata *, 4> createUnrollAndJamMetadata(const LoopAttributes &Attrs, llvm::ArrayRef<llvm::Metadata *> LoopProperties, bool &HasUserTransforms); - llvm::MDNode * + llvm::SmallVector<llvm::Metadata *, 4> createLoopVectorizeMetadata(const LoopAttributes &Attrs, llvm::ArrayRef<llvm::Metadata *> LoopProperties, bool &HasUserTransforms); - llvm::MDNode * + llvm::SmallVector<llvm::Metadata *, 4> createLoopDistributeMetadata(const LoopAttributes &Attrs, llvm::ArrayRef<llvm::Metadata *> LoopProperties, bool &HasUserTransforms); - llvm::MDNode * + llvm::SmallVector<llvm::Metadata *, 4> createFullUnrollMetadata(const LoopAttributes &Attrs, llvm::ArrayRef<llvm::Metadata *> LoopProperties, bool &HasUserTransforms); + /// @} - /// Create a LoopID for this loop, including transformation-unspecific + /// Create a metadata list for this loop, including transformation-unspecific /// metadata such as debug location. /// /// @param Attrs This loop's attributes and transformations. @@ -191,11 +194,11 @@ class LoopInfo { /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes /// at least one transformation. /// - /// @return A LoopID (metadata node) that can be used for the llvm.loop - /// annotation. - llvm::MDNode *createMetadata(const LoopAttributes &Attrs, - llvm::ArrayRef<llvm::Metadata *> LoopProperties, - bool &HasUserTransforms); + /// @return A metadata list that can be used for the llvm.loop annotation. 
+ llvm::SmallVector<llvm::Metadata *, 4> + createMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); }; /// A stack of loop information corresponding to loop nesting levels. diff --git a/clang/test/CodeGenCXX/pragma-followup_inner.cpp b/clang/test/CodeGenCXX/pragma-followup_inner.cpp index bdcf8a9cbcc07..04f5656dcd638 100644 --- a/clang/test/CodeGenCXX/pragma-followup_inner.cpp +++ b/clang/test/CodeGenCXX/pragma-followup_inner.cpp @@ -23,20 +23,17 @@ extern "C" void followup_inner(int n, int *x) { // CHECK-DAG: ![[INNERLOOP_3]] = distinct !{![[INNERLOOP_3]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]} // CHECK-DAG: ![[PARALLEL_ACCESSES_4]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]} // CHECK-DAG: ![[DISTRIBUTE_5]] = !{!"llvm.loop.distribute.enable", i1 true} -// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]} -// CHECK-DAG: ![[LOOP_7]] = distinct !{![[LOOP_7]], ![[PARALLEL_ACCESSES_4]], ![[VECTORIZE_8:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6]] = !{!"llvm.loop.distribute.followup_all", ![[PARALLEL_ACCESSES_4]], ![[VECTORIZE_8:[0-9]+]]} // CHECK-DAG: ![[VECTORIZE_8]] = !{!"llvm.loop.vectorize.enable", i1 true} // CHECK-DAG: ![[OUTERLOOP_9]] = distinct !{![[OUTERLOOP_9]], [[MP:![0-9]+]], ![[UNROLLANDJAM_COUNT_10:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPINNER_11:[0-9]+]]} // CHECK-DAG: ![[UNROLLANDJAM_COUNT_10]] = !{!"llvm.loop.unroll_and_jam.count", i32 4} -// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPINNER_11]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[LOOP_12:[0-9]+]]} -// CHECK-DAG: ![[LOOP_12]] = distinct !{![[LOOP_12:[0-9]+]], ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13:[0-9]+]], ![[UNROLL_COUNT_13:[0-9]+]], ![[UNROLL_FOLLOWUP_14:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPINNER_11]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[PARALLEL_ACCESSES_4]], 
![[ISVECTORIZED_13:[0-9]+]], ![[UNROLL_COUNT_13:[0-9]+]], ![[UNROLL_FOLLOWUP_14:[0-9]+]]} // CHECK-DAG: ![[ISVECTORIZED_13]] = !{!"llvm.loop.isvectorized"} // CHECK-DAG: ![[UNROLL_COUNT_13]] = !{!"llvm.loop.unroll.count", i32 4} -// CHECK-DAG: ![[UNROLL_FOLLOWUP_14]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_15:[0-9]+]]} -// CHECK-DAG: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13]], ![[UNROLL_DISABLE_16:[0-9]+]], ![[PIPELINE_17:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_14]] = !{!"llvm.loop.unroll.followup_all", ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13]], ![[UNROLL_DISABLE_16:[0-9]+]], ![[PIPELINE_17:[0-9]+]]} // CHECK-DAG: ![[UNROLL_DISABLE_16]] = !{!"llvm.loop.unroll.disable"} // CHECK-DAG: ![[PIPELINE_17]] = !{!"llvm.loop.pipeline.initiationinterval", i32 10} diff --git a/clang/test/CodeGenCXX/pragma-followup_outer.cpp b/clang/test/CodeGenCXX/pragma-followup_outer.cpp index c3ca2a7b1c288..fdd5a07d00857 100644 --- a/clang/test/CodeGenCXX/pragma-followup_outer.cpp +++ b/clang/test/CodeGenCXX/pragma-followup_outer.cpp @@ -20,22 +20,18 @@ extern "C" void followup_outer(int n, int *x) { // CHECK-DAG: ![[LOOP_3:[0-9]+]] = distinct !{![[LOOP_3:[0-9]+]], [[MP:![0-9]+]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]} // CHECK-DAG: ![[PARALLEL_ACCESSES_4:[0-9]+]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]} // CHECK-DAG: ![[DISTRIBUTE_5:[0-9]+]] = !{!"llvm.loop.distribute.enable", i1 true} -// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]} -// CHECK-DAG: ![[LOOP_7:[0-9]+]] = distinct !{![[LOOP_7:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]], ![[VECTORIZE_FOLLOWUP_9:[0-9]+]]} +// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]], ![[VECTORIZE_FOLLOWUP_9:[0-9]+]]} // CHECK-DAG: 
![[VECTORIZE_8:[0-9]+]] = !{!"llvm.loop.vectorize.enable", i1 true} -// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_9:[0-9]+]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_10:[0-9]+]]} -// CHECK-DAG: ![[LOOP_10:[0-9]+]] = distinct !{![[LOOP_10:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_12:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]]} +// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_9:[0-9]+]] = !{!"llvm.loop.vectorize.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_12:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]]} // CHECK-DAG: ![[ISVECTORIZED_11:[0-9]+]] = !{!"llvm.loop.isvectorized"} // CHECK-DAG: ![[UNROLLANDJAM_12:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.enable"} -// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_14:[0-9]+]]} -// CHECK-DAG: ![[LOOP_14:[0-9]+]] = distinct !{![[LOOP_14:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_COUNT_16:[0-9]+]], ![[UNROLL_FOLLOWUP_17:[0-9]+]]} +// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.followup_outer", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_COUNT_16:[0-9]+]], ![[UNROLL_FOLLOWUP_17:[0-9]+]]} // CHECK-DAG: ![[UNROLLANDJAM_DISABLE_15:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.disable"} // CHECK-DAG: ![[UNROLL_COUNT_16:[0-9]+]] = !{!"llvm.loop.unroll.count", i32 4} -// CHECK-DAG: ![[UNROLL_FOLLOWUP_17:[0-9]+]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_18:[0-9]+]]} -// CHECK-DAG: ![[LOOP_18:[0-9]+]] = distinct !{![[LOOP_18:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_DISABLE_19:[0-9]+]], ![[INITIATIONINTERVAL_20:[0-9]+]]} +// CHECK-DAG: ![[UNROLL_FOLLOWUP_17:[0-9]+]] = 
!{!"llvm.loop.unroll.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_DISABLE_19:[0-9]+]], ![[INITIATIONINTERVAL_20:[0-9]+]]} // CHECK-DAG: ![[UNROLL_DISABLE_19:[0-9]+]] = !{!"llvm.loop.unroll.disable"} // CHECK-DAG: ![[INITIATIONINTERVAL_20:[0-9]+]] = !{!"llvm.loop.pipeline.initiationinterval", i32 10} diff --git a/clang/test/CodeGenCXX/pragma-loop.cpp b/clang/test/CodeGenCXX/pragma-loop.cpp index 127df41522a57..76bdcc4a5a9c9 100644 --- a/clang/test/CodeGenCXX/pragma-loop.cpp +++ b/clang/test/CodeGenCXX/pragma-loop.cpp @@ -215,8 +215,7 @@ void for_test_scalable_1(int *List, int Length) { // CHECK: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true} // CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], [[MP]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3:.*]]} -// CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_3:.*]]} -// CHECK: ![[AFTER_VECTOR_3]] = distinct !{![[AFTER_VECTOR_3]], [[MP]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} +// CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", [[MP]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} // CHECK: ![[ISVECTORIZED]] = !{!"llvm.loop.isvectorized"} // CHECK: ![[UNROLL_8]] = !{!"llvm.loop.unroll.count", i32 8} @@ -227,36 +226,26 @@ void for_test_scalable_1(int *List, int Length) { // CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_1:.*]]} // CHECK: ![[WIDTH_1]] = !{!"llvm.loop.vectorize.width", i32 1} -// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], [[MP]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_6:.*]]} -// CHECK: ![[FOLLOWUP_VECTOR_6]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_6:.*]]} -// CHECK: ![[AFTER_VECTOR_6]] = distinct !{![[AFTER_VECTOR_6]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} +// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], [[MP]], 
![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]} // CHECK: ![[LOOP_7]] = distinct !{![[LOOP_7]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]} // CHECK: ![[WIDTH_5]] = !{!"llvm.loop.vectorize.width", i32 5} // CHECK: ![[LOOP_8]] = distinct !{![[LOOP_8]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]} -// CHECK: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[WIDTH_8:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_8:.*]], ![[FOLLOWUP_VECTOR_9:.*]]} -// CHECK: ![[FOLLOWUP_VECTOR_9]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_9:.*]]} -// CHECK: ![[AFTER_VECTOR_9]] = distinct !{![[AFTER_VECTOR_9]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} +// CHECK: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[WIDTH_8:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_8:.*]], ![[FOLLOWUP_VECTOR_3]]} -// CHECK: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_10:.*]]} -// CHECK: ![[FOLLOWUP_VECTOR_10]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_10:.*]]} -// CHECK: ![[AFTER_VECTOR_10]] = distinct !{![[AFTER_VECTOR_10]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} +// CHECK: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]} -// CHECK: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[FOLLOWUP_VECTOR_11:.*]]} -// CHECK: ![[FOLLOWUP_VECTOR_11]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_11:.*]]} -// CHECK: ![[AFTER_VECTOR_11]] = distinct !{![[AFTER_VECTOR_11]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]} +// CHECK: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[FOLLOWUP_VECTOR_3]]} // CHECK: ![[LOOP_12]] = distinct !{![[LOOP_12]], ![[WIDTH_6:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_10:.*]], ![[FOLLOWUP_VECTOR_12:.*]]} -// CHECK: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", 
![[AFTER_VECTOR_12:.*]]} -// CHECK: ![[AFTER_VECTOR_12]] = distinct !{![[AFTER_VECTOR_12]], ![[ISVECTORIZED:.*]], ![[UNROLL_24:.*]]} +// CHECK: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_24:.*]]} // CHECK: ![[UNROLL_24]] = !{!"llvm.loop.unroll.count", i32 24} // CHECK: ![[LOOP_13]] = distinct !{![[LOOP_13]], ![[WIDTH_8:.*]], ![[INTERLEAVE_16:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_13:.*]]} // CHECK: ![[INTERLEAVE_16]] = !{!"llvm.loop.interleave.count", i32 16} -// CHECK: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_13:.*]]} -// CHECK: ![[AFTER_VECTOR_13]] = distinct !{![[AFTER_VECTOR_13]], ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]} +// CHECK: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]} // CHECK: ![[UNROLL_32]] = !{!"llvm.loop.unroll.count", i32 32} // CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], [[MP]], ![[WIDTH_10:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]} diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll index fa5c206547a07..1d633dd6a4e04 100644 --- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll +++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll @@ -12,9 +12,6 @@ ; } ; } ; -; FIXME: Currently unrolling is not applied. This is because the new Loop ID -; created after vectorization does not directly contain unroll metadata. -; Unexpected nests have been created. 
define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-LABEL: define void @f( ; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) { @@ -25,14 +22,47 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]] -; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]] +; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]] +; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]] +; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4 +; 
CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]] +; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP16]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]] +; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]] +; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]] +; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]] +; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]] +; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]] +; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]] +; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]] +; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]] +; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 +; 
CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]] +; CHECK-NEXT: store <4 x float> [[TMP18]], ptr [[TMP17]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -41,14 +71,49 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]] ; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]] +; CHECK-NEXT: store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[MUL_2:%.*]] = 
fmul float [[X]], [[LOAD_2]] +; CHECK-NEXT: store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]] +; CHECK-NEXT: store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]] +; CHECK-NEXT: store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5 +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]] +; CHECK-NEXT: [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]] +; CHECK-NEXT: store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6 +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]] +; CHECK-NEXT: [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4 +; CHECK-NEXT: [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]] +; CHECK-NEXT: store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4 +; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7 +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]] +; CHECK-NEXT: [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]] +; CHECK-NEXT: store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 +; CHECK-NEXT: [[COMP_7:%.*]] = icmp eq i64 
[[IV_NEXT_7]], 1024 +; CHECK-NEXT: br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -73,15 +138,12 @@ exit: !0 = distinct !{!0, !1, !2} !1 = !{!"llvm.loop.vectorize.enable", i1 true} -!2 = !{!"llvm.loop.vectorize.followup_all", !3} -!3 = distinct !{!3, !4, !5} -!4 = !{!"llvm.loop.isvectorized"} -!5 = !{!"llvm.loop.unroll.count", i32 8} +!2 = !{!"llvm.loop.vectorize.followup_all", !3, !4} +!3 = !{!"llvm.loop.isvectorized"} +!4 = !{!"llvm.loop.unroll.count", i32 8} ;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]} -; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} -; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"} -; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8} -; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ;. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits