https://github.com/PeddleSpam updated https://github.com/llvm/llvm-project/pull/128938
>From f1c09277af268256fce71df9a858959b69385ef1 Mon Sep 17 00:00:00 2001 From: Leon Clark <leocl...@amd.com> Date: Wed, 26 Feb 2025 15:59:02 +0000 Subject: [PATCH 1/2] [AggressiveInstCombine] Shrink loads used in shufflevector rebroadcasts. Attempt to shrink the size of vector loads where only some of the incoming lanes are used for rebroadcasts in shufflevector instructions. --- .../load-shufflevector.ll | 345 ++++++++++++++++++ 1 file changed, 345 insertions(+) create mode 100644 llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll diff --git a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll new file mode 100644 index 0000000000000..3f6c8334e61cf --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll @@ -0,0 +1,345 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=aggressive-instcombine -S < %s | FileCheck %s + +define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: ret <8 x half> [[TMP1]] +; +entry: + %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> + ret <8 x half> %val1 +} + +define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: ret <8 x half> [[TMP1]] +; +entry: + %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> + ret <8 x half> %val1 +} + +define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> +; CHECK-NEXT: ret <4 x half> [[TMP1]] +; +entry: + %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> + ret <4 x half> %val1 +} + +define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: ret <8 x half> [[TMP1]] +; +entry: + %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> + ret <8 x half> %val1 +} + +define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { +; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label %[[FINALLY:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: br label %[[FINALLY]] +; CHECK: [[FINALLY]]: +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: ret <8 x half> [[VAL3]] +; +entry: + %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 + br i1 %cond, label %then, label %else + +then: + %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + br label %finally + +else: + %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + br label %finally + +finally: + %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ] + ret <8 x half> %val3 +} + +define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { +; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: br label %[[FINALLY:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: br label %[[FINALLY]] +; CHECK: [[FINALLY]]: +; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: ret <4 x half> [[VAL3]] +; +entry: + %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 + br i1 %cond, label %then, label %else + +then: + %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + br label %finally + +else: + %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + br label %finally + +finally: + %val3 = phi <4 x half> [ %val1, %then ], [ %val2, %else ] + ret <4 x half> %val3 +} + +define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { +; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: br label %[[FINALLY:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: br label %[[FINALLY]] +; CHECK: [[FINALLY]]: +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: ret <8 x half> [[VAL3]] +; +entry: + %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32 + br i1 %cond, label %then, label %else + +then: + %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + br label %finally + +else: + %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + br label %finally + +finally: + %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ] + ret <8 x half> %val3 +} + +define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> + ret <8 x i32> %val1 +} + +define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> + ret <8 x i32> %val1 +} + +define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> + ret <4 x i32> %val1 +} + +define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr { +; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> + ret <8 x i32> %val1 +} + +define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { +; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label %[[FINALLY:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: br label %[[FINALLY]] +; CHECK: [[FINALLY]]: +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: ret <8 x i32> [[VAL3]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + br i1 %cond, label %then, label %else + +then: + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + br label %finally + +else: + %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + br label %finally + +finally: + %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ] + ret <8 x i32> %val3 +} + +define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { +; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label %[[FINALLY:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: br label %[[FINALLY]] +; CHECK: [[FINALLY]]: +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: ret <8 x i32> [[VAL3]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + br i1 %cond, label %then, label %else + +then: + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + br label %finally + +else: + %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + br label %finally + +finally: + %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ] + ret <8 x i32> %val3 +} + +define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { +; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: br label %[[FINALLY:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: br label %[[FINALLY]] +; CHECK: [[FINALLY]]: +; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: ret <4 x i32> [[VAL3]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + br i1 %cond, label %then, label %else + +then: + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + br label %finally + +else: + %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + br label %finally + +finally: + %val3 = phi <4 x i32> [ %val1, %then ], [ %val2, %else ] + ret <4 x i32> %val3 +} + +define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr { +; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2( +; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: br label %[[FINALLY:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: br label %[[FINALLY]] +; CHECK: [[FINALLY]]: +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: ret <8 x i32> [[VAL3]] +; +entry: + %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32 + br i1 %cond, label %then, label %else + +then: + %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + br label %finally + +else: + %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + br label %finally + +finally: + %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ] + ret <8 x i32> %val3 +} >From b8ec65331def0fce1f70d203f3473accbdd77865 Mon Sep 17 00:00:00 2001 From: Leon Clark <leocl...@amd.com> Date: Wed, 26 Feb 2025 21:18:30 +0000 Subject: [PATCH 2/2] Add implementation and update tests. --- clang/test/CodeGenOpenCL/preserve_vec3.cl | 20 ++--- .../AggressiveInstCombine.cpp | 90 +++++++++++++++++++ .../load-shufflevector.ll | 88 +++++++++--------- 3 files changed, 144 insertions(+), 54 deletions(-) diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl index 49ebae6fc7013..0538eac4029bb 100644 --- a/clang/test/CodeGenOpenCL/preserve_vec3.cl +++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl @@ -11,8 +11,8 @@ typedef float float4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define dso_local spir_kernel void @foo( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> +// CHECK-NEXT: [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: ret void // @@ -23,8 +23,8 @@ void kernel foo(global float3 *a, global float3 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3( // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> +// CHECK-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> // CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // @@ -35,8 +35,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> +// CHECK-NEXT: [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[ASTYPE:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> // CHECK-NEXT: store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // @@ -47,8 +47,8 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) { // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2( // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> +// CHECK-NEXT: [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> // CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // @@ -59,8 +59,8 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) { // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3( // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> +// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]] +// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> // CHECK-NEXT: store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index fe7b3b1676e08..cbdf99316e9e3 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -915,6 +915,95 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { return true; } +// If `I` is a load instruction, used only by shufflevector instructions with +// poison values, attempt to shrink the load to only the lanes being used. +static bool shrinkLoadsForBroadcast(Instruction &I) { + auto *OldLoad = dyn_cast<LoadInst>(&I); + if (!OldLoad) + return false; + + auto *VecTy = dyn_cast<FixedVectorType>(I.getType()); + if (!VecTy) + return false; + + auto IsPoisonOrUndef = [](Value *V) -> bool { + if (auto *C = dyn_cast<Constant>(V)) { + return isa<PoisonValue>(C) || isa<UndefValue>(C); + } + return false; + }; + + using IndexRange = std::pair<unsigned, unsigned>; + auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> { + auto OutputRange = IndexRange(VecTy->getNumElements(), 0u); + for (auto &Use: I.uses()) { + // All uses must be ShuffleVector instructions. + auto *Shuffle = dyn_cast<ShuffleVectorInst>(Use.getUser()); + if (!Shuffle) + return {}; + + // Get index range for value. + auto *Op0 = Shuffle->getOperand(0u); + auto *Op1 = Shuffle->getOperand(1u); + if (!IsPoisonOrUndef(Op1)) + return {}; + + // Find the min and max indices used by the ShuffleVector instruction. + auto Mask = Shuffle->getShuffleMask(); + auto *Op0Ty = cast<FixedVectorType>(Op0->getType()); + auto NumElems = Op0Ty->getNumElements(); + + for (unsigned Index: Mask) { + if (Index < NumElems) { + OutputRange.first = std::min(Index, OutputRange.first); + OutputRange.second = std::max(Index, OutputRange.second); + } + } + } + return OutputRange; + }; + + if (auto Indices = GetIndexRangeInShuffles()) { + auto OldSize = VecTy->getNumElements(); + auto NewSize = Indices->second + 1u; + + if (NewSize < OldSize) { + auto Builder = IRBuilder(&I); + Builder.SetCurrentDebugLocation(I.getDebugLoc()); + + // Create new load of smaller vector. + auto *ElemTy = VecTy->getElementType(); + auto *NewVecTy = FixedVectorType::get(ElemTy, NewSize); + auto *NewLoad = cast<LoadInst>( + Builder.CreateLoad(NewVecTy, OldLoad->getPointerOperand())); + NewLoad->copyMetadata(I); + + // Replace all users. + auto OldShuffles = SmallVector<ShuffleVectorInst*, 4u>{}; + for (auto &Use: I.uses()) { + auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser()); + + Builder.SetInsertPoint(Shuffle); + Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc()); + auto *NewShuffle = Builder.CreateShuffleVector( + NewLoad, PoisonValue::get(NewVecTy), Shuffle->getShuffleMask() + ); + + Shuffle->replaceAllUsesWith(NewShuffle); + OldShuffles.push_back(Shuffle); + } + + // Erase old users. + for (auto *Shuffle: OldShuffles) + Shuffle->eraseFromParent(); + + I.eraseFromParent(); + return true; + } + } + return false; +} + namespace { class StrNCmpInliner { public: @@ -1251,6 +1340,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, MadeChange |= tryToRecognizeTableBasedCttz(I); MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT); MadeChange |= foldPatternedLoads(I, DL); + MadeChange |= shrinkLoadsForBroadcast(I); // NOTE: This function introduces erasing of the instruction `I`, so it // needs to be called at the end of this sequence, otherwise we may make // bugs. diff --git a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll index 3f6c8334e61cf..57006f2c65380 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll @@ -5,8 +5,8 @@ define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly %ar ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: ret <8 x half> [[TMP1]] ; entry: @@ -19,8 +19,8 @@ define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly %ar ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: ret <8 x half> [[TMP1]] ; entry: @@ -33,8 +33,8 @@ define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %ar ; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> ; CHECK-NEXT: ret <4 x half> [[TMP1]] ; entry: @@ -47,8 +47,8 @@ define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %ar ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: ret <8 x half> [[TMP1]] ; entry: @@ -61,16 +61,16 @@ define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture readonl ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 4 ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[FINALLY:.*]] ; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: br label %[[FINALLY]] ; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ] ; CHECK-NEXT: ret <8 x half> [[VAL3]] ; entry: @@ -94,16 +94,16 @@ define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonl ; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8 ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: br label %[[FINALLY:.*]] ; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: br label %[[FINALLY]] ; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x half> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ] ; CHECK-NEXT: ret <4 x half> [[VAL3]] ; entry: @@ -127,16 +127,16 @@ define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonl ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8 ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: br label %[[FINALLY:.*]] ; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: br label %[[FINALLY]] ; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x half> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ] ; CHECK-NEXT: ret <8 x half> [[VAL3]] ; entry: @@ -160,8 +160,8 @@ define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly %arg ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; entry: @@ -174,8 +174,8 @@ define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly %arg ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; entry: @@ -188,8 +188,8 @@ define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg ; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; entry: @@ -202,8 +202,8 @@ define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; entry: @@ -216,16 +216,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture readonly ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 8 ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[FINALLY:.*]] ; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: br label %[[FINALLY]] ; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ] ; CHECK-NEXT: ret <8 x i32> [[VAL3]] ; entry: @@ -249,16 +249,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture readonly ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16 ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[FINALLY:.*]] ; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: br label %[[FINALLY]] ; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ] ; CHECK-NEXT: ret <8 x i32> [[VAL3]] ; entry: @@ -282,16 +282,16 @@ define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly ; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16 ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: br label %[[FINALLY:.*]] ; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: br label %[[FINALLY]] ; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: [[VAL3:%.*]] = phi <4 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ] ; CHECK-NEXT: ret <4 x i32> [[VAL3]] ; entry: @@ -315,16 +315,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2( ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16 ; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: -; CHECK-NEXT: [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: br label %[[FINALLY:.*]] ; CHECK: [[ELSE]]: -; CHECK-NEXT: [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: br label %[[FINALLY]] ; CHECK: [[FINALLY]]: -; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ] +; CHECK-NEXT: [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ] ; CHECK-NEXT: ret <8 x i32> [[VAL3]] ; entry: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits