https://github.com/PeddleSpam updated 
https://github.com/llvm/llvm-project/pull/128938

>From f1c09277af268256fce71df9a858959b69385ef1 Mon Sep 17 00:00:00 2001
From: Leon Clark <leocl...@amd.com>
Date: Wed, 26 Feb 2025 15:59:02 +0000
Subject: [PATCH 1/2] [AggressiveInstCombine] Shrink loads used in
 shufflevector rebroadcasts.

Attempt to shrink the size of vector loads where only some of the incoming 
lanes are used for rebroadcasts in shufflevector instructions.
---
 .../load-shufflevector.ll                     | 345 ++++++++++++++++++
 1 file changed, 345 insertions(+)
 create mode 100644 
llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll

diff --git a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll 
b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
new file mode 100644
index 0000000000000..3f6c8334e61cf
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
@@ -0,0 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -passes=aggressive-instcombine -S < %s | FileCheck %s
+
+define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    ret <8 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x half> %val1
+}
+
+define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, 
i32 1, i32 2, i32 2>
+  ret <4 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, 
i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture 
readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x half> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, 
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x half> %val3
+}
+
+define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture 
readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x half> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, 
i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 2, 
i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <4 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x half> %val3
+}
+
+define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture 
readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x half> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, 
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 2, 
i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x half> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x i32> %val1
+}
+
+define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, 
i32 1, i32 2, i32 2>
+  ret <4 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly 
%arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, 
i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture 
readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, 
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i32> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture 
readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, 
i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i32> %val3
+}
+
+define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture 
readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, 
i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 2, 
i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <4 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x i32> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture 
readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, 
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, 
i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i32> %val3
+}

>From b8ec65331def0fce1f70d203f3473accbdd77865 Mon Sep 17 00:00:00 2001
From: Leon Clark <leocl...@amd.com>
Date: Wed, 26 Feb 2025 21:18:30 +0000
Subject: [PATCH 2/2] Add implementation and update tests.

---
 clang/test/CodeGenOpenCL/preserve_vec3.cl     | 20 ++---
 .../AggressiveInstCombine.cpp                 | 90 +++++++++++++++++++
 .../load-shufflevector.ll                     | 88 +++++++++---------
 3 files changed, 144 insertions(+), 54 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl 
b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index 49ebae6fc7013..0538eac4029bb 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -11,8 +11,8 @@ typedef float float4 __attribute__((ext_vector_type(4)));
 // CHECK-LABEL: define dso_local spir_kernel void @foo(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) 
[[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) 
initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] 
!kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual 
[[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type 
[[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], 
align 16
-// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> 
[[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], 
align 16
+// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> 
[[LOADVECN]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], 
align 16, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
@@ -23,8 +23,8 @@ void kernel foo(global float3 *a, global float3 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3(
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) 
initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 
captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space 
[[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] 
!kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], 
align 16, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 
x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], 
align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 
x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], 
align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -35,8 +35,8 @@ void kernel float4_to_float3(global float3 *a, global float4 
*b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) 
[[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) 
initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] 
!kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] 
!kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] 
!kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], 
align 16
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 
x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], 
align 16
+// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 
x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 
16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -47,8 +47,8 @@ void kernel float3_to_float4(global float3 *a, global float4 
*b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) 
[[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) 
initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] 
!kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] 
!kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] 
!kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], 
align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x 
float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], 
align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 x 
float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 
16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -59,8 +59,8 @@ void kernel float3_to_double2(global float3 *a, global 
double2 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3(
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) 
initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 
captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space 
[[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] 
!kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 
8, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x 
i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 
8, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x 
i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], 
align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
diff --git 
a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp 
b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index fe7b3b1676e08..cbdf99316e9e3 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -915,6 +915,95 @@ static bool foldPatternedLoads(Instruction &I, const 
DataLayout &DL) {
   return true;
 }
 
+// If `I` is a load instruction, used only by shufflevector instructions with 
+// poison values, attempt to shrink the load to only the lanes being used.
+static bool shrinkLoadsForBroadcast(Instruction &I) {
+  auto *OldLoad = dyn_cast<LoadInst>(&I);
+  if (!OldLoad)
+    return false;
+
+  auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
+  if (!VecTy)
+    return false;
+
+  auto IsPoisonOrUndef = [](Value *V) -> bool {
+    if (auto *C = dyn_cast<Constant>(V)) {
+      return isa<PoisonValue>(C) || isa<UndefValue>(C);
+    }
+    return false;
+  };
+
+  using IndexRange = std::pair<unsigned, unsigned>;
+  auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
+    auto OutputRange = IndexRange(VecTy->getNumElements(), 0u);
+    for (auto &Use: I.uses()) {
+      // All uses must be ShuffleVector instructions.
+      auto *Shuffle = dyn_cast<ShuffleVectorInst>(Use.getUser());
+      if (!Shuffle)
+        return {};
+
+      // Get index range for value.
+      auto *Op0 = Shuffle->getOperand(0u);
+      auto *Op1 = Shuffle->getOperand(1u);
+      if (!IsPoisonOrUndef(Op1))
+        return {};
+
+      // Find the min and max indices used by the ShuffleVector instruction.
+      auto Mask = Shuffle->getShuffleMask();
+      auto *Op0Ty = cast<FixedVectorType>(Op0->getType());
+      auto NumElems = Op0Ty->getNumElements();
+
+      for (unsigned Index: Mask) {
+        if (Index < NumElems) {
+          OutputRange.first = std::min(Index, OutputRange.first);
+          OutputRange.second = std::max(Index, OutputRange.second);
+        }
+      }
+    }
+    return OutputRange;
+  };
+
+  if (auto Indices = GetIndexRangeInShuffles()) {
+    auto OldSize = VecTy->getNumElements();
+    auto NewSize = Indices->second + 1u;
+
+    if (NewSize < OldSize) {
+      auto Builder = IRBuilder(&I);
+      Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+      // Create new load of smaller vector.
+      auto *ElemTy = VecTy->getElementType();
+      auto *NewVecTy = FixedVectorType::get(ElemTy, NewSize);
+      auto *NewLoad = cast<LoadInst>(
+        Builder.CreateLoad(NewVecTy, OldLoad->getPointerOperand()));
+      NewLoad->copyMetadata(I);
+
+      // Replace all users.
+      auto OldShuffles = SmallVector<ShuffleVectorInst*, 4u>{};
+      for (auto &Use: I.uses()) {
+        auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
+        
+        Builder.SetInsertPoint(Shuffle);
+        Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
+        auto *NewShuffle = Builder.CreateShuffleVector(
+          NewLoad, PoisonValue::get(NewVecTy), Shuffle->getShuffleMask()
+        );
+
+        Shuffle->replaceAllUsesWith(NewShuffle);
+        OldShuffles.push_back(Shuffle);
+      }
+
+      // Erase old users.
+      for (auto *Shuffle: OldShuffles)
+        Shuffle->eraseFromParent();
+
+      I.eraseFromParent();
+      return true;
+    }
+  }
+  return false;
+}
+
 namespace {
 class StrNCmpInliner {
 public:
@@ -1251,6 +1340,7 @@ static bool foldUnusualPatterns(Function &F, 
DominatorTree &DT,
       MadeChange |= tryToRecognizeTableBasedCttz(I);
       MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
       MadeChange |= foldPatternedLoads(I, DL);
+      MadeChange |= shrinkLoadsForBroadcast(I);
       // NOTE: This function introduces erasing of the instruction `I`, so it
       // needs to be called at the end of this sequence, otherwise we may make
       // bugs.
diff --git a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll 
b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
index 3f6c8334e61cf..57006f2c65380 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
@@ -5,8 +5,8 @@ define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) 
nocapture readonly %ar
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], 
align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <8 x half> [[TMP1]]
 ;
 entry:
@@ -19,8 +19,8 @@ define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) 
nocapture readonly %ar
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], 
align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x half> [[TMP1]]
 ;
 entry:
@@ -33,8 +33,8 @@ define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) 
nocapture readonly %ar
 ; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], 
align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> 
poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x half> [[TMP1]]
 ;
 entry:
@@ -47,8 +47,8 @@ define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) 
nocapture readonly %ar
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], 
align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x half> [[TMP1]]
 ;
 entry:
@@ -61,16 +61,16 @@ define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr 
addrspace(1) nocapture readonl
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], 
align 4
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> 
poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[TMP1]], %[[THEN]] ], [ 
[[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x half> [[VAL3]]
 ;
 entry:
@@ -94,16 +94,16 @@ define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr 
addrspace(1) nocapture readonl
 ; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], 
align 8
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> 
poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> 
poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x half> [ [[TMP1]], %[[THEN]] ], [ 
[[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <4 x half> [[VAL3]]
 ;
 entry:
@@ -127,16 +127,16 @@ define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr 
addrspace(1) nocapture readonl
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], 
align 8
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[TMP1]], %[[THEN]] ], [ 
[[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x half> [[VAL3]]
 ;
 entry:
@@ -160,8 +160,8 @@ define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) 
nocapture readonly %arg
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], 
align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
 entry:
@@ -174,8 +174,8 @@ define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) 
nocapture readonly %arg
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], 
align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
 entry:
@@ -188,8 +188,8 @@ define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) 
nocapture readonly %arg
 ; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], 
align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
 entry:
@@ -202,8 +202,8 @@ define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) 
nocapture readonly %arg
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) 
local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], 
align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
 entry:
@@ -216,16 +216,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr 
addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], 
align 8
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> 
poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ 
[[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
 ;
 entry:
@@ -249,16 +249,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr 
addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], 
align 16
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ 
[[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
 ;
 entry:
@@ -282,16 +282,16 @@ define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr 
addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], 
align 16
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[TMP1]], %[[THEN]] ], [ 
[[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <4 x i32> [[VAL3]]
 ;
 entry:
@@ -315,16 +315,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr 
addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 
[[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], 
align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], 
align 16
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> 
poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ 
[[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ 
[[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
 ;
 entry:

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to