https://github.com/banach-space created 
https://github.com/llvm/llvm-project/pull/180559

- **[CIR] Refactor tests for SVE svdup builtins**
- **Trim CIR output**
- **Simplify LLVM IR output**


From b4ee335de15bb15f824e082244198b50f3bb243a Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Mon, 9 Feb 2026 15:54:31 +0000
Subject: [PATCH 1/3] [CIR] Refactor tests for SVE svdup builtins

Refactor the SVE svdup builtin tests to focus on aspects that are unique to
their code generation: namely, that the expected LLVM SVE intrinsic (or
intrinsics) is emitted. Other codegen details (such as stack allocations
or temporary materialization) are intentionally not checked, as they are
not part of the builtin-specific codegen logic, but rather generic
Clang/CIR lowering behavior.

The generated CIR remains unchanged, but the CHECK lines are simplified
to only match the intrinsic calls, e.g.:

```mlir
  cir.call_llvm_intrinsic "aarch64.sve.<intrinsic-name>"
```

For the LLVM IR checks, the tests now run `opt -passes=sroa` to eliminate
irrelevant IR noise. This allows the checks to be reduced to the essential
intrinsic calls, for example:

```llvm
  define dso_local <vscale x 2 x double> @test_svdup_n_f64_z(
      <vscale x 16 x i1> %0, double %1) {
    %3 = call <vscale x 2 x i1>
         @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
    %4 = call <vscale x 2 x double>
         @llvm.aarch64.sve.dup.nxv2f64(
           <vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %3, double 
%1)
    ret <vscale x 2 x double> %4
  }
```
---
 .../CodeGenBuiltins/AArch64/acle_sve_dup.c    | 24 +++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c 
b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
index 60a2992ab14ad..6bbf3e5d17545 100644
--- a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -1,13 +1,15 @@
 // REQUIRES: aarch64-registered-target
-//
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s 
--check-prefixes=ALL,CIR
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s 
--check-prefixes=ALL,CIR
 
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
+// DEFINE: %{common_flags} = -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall
+
+// RUN: %clang_cc1                        %{common_flags} -fclangir -emit-cir 
-o - %s | FileCheck %s --check-prefixes=ALL,CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-cir 
-o - %s | FileCheck %s --check-prefixes=ALL,CIR
+
+// RUN: %clang_cc1                        %{common_flags} -fclangir -emit-llvm 
-o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-llvm 
-o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
 
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
+// RUN: %clang_cc1                        %{common_flags} -emit-llvm -o - %s | 
FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -emit-llvm -o - %s | 
FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
 #include <arm_sve.h>
 
 #if defined __ARM_FEATURE_SME
@@ -23,6 +25,10 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
+//===------------------------------------------------------===//
+// 1. UNPREDICATED SVDUP
+//===------------------------------------------------------===//
+
 // ALL-LABEL: @test_svdup_n_s8
 svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
 {
@@ -210,6 +216,10 @@ svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
   return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
 }
 
+//===------------------------------------------------------===//
+// 2. PREDICATED ZEROING SVDUP
+//===------------------------------------------------------===//
+
 // ALL-LABEL: @test_svdup_n_s8_z
 svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op) MODE_ATTR
 {

From ab1ce924402ae04122c0ced775466a126ba94e4e Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Mon, 9 Feb 2026 16:14:35 +0000
Subject: [PATCH 2/3] Trim CIR output

---
 .../CodeGenBuiltins/AArch64/acle_sve_dup.c    | 262 +++---------------
 1 file changed, 36 insertions(+), 226 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c 
b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
index 6bbf3e5d17545..fd6ee33234cb8 100644
--- a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -1,15 +1,16 @@
 // REQUIRES: aarch64-registered-target
 
 // DEFINE: %{common_flags} = -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall
+// DEFINE: %{optimize} = opt -O0 -S
 
 // RUN: %clang_cc1                        %{common_flags} -fclangir -emit-cir 
-o - %s | FileCheck %s --check-prefixes=ALL,CIR
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-cir 
-o - %s | FileCheck %s --check-prefixes=ALL,CIR
 
-// RUN: %clang_cc1                        %{common_flags} -fclangir -emit-llvm 
-o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-llvm 
-o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
+// RUN: %clang_cc1                        %{common_flags} -fclangir -emit-llvm 
-o - %s | %{optimize} | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-llvm 
-o - %s | %{optimize} | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
 
-// RUN: %clang_cc1                        %{common_flags} -emit-llvm -o - %s | 
FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -emit-llvm -o - %s | 
FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
+// RUN: %clang_cc1                        %{common_flags} -emit-llvm -o - %s | 
%{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -emit-llvm -o - %s | 
%{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
 #include <arm_sve.h>
 
 #if defined __ARM_FEATURE_SME
@@ -32,11 +33,7 @@
 // ALL-LABEL: @test_svdup_n_s8
 svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !s8i {{.*}} -> !cir.vector<[16] x !s8i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s8i) -> !cir.vector<[16] x !s8i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : (!s8i) 
-> !cir.vector<[16] x !s8i>
 
 // LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
@@ -49,11 +46,7 @@ svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_s16
 svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !s16i {{.*}} -> !cir.vector<[8] x !s16i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s16i) -> !cir.vector<[8] x !s16i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!s16i) -> !cir.vector<[8] x !s16i>
 
 // LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} 
align 2
@@ -66,11 +59,7 @@ svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_s32
 svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !s32i {{.*}} -> !cir.vector<[4] x !s32i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s32i) -> !cir.vector<[4] x !s32i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!s32i) -> !cir.vector<[4] x !s32i>
 
 // LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
@@ -83,11 +72,7 @@ svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_s64
 svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !s64i {{.*}} -> !cir.vector<[2] x !s64i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s64i) -> !cir.vector<[2] x !s64i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!s64i) -> !cir.vector<[2] x !s64i>
 
 // LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
@@ -100,11 +85,7 @@ svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u8
 svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !u8i {{.*}} -> !cir.vector<[16] x !u8i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u8i) -> !cir.vector<[16] x !u8i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : (!u8i) 
-> !cir.vector<[16] x !u8i>
 
 // LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
@@ -117,16 +98,8 @@ svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u16
 svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !u16i {{.*}} -> !cir.vector<[8] x !u16i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u16i) -> !cir.vector<[8] x !u16i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!u16i) -> !cir.vector<[8] x !u16i>
 
-// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} 
align 2
-// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 {{%.*}})
   return SVE_ACLE_FUNC(svdup,_n,_u16,)(op);
 }
@@ -134,11 +107,7 @@ svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u32
 svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !u32i {{.*}} -> !cir.vector<[4] x !u32i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u32i) -> !cir.vector<[4] x !u32i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!u32i) -> !cir.vector<[4] x !u32i>
 
 // LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
@@ -151,11 +120,7 @@ svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u64
 svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !u64i {{.*}} -> !cir.vector<[2] x !u64i>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u64i) -> !cir.vector<[2] x !u64i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!u64i) -> !cir.vector<[2] x !u64i>
 
 // LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
@@ -168,11 +133,7 @@ svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_f16
 svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !cir.f16 {{.*}} -> !cir.vector<[8] x !cir.f16>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!cir.f16) -> !cir.vector<[8] x !cir.f16>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!cir.f16) -> !cir.vector<[8] x !cir.f16>
 
 // LLVM_OGCG_CIR-SAME: half {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} 
align 2
@@ -185,11 +146,7 @@ svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_f32
 svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !cir.float {{.*}} -> !cir.vector<[4] x 
!cir.float>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!cir.float) -> !cir.vector<[4] x !cir.float>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!cir.float) -> !cir.vector<[4] x !cir.float>
 
 // LLVM_OGCG_CIR-SAME: float {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} 
align 4
@@ -202,11 +159,7 @@ svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_f64
 svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[OP:.*]]: !cir.double {{.*}} -> !cir.vector<[2] x 
!cir.double>
-// CIR:           %[[ALLOCA:.*]] = cir.alloca
-// CIR:           cir.store %[[OP]], %[[ALLOCA]]
-// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
-// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!cir.double) -> !cir.vector<[2] x !cir.double>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!cir.double) -> !cir.vector<[2] x !cir.double>
 
 // LLVM_OGCG_CIR-SAME: double {{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} 
align 8
@@ -223,22 +176,9 @@ svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
 // ALL-LABEL: @test_svdup_n_s8_z
 svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !s8i
-// CIR-SAME:        -> !cir.vector<[16] x !s8i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !s8i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[16] x !s8i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]]
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(1) %[[ALLOCA_OP]]
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[16] x 
!s8i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.dup" %[[CONST_0]], %[[LOAD_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], {{%.*}}, {{%.*}} :
 // CIR-SAME:        -> !cir.vector<[16] x !s8i>
-// CIR:           cir.store %[[CONVERT_PG]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -263,24 +203,11 @@ svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_s16_z(
 svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !s16i
-// CIR-SAME:        -> !cir.vector<[8] x !s16i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !s16i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[8] x !s16i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(2) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[8] x 
!s16i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{%.*}} :
 // CIR-SAME:          -> !cir.vector<[8] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:          -> !cir.vector<[8] x !s16i>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -306,24 +233,11 @@ svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_s32_z(
 svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !s32i
-// CIR-SAME:        -> !cir.vector<[4] x !s32i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !s32i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[4] x !s32i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]]
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(4) %[[ALLOCA_OP]]
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[4] x 
!s32i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{%.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !s32i>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -349,24 +263,11 @@ svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_s64_z(
 svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !s64i
-// CIR-SAME:        -> !cir.vector<[2] x !s64i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !s64i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[2] x !s64i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(8) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[2] x 
!s64i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]] 
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !s64i>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -392,22 +293,9 @@ svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u8_z(
 svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !u8i
-// CIR-SAME:          -> !cir.vector<[16] x !u8i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !u8i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[16] x !u8i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(1) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[16] x 
!u8i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.dup" %[[CONST_0]], %[[LOAD_PG]], %[[LOAD_OP]] 
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" %[[CONST_0]], {{.*}}, {{.*}} :
 // CIR-SAME:        -> !cir.vector<[16] x !u8i>
-// CIR:           cir.store %[[CONVERT_PG]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -432,24 +320,11 @@ svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u16_z(
 svuint16_t test_svdup_n_u16_z(svbool_t pg, uint16_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !u16i
-// CIR-SAME:        -> !cir.vector<[8] x !u16i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !u16i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[8] x !u16i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(2) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[8] x 
!u16i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{.*}} :
 // CIR-SAME:          -> !cir.vector<[8] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:          -> !cir.vector<[8] x !u16i>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -475,24 +350,11 @@ svuint16_t test_svdup_n_u16_z(svbool_t pg, uint16_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u32_z(
 svuint32_t test_svdup_n_u32_z(svbool_t pg, uint32_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !u32i
-// CIR-SAME:        -> !cir.vector<[4] x !u32i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !u32i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[4] x !u32i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(4) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[4] x 
!u32i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !u32i>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -518,24 +380,11 @@ svuint32_t test_svdup_n_u32_z(svbool_t pg, uint32_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_u64_z(
 svuint64_t test_svdup_n_u64_z(svbool_t pg, uint64_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !u64i
-// CIR-SAME:        -> !cir.vector<[2] x !u64i>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !u64i
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[2] x !u64i>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(8) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[2] x 
!u64i>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]] 
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !u64i>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -561,24 +410,11 @@ svuint64_t test_svdup_n_u64_z(svbool_t pg, uint64_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_f16_z(
 svfloat16_t test_svdup_n_f16_z(svbool_t pg, float16_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !cir.f16
-// CIR-SAME:        -> !cir.vector<[8] x !cir.f16>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !cir.f16
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[8] x !cir.f16>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(2) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[8] x 
!cir.f16>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{.*}} :
 // CIR-SAME:        -> !cir.vector<[8] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]] 
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[8] x !cir.f16>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], half 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -604,24 +440,11 @@ svfloat16_t test_svdup_n_f16_z(svbool_t pg, float16_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_f32_z(
 svfloat32_t test_svdup_n_f32_z(svbool_t pg, float32_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !cir.float
-// CIR-SAME:          -> !cir.vector<[4] x !cir.float>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !cir.float
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[4] x !cir.float>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(4) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[4] x 
!cir.float>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !cir.float>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], float 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
@@ -647,24 +470,11 @@ svfloat32_t test_svdup_n_f32_z(svbool_t pg, float32_t op) 
MODE_ATTR
 // ALL-LABEL: @test_svdup_n_f64_z(
 svfloat64_t test_svdup_n_f64_z(svbool_t pg, float64_t op) MODE_ATTR
 {
-// CIR-SAME:      %[[PG:.*]]: !cir.vector<[16] x !cir.int<u, 1>>
-// CIR-SAME:      %[[OP:.*]]: !cir.double
-// CIR-SAME:        -> !cir.vector<[2] x !cir.double>
-// CIR:           %[[ALLOCA_PG:.*]] = cir.alloca !cir.vector<[16] x 
!cir.int<u, 1>>
-// CIR:           %[[ALLOCA_OP:.*]] = cir.alloca !cir.double
-// CIR:           %[[ALLOCA_RES:.*]] = cir.alloca !cir.vector<[2] x 
!cir.double>
-// CIR:           cir.store %[[PG]], %[[ALLOCA_PG]]
-// CIR:           cir.store %[[OP]], %[[ALLOCA_OP]]
-// CIR:           %[[LOAD_PG:.*]] = cir.load align(2) %[[ALLOCA_PG]] 
-// CIR:           %[[LOAD_OP:.*]] = cir.load align(8) %[[ALLOCA_OP]] 
 // CIR:           %[[CONST_0:.*]] = cir.const #cir.zero : !cir.vector<[2] x 
!cir.double>
-// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %[[LOAD_PG]]
+// CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !cir.int<u, 1>>
-// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %[[LOAD_OP]]
+// CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !cir.double>
-// CIR:           cir.store %[[CALL_DUP]], %[[ALLOCA_RES]]
-// CIR:           %[[RES:.*]] = cir.load %[[ALLOCA_RES]]
-// CIR:           cir.return %[[RES]]
 
 // LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], double 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
 // LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2

From a1b859b9db08a5a0bf01ab51183046ec94314a30 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Mon, 9 Feb 2026 16:34:12 +0000
Subject: [PATCH 3/3] Simplify LLVM IR output

---
 .../CodeGenBuiltins/AArch64/acle_sve_dup.c    | 331 +++++-------------
 1 file changed, 79 insertions(+), 252 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c 
b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
index fd6ee33234cb8..b3121439d63d7 100644
--- a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -1,16 +1,16 @@
 // REQUIRES: aarch64-registered-target
 
 // DEFINE: %{common_flags} = -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall
-// DEFINE: %{optimize} = opt -O0 -S
+// DEFINE: %{optimize} = opt -passes=sroa -S
 
 // RUN: %clang_cc1                        %{common_flags} -fclangir -emit-cir 
-o - %s | FileCheck %s --check-prefixes=ALL,CIR
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-cir 
-o - %s | FileCheck %s --check-prefixes=ALL,CIR
 
-// RUN: %clang_cc1                        %{common_flags} -fclangir -emit-llvm 
-o - %s | %{optimize} | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-llvm 
-o - %s | %{optimize} | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_VIA_CIR
+// RUN: %clang_cc1                        %{common_flags} -fclangir -emit-llvm 
-o - %s | %{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -fclangir -emit-llvm 
-o - %s | %{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
 
-// RUN: %clang_cc1                        %{common_flags} -emit-llvm -o - %s | 
%{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -emit-llvm -o - %s | 
%{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR,LLVM_DIRECT
+// RUN: %clang_cc1                        %{common_flags} -emit-llvm -o - %s | 
%{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS %{common_flags} -emit-llvm -o - %s | 
%{optimize} | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
 #include <arm_sve.h>
 
 #if defined __ARM_FEATURE_SME
@@ -35,11 +35,9 @@ svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : (!s8i) 
-> !cir.vector<[16] x !s8i>
 
-// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
-// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: i8{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 16 x i8> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s8,)(op);
 }
 
@@ -48,11 +46,9 @@ svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!s16i) -> !cir.vector<[8] x !s16i>
 
-// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} 
align 2
-// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: i16{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 8 x i16> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s16,)(op);
 }
 
@@ -61,11 +57,9 @@ svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!s32i) -> !cir.vector<[4] x !s32i>
 
-// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
-// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: i32{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 4 x i32> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s32,)(op);
 }
 
@@ -74,11 +68,9 @@ svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!s64i) -> !cir.vector<[2] x !s64i>
 
-// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
-// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: i64{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 2 x i64> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s64,)(op);
 }
 
@@ -87,11 +79,9 @@ svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : (!u8i) 
-> !cir.vector<[16] x !u8i>
 
-// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
-// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: i8{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 16 x i8> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u8,)(op);
 }
 
@@ -100,7 +90,8 @@ svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!u16i) -> !cir.vector<[8] x !u16i>
 
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 8 x i16> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u16,)(op);
 }
 
@@ -109,11 +100,9 @@ svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!u32i) -> !cir.vector<[4] x !u32i>
 
-// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
-// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: i32{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 4 x i32> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u32,)(op);
 }
 
@@ -122,11 +111,9 @@ svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!u64i) -> !cir.vector<[2] x !u64i>
 
-// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
-// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: i64{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 2 x i64> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u64,)(op);
 }
 
@@ -135,11 +122,9 @@ svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!cir.f16) -> !cir.vector<[8] x !cir.f16>
 
-// LLVM_OGCG_CIR-SAME: half {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} 
align 2
-// LLVM_OGCG_CIR:    store half [[OP]], ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load half, ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x half> 
@llvm.aarch64.sve.dup.x.nxv8f16(half [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: half{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x half> 
@llvm.aarch64.sve.dup.x.nxv8f16(half [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 8 x half> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_f16,)(op);
 }
 
@@ -148,11 +133,9 @@ svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!cir.float) -> !cir.vector<[4] x !cir.float>
 
-// LLVM_OGCG_CIR-SAME: float {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} 
align 4
-// LLVM_OGCG_CIR:    store float [[OP]], ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load float, ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x float> 
@llvm.aarch64.sve.dup.x.nxv4f32(float [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: float{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x float> 
@llvm.aarch64.sve.dup.x.nxv4f32(float [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 4 x float> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_f32,)(op);
 }
 
@@ -161,11 +144,9 @@ svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
 {
 // CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %{{.*}} : 
(!cir.double) -> !cir.vector<[2] x !cir.double>
 
-// LLVM_OGCG_CIR-SAME: double {{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} 
align 8
-// LLVM_OGCG_CIR:    store double [[OP]], ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load double, ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x double> 
@llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]])
+// LLVM_OGCG_CIR-SAME: double{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x double> 
@llvm.aarch64.sve.dup.x.nxv2f64(double [[OP]])
+// LLVM_OGCG_CIR:    ret <vscale x 2 x double> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
 }
 
@@ -180,23 +161,9 @@ svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], {{%.*}}, {{%.*}} :
 // CIR-SAME:        -> !cir.vector<[16] x !s8i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 16 x 
i8>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i8, ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 
x i1> [[TMP0]], i8 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP2]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP2]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 
x i1> [[PG]], i8 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s8_z,)(pg, op);
 }
 
@@ -209,24 +176,10 @@ svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:          -> !cir.vector<[8] x !s16i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} 
align 2
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 8 x 
i16>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i16, ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x 
i1> [[TMP2]], i16 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x 
i1> [[PG_CONVERTED]], i16 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s16_z,)(pg, op);
 }
 
@@ -239,24 +192,10 @@ svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !s32i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 4 x 
i32>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i32, ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x 
i1> [[TMP2]], i32 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x 
i1> [[PG_CONVERTED]], i32 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s32_z,)(pg, op);
 }
 
@@ -269,24 +208,10 @@ svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !s64i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 2 x 
i64>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i64, ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x 
i1> [[TMP2]], i64 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x 
i1> [[PG_CONVERTED]], i64 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_s64_z,)(pg, op);
 }
 
@@ -297,23 +222,9 @@ svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op) 
MODE_ATTR
 // CIR:           %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.dup" %[[CONST_0]], {{.*}}, {{.*}} :
 // CIR-SAME:        -> !cir.vector<[16] x !u8i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 16 x 
i8>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i8, ptr [[OP_ADDR]], align 1
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 
x i1> [[TMP0]], i8 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP2]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP2]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i8{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 
x i1> [[PG]], i8 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u8_z,)(pg, op);
 }
 
@@ -326,24 +237,10 @@ svuint16_t test_svdup_n_u16_z(svbool_t pg, uint16_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:          -> !cir.vector<[8] x !u16i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} 
align 2
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 8 x 
i16>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i16, ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x 
i1> [[TMP2]], i16 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i16{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x 
i1> [[PG_CONVERTED]], i16 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u16_z,)(pg, op);
 }
 
@@ -356,24 +253,10 @@ svuint32_t test_svdup_n_u32_z(svbool_t pg, uint32_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !u32i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 4 x 
i32>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i32, ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x 
i1> [[TMP2]], i32 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i32{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x 
i1> [[PG_CONVERTED]], i32 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u32_z,)(pg, op);
 }
 
@@ -386,24 +269,10 @@ svuint64_t test_svdup_n_u64_z(svbool_t pg, uint64_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !u64i>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 2 x 
i64>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load i64, ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x 
i1> [[TMP2]], i64 [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], i64{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x 
i1> [[PG_CONVERTED]], i64 [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_u64_z,)(pg, op);
 }
 
@@ -416,24 +285,10 @@ svfloat16_t test_svdup_n_f16_z(svbool_t pg, float16_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[8] x !cir.f16>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], half 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} 
align 2
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 8 x 
half>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store half [[OP]], ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load half, ptr [[OP_ADDR]], align 2
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 8 x half> 
@llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 
x i1> [[TMP2]], half [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], half{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x half> 
@llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 
x i1> [[PG_CONVERTED]], half [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_f16_z,)(pg, op);
 }
 
@@ -446,24 +301,10 @@ svfloat32_t test_svdup_n_f32_z(svbool_t pg, float32_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], {{.*}} :
 // CIR-SAME:        -> !cir.vector<[4] x !cir.float>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], float 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} 
align 4
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 4 x 
float>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store float [[OP]], ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load float, ptr [[OP_ADDR]], align 4
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 4 x float> 
@llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 
x i1> [[TMP2]], float [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], float{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x float> 
@llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 
x i1> [[PG_CONVERTED]], float [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_f32_z,)(pg, op);
 }
 
@@ -476,23 +317,9 @@ svfloat64_t test_svdup_n_f64_z(svbool_t pg, float64_t op) 
MODE_ATTR
 // CIR:           %[[CALL_DUP:.*]] = cir.call_llvm_intrinsic "aarch64.sve.dup" 
%[[CONST_0]], %[[CONVERT_PG]], %{{.*}} :
 // CIR-SAME:        -> !cir.vector<[2] x !cir.double>
 
-// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], double 
{{(noundef)?[[:space:]]?}}[[OP:%.*]])
-// LLVM_OGCG_CIR:    [[PG_ADDR:%.*]] = alloca <vscale x 16 x 
i1>,{{([[:space:]]?i64 1,)?}} align 2
-// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} 
align 8
-//
-// LLVM_VIA_CIR:    [[RES_ADDR:%.*]] = alloca <vscale x 2 x 
double>,{{([[:space:]]?i64 1,)?}} align 16
-//
-// LLVM_OGCG_CIR:    store <vscale x 16 x i1> [[PG]], ptr [[PG_ADDR]], align 2
-// LLVM_OGCG_CIR:    store double [[OP]], ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[TMP0:%.*]] = load <vscale x 16 x i1>, ptr [[PG_ADDR]], 
align 2
-// LLVM_OGCG_CIR:    [[TMP1:%.*]] = load double, ptr [[OP_ADDR]], align 8
-// LLVM_OGCG_CIR:    [[TMP2:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
-// LLVM_OGCG_CIR:    [[TMP3:%.*]] = call <vscale x 2 x double> 
@llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 
2 x i1> [[TMP2]], double [[TMP1]])
-//
-// LLVM_DIRECT:     ret {{.*}} [[TMP3]]
-//
-// LLVM_VIA_CIR:    store {{.*}} [[TMP3]], ptr [[RES_ADDR]]
-// LLVM_VIA_CIR:    [[RES:%.*]] = load {{.*}} [[RES_ADDR]]
-// LLVM_VIA_CIR:    ret {{.*}} [[RES]]
+// LLVM_OGCG_CIR-SAME: <vscale x 16 x i1> [[PG:%.*]], double{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:    [[PG_CONVERTED:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x double> 
@llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 
2 x i1> [[PG_CONVERTED]], double [[OP]])
+// LLVM_OGCG_CIR:    ret {{.*}} [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_f64_z,)(pg, op);
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to