https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/77656
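The first patch below swaps the IR intrinsics that the SVCVT_* multi-vector builtins lower to; the user-facing ACLE prototypes themselves are unchanged. As a rough sketch of the builtins affected (wrapper names are illustrative; assumes an SME2 target, the ACLE SME header, and streaming callers, as in the clang tests updated below):

    #include <arm_sme.h>

    // Unsigned 32-bit to single-precision, two registers at a time:
    // with this fix the builtin lowers to UCVTF rather than FCVTZU.
    svfloat32x2_t to_f32_x2(svuint32x2_t zn) __arm_streaming {
      return svcvt_f32_u32_x2(zn);
    }

    // Single-precision to signed 32-bit, four registers at a time:
    // with this fix the builtin lowers to FCVTZS rather than SCVTF.
    svint32x4_t to_s32_x4(svfloat32x4_t zn) __arm_streaming {
      return svcvt_s32_f32_x4(zn);
    }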
>From 67be98b05d771dabe11af54b69532641fa548fb1 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.dever...@arm.com>
Date: Wed, 10 Jan 2024 17:58:30 +0000
Subject: [PATCH 1/2] [AArch64][SME] Fix multi vector cvt builtins

This fixes cvt multi vector builtins that erroneously had inverted
return vectors and vector parameters. This caused the incorrect
instructions to be emitted.
---
 clang/include/clang/Basic/arm_sve.td | 18 ++---
 .../aarch64-sme2-intrinsics/acle_sme2_cvt.c | 32 ++++----
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 8 +-
 .../CodeGen/AArch64/sme2-intrinsics-cvt.ll | 80 +++++++++----------
 4 files changed, 69 insertions(+), 69 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 7f80fb0386cc77..301ef67b3d40bc 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2238,15 +2238,15 @@ let TargetGuard = "sme2" in {
   def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "e2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>;
   def SVCVT_BF16_X2 : SInst<"svcvt_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvt_x2", [IsOverloadNone, IsStreaming],[]>;
 
-  def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_fcvtu_x2", [IsStreaming], []>;
-  def SVCVT_U32_F32_X2 : SInst<"svcvt_u32[_{d}_x2]", "2.u2.d", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming], []>;
-  def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_fcvts_x2", [IsStreaming], []>;
-  def SVCVT_S32_F32_X2 : SInst<"svcvt_s32[_{d}_x2]", "2.x2.d", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming], []>;
-
-  def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_fcvtu_x4", [IsStreaming], []>;
-  def SVCVT_U32_F32_X4 : SInst<"svcvt_u32[_{d}_x4]", "4.u4.d", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming], []>;
-  def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_fcvts_x4", [IsStreaming], []>;
-  def SVCVT_S32_F32_X4 : SInst<"svcvt_s32[_{d}_x4]", "4.x4.d", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming], []>;
+  def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming], []>;
+  def SVCVT_U32_F32_X2 : SInst<"svcvt_u32[_{d}_x2]", "2.u2.d", "f", MergeNone, "aarch64_sve_fcvtu_x2", [IsStreaming], []>;
+  def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming], []>;
+  def SVCVT_S32_F32_X2 : SInst<"svcvt_s32[_{d}_x2]", "2.x2.d", "f", MergeNone, "aarch64_sve_fcvts_x2", [IsStreaming], []>;
+
+  def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming], []>;
+  def SVCVT_U32_F32_X4 : SInst<"svcvt_u32[_{d}_x4]", "4.u4.d", "f", MergeNone, "aarch64_sve_fcvtu_x4", [IsStreaming], []>;
+  def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming], []>;
+  def SVCVT_S32_F32_X4 : SInst<"svcvt_s32[_{d}_x4]", "4.x4.d", "f", MergeNone, "aarch64_sve_fcvts_x4", [IsStreaming], []>;
 }
 
 //
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
index a3ee7d2092f79f..f596b3167f2b82 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
@@ -59,7 +59,7 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming {
// 
CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -70,7 +70,7 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -85,7 +85,7 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -96,7 +96,7 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale 
x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -111,7 +111,7 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -122,7 +122,7 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -137,7 +137,7 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } 
@llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -148,7 +148,7 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -166,7 +166,7 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -183,7 +183,7 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale 
x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -204,7 +204,7 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -221,7 +221,7 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x 
i32> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -242,7 +242,7 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 @@ -259,7 +259,7 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale 
x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 @@ -280,7 +280,7 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 @@ -297,7 +297,7 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 9088168b4c67b1..7dd97b4b42736a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3095,23 +3095,23 
@@ let TargetPrefix = "aarch64" in { [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class SME2_CVT_FtoI_VG2_Intrinsic + class SME2_CVT_ItoF_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; - class SME2_CVT_ItoF_VG2_Intrinsic + class SME2_CVT_FtoI_VG2_Intrinsic : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; - class SME2_CVT_FtoI_VG4_Intrinsic + class SME2_CVT_ItoF_VG4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; - class SME2_CVT_ItoF_VG4_Intrinsic + class SME2_CVT_FtoI_VG4_Intrinsic : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll index 8e2824122383fd..4222e32ee35d8b 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll @@ -34,19 +34,19 @@ define <vscale x 8 x bfloat> @multi_vector_cvt_x2_bf16(<vscale x 4 x float> %unu ; ; FCVTZS ; -define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) { -; CHECK-LABEL: multi_vector_cvt_x2_f32_s32: +define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) { +; CHECK-LABEL: multi_vector_cvt_x2_s32_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: fcvtzs { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1) - ret {<vscale x 4 x float>, <vscale x 4 x float>} %res + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res } -define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x4_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) { -; CHECK-LABEL: multi_vector_cvt_x4_f32_s32: +define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x4_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) { +; CHECK-LABEL: multi_vector_cvt_x4_s32_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov z6.d, z3.d @@ -54,26 +54,26 @@ define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: fcvtzs { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1, 
<vscale x 4 x i32>%zn2, <vscale x 4 x i32>%zn3) - ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} %res + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res } ; ; FCVTZU ; -define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) { -; CHECK-LABEL: multi_vector_cvt_x2_f32_u32: +define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_u32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) { +; CHECK-LABEL: multi_vector_cvt_x2_u32_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: fcvtzu { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1) - ret {<vscale x 4 x float>, <vscale x 4 x float>} %res + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res } -define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x4_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) { -; CHECK-LABEL: multi_vector_cvt_x4_f32_u32: +define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x4_u32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) { +; CHECK-LABEL: multi_vector_cvt_x4_u32_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov z6.d, z3.d @@ -81,26 +81,26 @@ define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscal ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: fcvtzu { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1, <vscale x 4 x i32>%zn2, <vscale x 4 x i32>%zn3) - ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} %res + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res } ; ; SCVTF ; -define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_s32_f32(<vscale x 4 x float>%unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) { -; CHECK-LABEL: multi_vector_cvt_x2_s32_f32: +define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) { +; CHECK-LABEL: multi_vector_cvt_x2_f32_s32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: scvtf { z0.s, z1.s }, { z2.s, z3.s } ; 
CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1) - ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res + %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + ret {<vscale x 4 x float>, <vscale x 4 x float>} %res } -define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x4_s32_f32(<vscale x 4 x float>%unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) { -; CHECK-LABEL: multi_vector_cvt_x4_s32_f32: +define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x4_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) { +; CHECK-LABEL: multi_vector_cvt_x4_f32_s32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov z6.d, z3.d @@ -108,26 +108,26 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: scvtf { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1, <vscale x 4 x float>%zn2, <vscale x 4 x float>%zn3) - ret {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} %res + %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res } ; ; UCVTF ; -define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_u32_f32(<vscale x 4 x float>%unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) { -; CHECK-LABEL: multi_vector_cvt_x2_u32_f32: +define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) { +; CHECK-LABEL: multi_vector_cvt_x2_f32_u32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: ucvtf { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1) - ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res + %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + ret {<vscale x 4 x float>, <vscale x 4 x float>} %res } -define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x4_u32_f32(<vscale x 4 x float>%unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,<vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) { -; CHECK-LABEL: multi_vector_cvt_x4_u32_f32: +define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x4_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,<vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) { +; CHECK-LABEL: multi_vector_cvt_x4_f32_u32: ; CHECK: // %bb.0: ; 
CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov z6.d, z3.d @@ -135,17 +135,17 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: ucvtf { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1, <vscale x 4 x float>%zn2, <vscale x 4 x float>%zn3) - ret {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} %res + %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res } declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float>, <vscale x 4 x float>) -declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>) -declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) -declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) -declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>) +declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4i32(<vscale x 4 x 
i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)

>From 03b0a011d5613b1f5633f0e52ebba3ee806f9b9a Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.dever...@arm.com>
Date: Thu, 11 Jan 2024 10:39:02 +0000
Subject: [PATCH 2/2] Rename VG2 intrinsics to X2

---
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 24 +++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7dd97b4b42736a..8c0f06ed2ba527 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3095,23 +3095,23 @@ let TargetPrefix = "aarch64" in {
                             [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [IntrNoMem]>;
-  class SME2_CVT_ItoF_VG2_Intrinsic
+  class SME2_CVT_ItoF_X2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                             [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
                             [IntrNoMem]>;
-  class SME2_CVT_FtoI_VG2_Intrinsic
+  class SME2_CVT_FtoI_X2_Intrinsic
     : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
                             [IntrNoMem]>;
-  class SME2_CVT_ItoF_VG4_Intrinsic
+  class SME2_CVT_ItoF_X4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
                             [IntrNoMem]>;
-  class SME2_CVT_FtoI_VG4_Intrinsic
+  class SME2_CVT_FtoI_X4_Intrinsic
     : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
                             [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
@@ -3403,14 +3403,14 @@ let TargetPrefix = "aarch64" in {
   //
   def int_aarch64_sve_fcvt_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
   def int_aarch64_sve_bfcvt_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic;
-  def int_aarch64_sve_fcvts_x2 : SME2_CVT_FtoI_VG2_Intrinsic;
-  def int_aarch64_sve_fcvtu_x2 : SME2_CVT_FtoI_VG2_Intrinsic;
-  def int_aarch64_sve_scvtf_x2 : SME2_CVT_ItoF_VG2_Intrinsic;
-  def int_aarch64_sve_ucvtf_x2 : SME2_CVT_ItoF_VG2_Intrinsic;
-  def int_aarch64_sve_fcvts_x4 : SME2_CVT_FtoI_VG4_Intrinsic;
-  def int_aarch64_sve_fcvtu_x4 : SME2_CVT_FtoI_VG4_Intrinsic;
-  def int_aarch64_sve_scvtf_x4 : SME2_CVT_ItoF_VG4_Intrinsic;
-  def int_aarch64_sve_ucvtf_x4 : SME2_CVT_ItoF_VG4_Intrinsic;
+  def int_aarch64_sve_fcvts_x2 : SME2_CVT_FtoI_X2_Intrinsic;
+  def int_aarch64_sve_fcvtu_x2 : SME2_CVT_FtoI_X2_Intrinsic;
+  def int_aarch64_sve_scvtf_x2 : SME2_CVT_ItoF_X2_Intrinsic;
+  def int_aarch64_sve_ucvtf_x2 : SME2_CVT_ItoF_X2_Intrinsic;
+  def int_aarch64_sve_fcvts_x4 : SME2_CVT_FtoI_X4_Intrinsic;
+  def int_aarch64_sve_fcvtu_x4 : SME2_CVT_FtoI_X4_Intrinsic;
+  def int_aarch64_sve_scvtf_x4 : SME2_CVT_ItoF_X4_Intrinsic;
+  def int_aarch64_sve_ucvtf_x4 : SME2_CVT_ItoF_X4_Intrinsic;
 
 //
 // Multi-vector saturating extract narrow

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits