https://github.com/Lukacma created https://github.com/llvm/llvm-project/pull/93202
According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics svfloat32x2_t svcvt_f32[_f16_x2](svfloat16_t zn) __arm_streaming; svfloat32x2_t svcvtl_f32[_f16_x2](svfloat16_t zn) __arm_streaming; These are available only if __ARM_FEATURE_SME_F16F16 is enabled. >From a1750b2b5658f8ced700bbf010019703fc52f126 Mon Sep 17 00:00:00 2001 From: Caroline Concatto <caroline.conca...@arm.com> Date: Mon, 15 Apr 2024 13:31:00 +0000 Subject: [PATCH 1/6] [LLVM][AARCH64]Replace +sme2p1+smef16f16 by +smef16f16 According to the latest ISA Spec release[1] all instructions under: HasSME2p1 and HasSMEF16F16 should now only require: HasSMEF16F16 [1]https://developer.arm.com --- llvm/test/MC/AArch64/SME2p1/fadd.s | 8 ++++++++ llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s | 2 +- llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s | 2 +- llvm/test/MC/AArch64/SME2p1/fsub.s | 8 ++++++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/llvm/test/MC/AArch64/SME2p1/fadd.s b/llvm/test/MC/AArch64/SME2p1/fadd.s index bdb769093c838..ec4f27e021a00 100644 --- a/llvm/test/MC/AArch64/SME2p1/fadd.s +++ b/llvm/test/MC/AArch64/SME2p1/fadd.s @@ -1,16 +1,24 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST fadd za.h[w8, 0, vgx2], {z0.h, z1.h} // 11000001-10100100-00011100-00000000 diff --git a/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s b/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s index 2f0dccb57c907..c31b54fc05dea 100644 --- a/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s @@ -66,7 +66,7 @@ fmla za.h[w8, 8, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // Invalid Register Suffix fmla za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .h // CHECK-NEXT: fmla za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s b/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s index 3ff09321e3436..2deb18186eafc 100644 --- a/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s @@ -66,7 +66,7 @@ fmls za.h[w8, 8, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // Invalid Register Suffix fmls za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .h // CHECK-NEXT: fmls za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2p1/fsub.s b/llvm/test/MC/AArch64/SME2p1/fsub.s index 66410008eb11d..e42a819e0d415 100644 --- a/llvm/test/MC/AArch64/SME2p1/fsub.s +++ b/llvm/test/MC/AArch64/SME2p1/fsub.s @@ -1,16 +1,24 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST >From 1dab277b62d14163af243cfe608ad43dbe687a45 Mon Sep 17 00:00:00 2001 From: Caroline Concatto <caroline.conca...@arm.com> Date: Thu, 18 Apr 2024 13:53:36 +0000 Subject: [PATCH 2/6] Address review comments --- llvm/include/llvm/TargetParser/AArch64TargetParser.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 0d1cfd152151a..0b4fb0ad773ec 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -303,6 +303,7 @@ inline constexpr ExtensionInfo Extensions[] = { {"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0}, {"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0}, {"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+fp8,+sme2", 0}, + {"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+fp8,+sme2", 0}, {"sme-f8f32", AArch64::AEK_SMEF8F32, "+sme-f8f32", "-sme-f8f32", FEAT_INIT, "+sme2,+fp8", 0}, {"sme-fa64", AArch64::AEK_SMEFA64, "+sme-fa64", "-sme-fa64", FEAT_INIT, "", 0}, {"cpa", AArch64::AEK_CPA, "+cpa", "-cpa", FEAT_INIT, "", 0}, >From 176083b8562ef5f6b265ed14a3d4f81e4555ee6e Mon Sep 17 00:00:00 2001 From: Caroline Concatto <caroline.conca...@arm.com> Date: Fri, 19 Apr 2024 16:03:03 +0000 Subject: [PATCH 3/6] Fix MC tests --- llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s | 2 +- llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s b/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s index c31b54fc05dea..2f0dccb57c907 100644 --- a/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2p1/fmla-diagnostics.s @@ -66,7 +66,7 @@ fmla za.h[w8, 8, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // Invalid Register Suffix fmla za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s // CHECK-NEXT: fmla za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s b/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s index 2deb18186eafc..3ff09321e3436 100644 --- a/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2p1/fmls-diagnostics.s @@ -66,7 +66,7 @@ fmls za.h[w8, 8, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // Invalid Register Suffix fmls za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s // CHECK-NEXT: fmls za.d[w8, 7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: >From 8cfdd4f0e317646f43ed18a08f2b5f40eafae129 Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Fri, 26 Apr 2024 13:58:47 +0000 Subject: [PATCH 4/6] [SME] Add intrinsics for FCVT(wid.) and FCVTL --- clang/include/clang/Basic/arm_sve.td | 11 +++++ .../aarch64-sme2-intrinsics/acle_sme2_cvt.c | 22 ++++++++++ .../aarch64-sme2-intrinsics/acle_sme2_cvtl.c | 40 +++++++++++++++++++ .../aarch64-sme2-intrinsics/acle_sme2_cvtl.s | 27 +++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 14 ++++++- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 6 +++ .../CodeGen/AArch64/sme2-intrinsics-cvt.ll | 11 ++++- .../CodeGen/AArch64/sme2-intrinsics-cvtl.ll | 11 +++++ 8 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 15340ebb62b36..e809b4a57d654 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2265,6 +2265,10 @@ let TargetGuard = "sme2" in { def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; } +let TargetGuard = "sme-f16f16" in { + def SVCVT_F32_X2 : SInst<"svcvt_{d}[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvt_widen_x2", [ IsStreaming],[]>; +} + // // Multi-vector floating-point convert from single-precision to interleaved half-precision/BFloat16 // @@ -2273,6 +2277,13 @@ let TargetGuard = "sme2" in { def SVCVTN_BF16_X2 : SInst<"svcvtn_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvtn_x2", [IsOverloadNone, IsStreaming],[]>; } +// +//Multi-vector floating-point convert from half-precision to deinterleaved single-precision. +// +let TargetGuard = "sme-f16f16" in { + def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>; +} + // // Multi-vector saturating extract narrow // diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c index 79a11c2ec153e..d117a685bfc29 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c @@ -497,3 +497,25 @@ svuint8_t test_qcvt_u8_s32_x4(svint32x4_t zn) __arm_streaming { svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u16,_s64_x4,,)(zn); } + +// CHECK-LABEL: @test_cvt_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4) +// CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]] +// +__attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c new file mode 100644 index 0000000000000..1142065614b8f --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__SVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f16f16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include <arm_sme.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_cvtl_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4) +// CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]] +// +svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming { + return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s new file mode 100644 index 0000000000000..55078ec0f9645 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s @@ -0,0 +1,27 @@ + .text + .file "acle_sme2_cvtl.c" + .globl test_cvtl_f32_x2 // -- Begin function test_cvtl_f32_x2 + .p2align 2 + .type test_cvtl_f32_x2,@function + .variant_pcs test_cvtl_f32_x2 +test_cvtl_f32_x2: // @test_cvtl_f32_x2 +.Ltest_cvtl_f32_x2$local: + .type .Ltest_cvtl_f32_x2$local,@function +// %bb.0: // %entry + str x29, [sp, #-16]! // 8-byte Folded Spill + addvl sp, sp, #-1 + ptrue p0.h + st1h { z0.h }, p0, [sp] + ld1h { z0.h }, p0/z, [sp] + fcvtl { z2.s, z3.s }, z0.h + mov z0.d, z2.d + mov z1.d, z3.d + addvl sp, sp, #1 + ldr x29, [sp], #16 // 8-byte Folded Reload + ret +.Lfunc_end0: + .size test_cvtl_f32_x2, .Lfunc_end0-test_cvtl_f32_x2 + .size .Ltest_cvtl_f32_x2$local, .Lfunc_end0-test_cvtl_f32_x2 + // -- End function + .ident "clang version 19.0.0git (g...@github.com:Lukacma/llvm-project.git 176083b8562ef5f6b265ed14a3d4f81e4555ee6e)" + .section ".note.GNU-stack","",@progbits diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index e31e00a9c76f3..7b8eeafec597b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3121,6 +3121,11 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty], [IntrNoMem]>; + + class SME2_CVT_WIDENING_VG2_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; + class SME2_CVT_VG4_SINGLE_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>], @@ -3412,6 +3417,13 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_suvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; def int_aarch64_sme_usvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; + + // + //Multi-vector floating-point convert from half-precision to deinterleaved single-precision. + // + + def int_aarch64_sve_fcvtl_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic; + // // Multi-vector floating-point CVT from single-precision to interleaved half-precision/BFloat16 // @@ -3431,7 +3443,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fcvtzu_x4 : SME2_CVT_X4_Intrinsic; def int_aarch64_sve_scvtf_x4 : SME2_CVT_X4_Intrinsic; def int_aarch64_sve_ucvtf_x4 : SME2_CVT_X4_Intrinsic; - + def int_aarch64_sve_fcvt_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic; // // Multi-vector saturating extract narrow // diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 80272213dd389..6db04c37e8a42 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5713,6 +5713,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::aarch64_sve_ucvtf_x4: SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS); return; + case Intrinsic::aarch64_sve_fcvt_widen_x2: + SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S); + return; + case Intrinsic::aarch64_sve_fcvtl_widen_x2: + SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S); + return; case Intrinsic::aarch64_sve_sclamp_single_x2: if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( Node->getValueType(0), diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll index bc1db878cbd31..611cdcda157e2 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -verify-machineinstrs < %s | FileCheck %s ; ; FCVT @@ -139,6 +139,15 @@ define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res } +define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_widen_x2_f16(<vscale x 8 x half> %zn0) { +; CHECK-LABEL: multi_vector_cvt_widen_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt { z0.s, z1.s }, z0.h +; CHECK-NEXT: ret + %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> %zn0) + ret {<vscale x 4 x float>, <vscale x 4 x float>} %res +} + declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float>, <vscale x 4 x float>) declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll new file mode 100644 index 0000000000000..30dc7cbfaea6c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -verify-machineinstrs < %s | FileCheck %s + +define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvtl_widen_x2_f16(<vscale x 8 x half> %zn0) { +; CHECK-LABEL: multi_vector_cvtl_widen_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl { z0.s, z1.s }, z0.h +; CHECK-NEXT: ret + %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> %zn0) + ret {<vscale x 4 x float>, <vscale x 4 x float>} %res +} >From bb90148de759c0e2113532db8dac1f3f4cd04c39 Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Fri, 26 Apr 2024 14:46:05 +0000 Subject: [PATCH 5/6] Revert incorrect changes --- .../aarch64-sme2-intrinsics/acle_sme2_cvtl.s | 27 ------------------- .../llvm/TargetParser/AArch64TargetParser.h | 1 - llvm/test/MC/AArch64/SME2p1/fadd.s | 8 ------ llvm/test/MC/AArch64/SME2p1/fsub.s | 8 ------ 4 files changed, 44 deletions(-) delete mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s deleted file mode 100644 index 55078ec0f9645..0000000000000 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.s +++ /dev/null @@ -1,27 +0,0 @@ - .text - .file "acle_sme2_cvtl.c" - .globl test_cvtl_f32_x2 // -- Begin function test_cvtl_f32_x2 - .p2align 2 - .type test_cvtl_f32_x2,@function - .variant_pcs test_cvtl_f32_x2 -test_cvtl_f32_x2: // @test_cvtl_f32_x2 -.Ltest_cvtl_f32_x2$local: - .type .Ltest_cvtl_f32_x2$local,@function -// %bb.0: // %entry - str x29, [sp, #-16]! // 8-byte Folded Spill - addvl sp, sp, #-1 - ptrue p0.h - st1h { z0.h }, p0, [sp] - ld1h { z0.h }, p0/z, [sp] - fcvtl { z2.s, z3.s }, z0.h - mov z0.d, z2.d - mov z1.d, z3.d - addvl sp, sp, #1 - ldr x29, [sp], #16 // 8-byte Folded Reload - ret -.Lfunc_end0: - .size test_cvtl_f32_x2, .Lfunc_end0-test_cvtl_f32_x2 - .size .Ltest_cvtl_f32_x2$local, .Lfunc_end0-test_cvtl_f32_x2 - // -- End function - .ident "clang version 19.0.0git (g...@github.com:Lukacma/llvm-project.git 176083b8562ef5f6b265ed14a3d4f81e4555ee6e)" - .section ".note.GNU-stack","",@progbits diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 0b4fb0ad773ec..0d1cfd152151a 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -303,7 +303,6 @@ inline constexpr ExtensionInfo Extensions[] = { {"lut", AArch64::AEK_LUT, "+lut", "-lut", FEAT_INIT, "", 0}, {"sme-lutv2", AArch64::AEK_SME_LUTv2, "+sme-lutv2", "-sme-lutv2", FEAT_INIT, "", 0}, {"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+fp8,+sme2", 0}, - {"sme-f8f16", AArch64::AEK_SMEF8F16, "+sme-f8f16", "-sme-f8f16", FEAT_INIT, "+fp8,+sme2", 0}, {"sme-f8f32", AArch64::AEK_SMEF8F32, "+sme-f8f32", "-sme-f8f32", FEAT_INIT, "+sme2,+fp8", 0}, {"sme-fa64", AArch64::AEK_SMEFA64, "+sme-fa64", "-sme-fa64", FEAT_INIT, "", 0}, {"cpa", AArch64::AEK_CPA, "+cpa", "-cpa", FEAT_INIT, "", 0}, diff --git a/llvm/test/MC/AArch64/SME2p1/fadd.s b/llvm/test/MC/AArch64/SME2p1/fadd.s index ec4f27e021a00..bdb769093c838 100644 --- a/llvm/test/MC/AArch64/SME2p1/fadd.s +++ b/llvm/test/MC/AArch64/SME2p1/fadd.s @@ -1,24 +1,16 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ -// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST fadd za.h[w8, 0, vgx2], {z0.h, z1.h} // 11000001-10100100-00011100-00000000 diff --git a/llvm/test/MC/AArch64/SME2p1/fsub.s b/llvm/test/MC/AArch64/SME2p1/fsub.s index e42a819e0d415..66410008eb11d 100644 --- a/llvm/test/MC/AArch64/SME2p1/fsub.s +++ b/llvm/test/MC/AArch64/SME2p1/fsub.s @@ -1,24 +1,16 @@ // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ -// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f8f16 < %s \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ERROR // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ -// RUN: | llvm-objdump -d --mattr=+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST -// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme-f16f16 < %s \ // RUN: | llvm-objdump -d --mattr=-sme2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ -// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme-f16f16 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ -// RUN: | llvm-mc -triple=aarch64 -mattr=+sme-f16f16 -disassemble -show-encoding \ // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST >From 71b4f03abf85e7612e9a157af06f76ad575ec648 Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Tue, 30 Apr 2024 13:12:50 +0000 Subject: [PATCH 6/6] removed trailing whitespace --- clang/include/clang/Basic/arm_sve.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index e809b4a57d654..af0562ad87080 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2267,7 +2267,7 @@ let TargetGuard = "sme2" in { let TargetGuard = "sme-f16f16" in { def SVCVT_F32_X2 : SInst<"svcvt_{d}[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvt_widen_x2", [ IsStreaming],[]>; -} +} // // Multi-vector floating-point convert from single-precision to interleaved half-precision/BFloat16 @@ -2282,7 +2282,7 @@ let TargetGuard = "sme2" in { // let TargetGuard = "sme-f16f16" in { def SVCVTL_F32_X2 : SInst<"svcvtl_f32[_f16_x2]", "2h", "f", MergeNone, "aarch64_sve_fcvtl_widen_x2", [ IsStreaming],[]>; -} +} // // Multi-vector saturating extract narrow _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits