llvmbot wrote:
@llvm/pr-subscribers-clang

Author: Kerry McLaughlin (kmclaughlin-arm)

Changes

Adds the following SME2 builtins:
- svread_hor/ver,
- svwrite_hor/ver,
- svread_za64,
- svwrite_za64

See https://github.com/ARM-software/acle/pull/217

---

Patch is 230.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71191.diff

5 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+46)
- (modified) clang/include/clang/Basic/arm_sve_sme_incl.td (+1-1)
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c (+1422)
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c (+1097)
- (added) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp (+57)

``````````diff
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 8d85327a86b1aaf..d81859ba39dfd7c 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -263,3 +263,49 @@ multiclass ZAFPOuterProd<string n_suffix> {
 
 defm SVMOPA : ZAFPOuterProd<"mopa">;
 defm SVMOPS : ZAFPOuterProd<"mops">;
+
+////////////////////////////////////////////////////////////////////////////////
+// SME2 - MOVA
+
+//
+// 2 and 4 vector-group read/write intrinsics.
+//
+
+multiclass WriteHV_VG<string n, string t, string i, list<ImmCheck> checks> {
+  let TargetGuard = "sme2" in {
+    def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>;
+    def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>;
+    def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>;
+    def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>;
+  }
+}
+
+defm SVWRITE_ZA8 : WriteHV_VG<"za8[_{d}]", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVWRITE_ZA16 : WriteHV_VG<"za16[_{d}]", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>;
+defm SVWRITE_ZA32 : WriteHV_VG<"za32[_{d}]", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>;
+defm SVWRITE_ZA64 : WriteHV_VG<"za64[_{d}]", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>;
+
+multiclass ReadHV_VG<string n, string t, string i, list<ImmCheck> checks> {
+  let TargetGuard = "sme2" in {
+    def NAME # _VG2_H : Inst<"svread_hor_" # n # "_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
+    def NAME # _VG2_V : Inst<"svread_ver_" # n # "_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
+    def NAME # _VG4_H : Inst<"svread_hor_" # n # "_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
+    def NAME # _VG4_V : Inst<"svread_ver_" # n # "_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
+  }
+}
+
+defm SVREAD_ZA8 : ReadHV_VG<"za8_{d}", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>;
+defm SVREAD_ZA16 : ReadHV_VG<"za16_{d}", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>;
+defm SVREAD_ZA32 : ReadHV_VG<"za32_{d}", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>;
+defm SVREAD_ZA64 : ReadHV_VG<"za64_{d}", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>;
+
+//
+// Single vector-group read/write intrinsics.
+//
+
+let TargetGuard = "sme2" in {
+  def SVWRITE_ZA64_VG1x2 : Inst<"svwrite_za64[_{d}]_vg1x2", "vm2", "lUld", MergeNone, "aarch64_sme_write_vg1x2", [IsSharedZA, IsStreaming], []>;
+  def SVWRITE_ZA64_VG1x4 : Inst<"svwrite_za64[_{d}]_vg1x4", "vm4", "lUld", MergeNone, "aarch64_sme_write_vg1x4", [IsSharedZA, IsStreaming], []>;
+  def SVREAD_ZA64_VG1x2 : Inst<"svread_za64_{d}_vg1x2", "2m", "lUld", MergeNone, "aarch64_sme_read_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>;
+  def SVREAD_ZA64_VG1x4 : Inst<"svread_za64_{d}_vg1x4", "4m", "lUld", MergeNone, "aarch64_sme_read_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 3a7a5b51b25801e..22a2a3c5434d657 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -257,7 +257,7 @@ class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
 }
 
 class Inst<string n, string p, string t, MergeType mt, string i,
-           list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
+           list<FlagType> ft, list<ImmCheck> ch, MemEltType met = MemEltTyDefault> {
   string Name = n;
   string Prototype = p;
   string Types = t;
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
new file mode 100644
index 000000000000000..706403e5180ad4b
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
@@ -0,0 +1,1422 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svread_ver_za8_u8_vg2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) {
+  return svread_ver_za8_u8_vg2(0, base);
+}
+
+// CHECK-LABEL: @test_svread_ver_za8_s8_vg2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) {
+  return svread_ver_za8_s8_vg2(0, base);
+}
+
+// CHECK-LABEL: @test_svread_hor_za8_u8_vg2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) {
+  return svread_hor_za8_u8_vg2(0, base);
+}
+
+// CHECK-LABEL: @test_svread_hor_za8_s8_vg2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) {
+  return svread_hor_za8_s8_vg2(0, base);
+}
+
+// CHECK-LABEL: @test_svread_hor_za8_u8_vg4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) {
+  return svread_hor_za8_u8_vg4(0, base);
+}
+
+// CHECK-LABEL: @test_svread_hor_za8_s8_vg4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) {
+  return svread_hor_za8_s8_vg4(0, base);
+}
+
+// CHECK-LABEL: @test_svread_ver_za8_u8_vg4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vs... [truncated]
``````````

https://github.com/llvm/llvm-project/pull/71191
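For readers skimming the patch, here is a brief usage sketch (not part of the patch) of the intrinsics it adds. The function names and the row-to-column copy are hypothetical; the calls mirror the signatures exercised by acle_sme2_read.c and acle_sme2_write.c above, the include is the draft header used by those tests, and an SME2-enabled build is assumed. The streaming/shared-ZA function attributes required by the ACLE are omitted here, as in the tests.

```c
#include <arm_sme_draft_spec_subject_to_change.h>

// Hypothetical helper: read two horizontal (row) slices of the 8-bit tile
// starting at 'slice', then write the same pair back as vertical (column)
// slices. The first argument is the tile immediate; only 0 is valid for
// 8-bit tiles, per the ImmCheck0_0 constraint in arm_sme.td above.
void za8_rows_to_cols(uint32_t slice) {
  svuint8x2_t rows = svread_hor_za8_u8_vg2(0, slice);
  svwrite_ver_za8_u8_vg2(0, slice, rows);
}

// Hypothetical helper: the single vector-group (vg1x2/vg1x4) 64-bit forms
// take no tile immediate; the slice index selects the ZA vectors directly.
void za64_roundtrip_vg1x2(uint32_t slice, svuint64x2_t vals) {
  svwrite_za64_u64_vg1x2(slice, vals);
  svuint64x2_t back = svread_za64_u64_vg1x2(slice);
  (void)back;  // silence unused-variable warnings in this sketch
}
```

The tile immediate on the hor/ver forms is range-checked per element size (ImmCheck0_0 for za8 up to ImmCheck0_7 for za64), which is presumably what the added Sema test acle_sme2_imm.cpp covers.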