llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (Lukacma)

<details>
<summary>Changes</summary>

This patch adds intrinsics for NEON LUTI2 and LUTI4 instructions as specified in the [ACLE proposal](https://github.com/ARM-software/acle/pull/324).

---

Patch is 45.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96883.diff

7 Files Affected:

- (modified) clang/include/clang/Basic/arm_neon.td (+16)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+54)
- (added) clang/test/CodeGen/aarch64-neon-luti.c (+433)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+19)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+7-7)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+70)
- (added) llvm/test/CodeGen/AArch64/neon-luti.ll (+207)


``````````diff
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 6390ba3f9fe5e..0dd76ce32fc20 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2096,3 +2096,19 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r
 def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
 def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
 }
+
+//Lookup table read with 2-bit/4-bit indices
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
+  def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcQcQUcQPc">;
+  def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcQcQUcQPc">;
+  def VLUTI2_H : SInst<"vluti2_lane", "Q.(qU<)I", "sUsPshQsQUsQPsQh">;
+  def VLUTI2_H_Q : SInst<"vluti2_laneq", "Q.(QU<)I", "sUsPshQsQUsQPsQh">;
+  def VLUTI4_B : SInst<"vluti4_laneq", "..UI", "QcQUcQPc">;
+  def VLUTI4_H_X2 : SInst<"vluti4_laneq_x2", ".2(U<)I", "QsQUsQPsQh">;
+
+  let ArchGuard = "defined(__aarch64__)", TargetGuard= "lut,bf16" in {
+    def VLUTI2_BF : SInst<"vluti2_lane", "Q.(qU<)I", "bQb">;
+    def VLUTI2_BF_Q : SInst<"vluti2_laneq", "Q.(QU<)I", "bQb">;
+    def VLUTI4_BF_X2 : SInst<"vluti4_laneq_x2", ".2(U<)I", "Qb">;
+  }
+}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 511e1fd4016d7..f9ac6c9dc8504 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13357,6 +13357,60 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     Int = Intrinsic::aarch64_neon_suqadd;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
   }
+
+  case NEON::BI__builtin_neon_vluti2_lane_bf16:
+  case NEON::BI__builtin_neon_vluti2_lane_f16:
+  case NEON::BI__builtin_neon_vluti2_lane_p16:
+  case NEON::BI__builtin_neon_vluti2_lane_p8:
+  case NEON::BI__builtin_neon_vluti2_lane_s16:
+  case NEON::BI__builtin_neon_vluti2_lane_s8:
+  case NEON::BI__builtin_neon_vluti2_lane_u16:
+  case NEON::BI__builtin_neon_vluti2_lane_u8:
+  case NEON::BI__builtin_neon_vluti2_laneq_bf16:
+  case NEON::BI__builtin_neon_vluti2_laneq_f16:
+  case NEON::BI__builtin_neon_vluti2_laneq_p16:
+  case NEON::BI__builtin_neon_vluti2_laneq_p8:
+  case NEON::BI__builtin_neon_vluti2_laneq_s16:
+  case NEON::BI__builtin_neon_vluti2_laneq_s8:
+  case NEON::BI__builtin_neon_vluti2_laneq_u16:
+  case NEON::BI__builtin_neon_vluti2_laneq_u8:
+  case NEON::BI__builtin_neon_vluti2q_lane_bf16:
+  case NEON::BI__builtin_neon_vluti2q_lane_f16:
+  case NEON::BI__builtin_neon_vluti2q_lane_p16:
+  case NEON::BI__builtin_neon_vluti2q_lane_p8:
+  case NEON::BI__builtin_neon_vluti2q_lane_s16:
+  case NEON::BI__builtin_neon_vluti2q_lane_s8:
+  case NEON::BI__builtin_neon_vluti2q_lane_u16:
+  case NEON::BI__builtin_neon_vluti2q_lane_u8:
+  case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_f16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_p16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_p8:
+  case NEON::BI__builtin_neon_vluti2q_laneq_s16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_s8:
+  case NEON::BI__builtin_neon_vluti2q_laneq_u16:
+  case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
+    Int = Intrinsic::aarch64_neon_vluti2_lane;
+    llvm::Type *Tys[3];
+    Tys[0] = Ty;
+    Tys[1] = Ops[0]->getType();
+    Tys[2] = Ops[1]->getType();
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
+  }
+  case NEON::BI__builtin_neon_vluti4q_laneq_p8:
+  case NEON::BI__builtin_neon_vluti4q_laneq_s8:
+  case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
+    Int = Intrinsic::aarch64_neon_vluti4q_laneq;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
+  }
+  case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
+  case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
+    Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
+  }
   }
 }
diff --git a/clang/test/CodeGen/aarch64-neon-luti.c b/clang/test/CodeGen/aarch64-neon-luti.c
new file mode 100644
index 0000000000000..4f10acdce302a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-neon-luti.c
@@ -0,0 +1,433 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+#include <arm_neon.h>
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_u8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_u8(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_s8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_s8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_s8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_s8(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_p8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_p8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_p8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_p8(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_u16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_u16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i8.v16i8(<8 x i8> [[TMP0]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+uint16x8_t test_vluti2_laneq_u16(uint16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_u16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_u16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v16i8.v8i8(<16 x i8> [[TMP0]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+uint16x8_t test_vluti2q_lane_u16(uint16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_u16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_u16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v16i8.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+uint16x8_t test_vluti2q_laneq_u16(uint16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_u16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_s16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+int16x8_t test_vluti2_lane_s16(int16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_s16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_s16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i8.v16i8(<8 x i8> [[TMP0]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+int16x8_t test_vluti2_laneq_s16(int16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_s16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_s16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v16i8.v8i8(<16 x i8> [[TMP0]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+int16x8_t test_vluti2q_lane_s16(int16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_s16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_s16(
+// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v16i8.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]]
+//
+int16x8_t test_vluti2q_laneq_s16(int16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_s16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8i8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE]]
+//
+float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_f16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8i8.v16i8(<8 x i8> [[TMP0]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE]]
+//
+float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_f16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v16i8.v8i8(<16 x i8> [[TMP0]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE]]
+//
+float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_f16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v16i8.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE]]
+//
+float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_f16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_lane_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8i8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE]]
+//
+bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_bf16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_laneq_bf16(
+// CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[VN]] to <8 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8i8.v16i8(<8 x i8> [[TMP0]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE]]
+//
+bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_bf16(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_lane_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v16i8.v8i8(<16 x i8> [[TMP0]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE]]
+//
+bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_bf16(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_laneq_bf16(
+// CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[VN]] to <16 x i8>
+// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v16i8.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE]]
+//
+bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_bf16(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_p16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> n...
[truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/96883
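
For readers skimming this summary, here is a rough usage sketch of two of the new LUTI2 intrinsics. It is illustrative only: the wrapper function names, comments, and lane choices below are not from the patch; the intrinsic signatures are taken from the tests above, and building it requires a compiler with this patch plus the `lut` target feature.

```c
#include <arm_neon.h>

// Hypothetical wrappers (not part of the patch): expand packed 2-bit indices
// into a full vector of table entries using the new LUTI2 intrinsics.
uint8x16_t expand_u8(uint8x8_t table, uint8x8_t indices) {
  // The immediate selects which block of 2-bit indices in `indices` is used;
  // each index reads one of the low table entries (see the ACLE proposal).
  return vluti2_lane_u8(table, indices, 0);
}

uint16x8_t expand_u16(uint16x4_t table, uint8x16_t indices) {
  // 128-bit index vector variant of the same lookup, again using block 0.
  return vluti2_laneq_u16(table, indices, 0);
}
```

Note that the final argument is an immediate and must be a compile-time constant, as in the tests above.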