================ @@ -0,0 +1,433 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +#include <arm_neon.h> +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8( +// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) { + return vluti2_lane_u8(vn, vm, 0); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8( +// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) { + return vluti2_laneq_u8(vn, vm, 0); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8( +// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) { + return vluti2q_lane_u8(vn, vm, 3); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8( +// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) { + return vluti2q_laneq_u8(vn, vm, 7); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8( +// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) { + return vluti2_lane_s8(vn, vm, 0); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8( +// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) { + return vluti2_laneq_s8(vn, vm, 0); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8( +// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) { + return vluti2q_lane_s8(vn, vm, 3); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8( +// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) { + return vluti2q_laneq_s8(vn, vm, 7); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8( +// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) { + return vluti2_lane_p8(vn, vm, 0); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8( +// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) { + return vluti2_laneq_p8(vn, vm, 0); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8( +// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) { + return vluti2q_lane_p8(vn, vm, 3); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8( +// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7) +// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] +// +poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) { + return vluti2q_laneq_p8(vn, vm, 7); +} + +// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16( +// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VN]] to <8 x i8> +// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[VM]], i32 0) +// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE]] +// +uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) { ---------------- CarolineConcatto wrote:
This test fails during lowering:

  fatal error: error in backend: Cannot select: intrinsic %llvm.aarch64.neon.vluti2.lane
  PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
  Stack dump:
  0. Program arguments: ./bin/clang -target aarch64-linux-gnu -march=armv8-a+lut+bf16+simd -S -o - ../clang/test/CodeGen/aarch64-neon-luti.c
  1. <eof> parser at end of file
  2. Code generation
  3. Running pass 'Function Pass Manager' on module '../clang/test/CodeGen/aarch64-neon-luti.c'.
  4. Running pass 'AArch64 Instruction Selection' on function '@test_vluti2_lane_u16'

https://github.com/llvm/llvm-project/pull/96883
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits