Author: David Green Date: 2022-01-27T18:43:06Z New Revision: 1fec2154b29f84b53dd578b9f87f34e255630771
URL: https://github.com/llvm/llvm-project/commit/1fec2154b29f84b53dd578b9f87f34e255630771 DIFF: https://github.com/llvm/llvm-project/commit/1fec2154b29f84b53dd578b9f87f34e255630771.diff LOG: [ARM][AArch64] Cleanup and autogenerate v8.1a vqdrmlah tests. NFC Added: Modified: clang/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll llvm/test/CodeGen/ARM/neon-v8.1a.ll Removed: ################################################################################ diff --git a/clang/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c index 7abfa3bda7281..ac583eddaecaa 100644 --- a/clang/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.1a-neon-intrinsics.c @@ -1,198 +1,275 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ -// RUN: -target-feature +v8.1a -S -emit-llvm -o - %s | FileCheck %s +// RUN: -target-feature +v8.1a -S -emit-llvm -disable-O0-optnone -o - %s | opt -mem2reg -dce -S | FileCheck %s // REQUIRES: aarch64-registered-target #include <arm_neon.h> -// CHECK-LABEL: test_vqrdmlah_laneq_s16 +// CHECK-LABEL: @test_vqrdmlah_laneq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <4 x i16> [[VQADD_V2_I]] +// int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { -// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7> -// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) return vqrdmlah_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: test_vqrdmlah_laneq_s32 +// CHECK-LABEL: @test_vqrdmlah_laneq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> +// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <2 x i32> [[VQADD_V2_I]] +// int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { -// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3> -// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) return vqrdmlah_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: test_vqrdmlahq_laneq_s16 +// CHECK-LABEL: @test_vqrdmlahq_laneq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <8 x i16> [[VQADDQ_V2_I]] +// int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { -// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) return vqrdmlahq_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: test_vqrdmlahq_laneq_s32 +// CHECK-LABEL: @test_vqrdmlahq_laneq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <4 x i32> [[VQADDQ_V2_I]] +// int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { -// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) return vqrdmlahq_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: test_vqrdmlahh_s16 +// CHECK-LABEL: @test_vqrdmlahh_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[C:%.*]], i64 0 +// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i64 0 +// CHECK-NEXT: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0 +// CHECK-NEXT: ret i16 [[TMP5]] +// int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) { -// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]]) -// CHECK: extractelement <4 x i16> [[mul]], i64 0 -// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]]) -// CHECK: extractelement <4 x i16> [[add]], i64 0 return vqrdmlahh_s16(a, b, c); } -// CHECK-LABEL: test_vqrdmlahs_s32 +// CHECK-LABEL: @test_vqrdmlahs_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR4]] +// CHECK-NEXT: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A:%.*]], i32 [[VQRDMULHS_S32_I]]) #[[ATTR4]] +// CHECK-NEXT: ret i32 [[VQADDS_S32_I]] +// int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) { -// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}}) -// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}}) return vqrdmlahs_s32(a, b, c); } -// CHECK-LABEL: test_vqrdmlahh_lane_s16 +// CHECK-LABEL: @test_vqrdmlahh_lane_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3 +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 +// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i64 0 +// CHECK-NEXT: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0 +// CHECK-NEXT: ret i16 [[TMP5]] +// int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) { -// CHECK: extractelement <4 x i16> {{%.*}}, i32 3 -// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]]) -// CHECK: extractelement <4 x i16> [[mul]], i64 0 -// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]]) -// CHECK: extractelement <4 x i16> [[add]], i64 0 return vqrdmlahh_lane_s16(a, b, c, 3); } -// CHECK-LABEL: test_vqrdmlahs_lane_s32 +// CHECK-LABEL: @test_vqrdmlahs_lane_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1 +// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[B:%.*]], i32 [[VGET_LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A:%.*]], i32 [[VQRDMULHS_S32_I]]) #[[ATTR4]] +// CHECK-NEXT: ret i32 [[VQADDS_S32_I]] +// int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) { -// CHECK: extractelement <2 x i32> {{%.*}}, i32 1 -// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}}) -// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}}) return vqrdmlahs_lane_s32(a, b, c, 1); } -// CHECK-LABEL: test_vqrdmlahh_laneq_s16 +// CHECK-LABEL: @test_vqrdmlahh_laneq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7 +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 +// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i64 0 +// CHECK-NEXT: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0 +// CHECK-NEXT: ret i16 [[TMP5]] +// int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) { -// CHECK: extractelement <8 x i16> {{%.*}}, i32 7 -// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]]) -// CHECK: extractelement <4 x i16> [[mul]], i64 0 -// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]]) -// CHECK: extractelement <4 x i16> [[add]], i64 0 return vqrdmlahh_laneq_s16(a, b, c, 7); } -// CHECK-LABEL: test_vqrdmlahs_laneq_s32 +// CHECK-LABEL: @test_vqrdmlahs_laneq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 +// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[B:%.*]], i32 [[VGETQ_LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A:%.*]], i32 [[VQRDMULHS_S32_I]]) #[[ATTR4]] +// CHECK-NEXT: ret i32 [[VQADDS_S32_I]] +// int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { -// CHECK: extractelement <4 x i32> {{%.*}}, i32 3 -// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}}) -// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}}) return vqrdmlahs_laneq_s32(a, b, c, 3); } -// CHECK-LABEL: test_vqrdmlsh_laneq_s16 +// CHECK-LABEL: @test_vqrdmlsh_laneq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <4 x i16> [[VQSUB_V2_I]] +// int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { -// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7> -// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) return vqrdmlsh_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: test_vqrdmlsh_laneq_s32 +// CHECK-LABEL: @test_vqrdmlsh_laneq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> +// CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <2 x i32> [[VQSUB_V2_I]] +// int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { -// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3> -// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) return vqrdmlsh_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: test_vqrdmlshq_laneq_s16 +// CHECK-LABEL: @test_vqrdmlshq_laneq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <8 x i16> [[VQSUBQ_V2_I]] +// int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { -// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) return vqrdmlshq_laneq_s16(a, b, v, 7); } -// CHECK-LABEL: test_vqrdmlshq_laneq_s32 +// CHECK-LABEL: @test_vqrdmlshq_laneq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-NEXT: ret <4 x i32> [[VQSUBQ_V2_I]] +// int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { -// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) return vqrdmlshq_laneq_s32(a, b, v, 3); } -// CHECK-LABEL: test_vqrdmlshh_s16 +// CHECK-LABEL: @test_vqrdmlshh_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[C:%.*]], i64 0 +// CHECK-NEXT: [[VQRDMULHH_S16_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I_I]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i64 0 +// CHECK-NEXT: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0 +// CHECK-NEXT: ret i16 [[TMP5]] +// int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) { -// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]]) -// CHECK: extractelement <4 x i16> [[mul]], i64 0 -// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]]) -// CHECK: extractelement <4 x i16> [[sub]], i64 0 return vqrdmlshh_s16(a, b, c); } -// CHECK-LABEL: test_vqrdmlshs_s32 +// CHECK-LABEL: @test_vqrdmlshs_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VQRDMULHS_S32_I_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR4]] +// CHECK-NEXT: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A:%.*]], i32 [[VQRDMULHS_S32_I_I]]) #[[ATTR4]] +// CHECK-NEXT: ret i32 [[VQSUBS_S32_I]] +// int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) { -// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}}) -// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}}) return vqrdmlshs_s32(a, b, c); } -// CHECK-LABEL: test_vqrdmlshh_lane_s16 +// CHECK-LABEL: @test_vqrdmlshh_lane_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3 +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 +// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i64 0 +// CHECK-NEXT: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0 +// CHECK-NEXT: ret i16 [[TMP5]] +// int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) { -// CHECK: extractelement <4 x i16> {{%.*}}, i32 3 -// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]]) -// CHECK: extractelement <4 x i16> [[mul]], i64 0 -// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]]) -// CHECK: extractelement <4 x i16> [[sub]], i64 0 return vqrdmlshh_lane_s16(a, b, c, 3); } -// CHECK-LABEL: test_vqrdmlshs_lane_s32 +// CHECK-LABEL: @test_vqrdmlshs_lane_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1 +// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[B:%.*]], i32 [[VGET_LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A:%.*]], i32 [[VQRDMULHS_S32_I]]) #[[ATTR4]] +// CHECK-NEXT: ret i32 [[VQSUBS_S32_I]] +// int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) { -// CHECK: extractelement <2 x i32> {{%.*}}, i32 1 -// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}}) -// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}}) return vqrdmlshs_lane_s32(a, b, c, 1); } -// CHECK-LABEL: test_vqrdmlshh_laneq_s16 +// CHECK-LABEL: @test_vqrdmlshh_laneq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7 +// CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 +// CHECK-NEXT: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[A:%.*]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i64 0 +// CHECK-NEXT: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP3]], <4 x i16> [[TMP4]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0 +// CHECK-NEXT: ret i16 [[TMP5]] +// int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) { -// CHECK: extractelement <8 x i16> {{%.*}}, i32 7 -// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]]) -// CHECK: extractelement <4 x i16> [[mul]], i64 0 -// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0 -// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]]) -// CHECK: extractelement <4 x i16> [[sub]], i64 0 return vqrdmlshh_laneq_s16(a, b, c, 7); } -// CHECK-LABEL: test_vqrdmlshs_laneq_s32 +// CHECK-LABEL: @test_vqrdmlshs_laneq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 +// CHECK-NEXT: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[B:%.*]], i32 [[VGETQ_LANE]]) #[[ATTR4]] +// CHECK-NEXT: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A:%.*]], i32 [[VQRDMULHS_S32_I]]) #[[ATTR4]] +// CHECK-NEXT: ret i32 [[VQSUBS_S32_I]] +// int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { -// CHECK: extractelement <4 x i32> {{%.*}}, i32 3 -// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}}) -// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}}) return vqrdmlshs_laneq_s32(a, b, c, 3); } diff --git a/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c index 5462c17a1cc50..194b4863e33fe 100644 --- a/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c +++ b/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c @@ -1,187 +1,332 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-abi apcs-gnu -target-feature +neon \ -// RUN: -S -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM +// RUN: -S -emit-llvm -o - %s -disable-O0-optnone | opt -mem2reg -dce -S \ +// RUN: | FileCheck %s --check-prefix=CHECK-ARM // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ -// RUN: -target-feature +v8.1a -S -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 +// RUN: -target-feature +v8.1a -S -emit-llvm -o - %s -disable-O0-optnone | opt -mem2reg -dce -S \ +// RUN: | FileCheck %s --check-prefix=CHECK-AARCH64 // REQUIRES: arm-registered-target,aarch64-registered-target #include <arm_neon.h> -// CHECK-LABEL: test_vqrdmlah_s16 +// CHECK-ARM-LABEL: @test_vqrdmlah_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR4:[0-9]+]] +// CHECK-ARM-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i16> [[VQADD_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlah_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR3:[0-9]+]] +// CHECK-AARCH64-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i16> [[VQADD_V2_I]] +// int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) { -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) return vqrdmlah_s16(a, b, c); } -// CHECK-LABEL: test_vqrdmlah_s32 +// CHECK-ARM-LABEL: @test_vqrdmlah_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <2 x i32> [[VQADD_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlah_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <2 x i32> [[VQADD_V2_I]] +// int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) return vqrdmlah_s32(a, b, c); } -// CHECK-LABEL: test_vqrdmlahq_s16 +// CHECK-ARM-LABEL: @test_vqrdmlahq_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <8 x i16> [[VQADDQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlahq_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <8 x i16> [[VQADDQ_V2_I]] +// int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) return vqrdmlahq_s16(a, b, c); } -// CHECK-LABEL: test_vqrdmlahq_s32 +// CHECK-ARM-LABEL: @test_vqrdmlahq_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i32> [[VQADDQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlahq_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i32> [[VQADDQ_V2_I]] +// int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) return vqrdmlahq_s32(a, b, c); } -// CHECK-LABEL: test_vqrdmlah_lane_s16 +// CHECK-ARM-LABEL: @test_vqrdmlah_lane_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i16> [[VQADD_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlah_lane_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i16> [[VQADD_V2_I]] +// int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { -// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) return vqrdmlah_lane_s16(a, b, c, 3); } -// CHECK-LABEL: test_vqrdmlah_lane_s32 +// CHECK-ARM-LABEL: @test_vqrdmlah_lane_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <2 x i32> [[VQADD_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlah_lane_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <2 x i32> [[VQADD_V2_I]] +// int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { -// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) return vqrdmlah_lane_s32(a, b, c, 1); } -// CHECK-LABEL: test_vqrdmlahq_lane_s16 +// CHECK-ARM-LABEL: @test_vqrdmlahq_lane_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <8 x i16> [[VQADDQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlahq_lane_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <8 x i16> [[VQADDQ_V2_I]] +// int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { -// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) return vqrdmlahq_lane_s16(a, b, c, 3); } -// CHECK-LABEL: test_vqrdmlahq_lane_s32 +// CHECK-ARM-LABEL: @test_vqrdmlahq_lane_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i32> [[VQADDQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlahq_lane_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i32> [[VQADDQ_V2_I]] +// int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { -// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) return vqrdmlahq_lane_s32(a, b, c, 1); } -// CHECK-LABEL: test_vqrdmlsh_s16 +// CHECK-ARM-LABEL: @test_vqrdmlsh_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i16> [[VQSUB_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlsh_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i16> [[VQSUB_V2_I]] +// int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) { -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) return vqrdmlsh_s16(a, b, c); } -// CHECK-LABEL: test_vqrdmlsh_s32 +// CHECK-ARM-LABEL: @test_vqrdmlsh_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <2 x i32> [[VQSUB_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlsh_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <2 x i32> [[VQSUB_V2_I]] +// int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) return vqrdmlsh_s32(a, b, c); } -// CHECK-LABEL: test_vqrdmlshq_s16 +// CHECK-ARM-LABEL: @test_vqrdmlshq_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <8 x i16> [[VQSUBQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlshq_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <8 x i16> [[VQSUBQ_V2_I]] +// int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) return vqrdmlshq_s16(a, b, c); } -// CHECK-LABEL: test_vqrdmlshq_s32 +// CHECK-ARM-LABEL: @test_vqrdmlshq_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i32> [[VQSUBQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlshq_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i32> [[VQSUBQ_V2_I]] +// int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) return vqrdmlshq_s32(a, b, c); } -// CHECK-LABEL: test_vqrdmlsh_lane_s16 +// CHECK-ARM-LABEL: @test_vqrdmlsh_lane_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i16> [[VQSUB_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlsh_lane_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[VQRDMULH_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i16> [[VQSUB_V2_I]] +// int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { -// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) return vqrdmlsh_lane_s16(a, b, c, 3); } -// CHECK-LABEL: test_vqrdmlsh_lane_s32 +// CHECK-ARM-LABEL: @test_vqrdmlsh_lane_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> +// CHECK-ARM-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <2 x i32> [[VQSUB_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlsh_lane_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> +// CHECK-AARCH64-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[VQRDMULH_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <2 x i32> [[VQSUB_V2_I]] +// int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { -// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) return vqrdmlsh_lane_s32(a, b, c, 1); } -// CHECK-LABEL: test_vqrdmlshq_lane_s16 +// CHECK-ARM-LABEL: @test_vqrdmlshq_lane_s16( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <8 x i16> [[VQSUBQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlshq_lane_s16( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[VQRDMULHQ_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <8 x i16> [[VQSUBQ_V2_I]] +// int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { -// CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) return vqrdmlshq_lane_s16(a, b, c, 3); } -// CHECK-LABEL: test_vqrdmlshq_lane_s32 +// CHECK-ARM-LABEL: @test_vqrdmlshq_lane_s32( +// CHECK-ARM-NEXT: entry: +// CHECK-ARM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-ARM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-ARM-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-ARM-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) #[[ATTR4]] +// CHECK-ARM-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR4]] +// CHECK-ARM-NEXT: ret <4 x i32> [[VQSUBQ_V2_I]] +// +// CHECK-AARCH64-LABEL: @test_vqrdmlshq_lane_s32( +// CHECK-AARCH64-NEXT: entry: +// CHECK-AARCH64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8> +// CHECK-AARCH64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// CHECK-AARCH64-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-AARCH64-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[VQRDMULHQ_V2_I]]) #[[ATTR3]] +// CHECK-AARCH64-NEXT: ret <4 x i32> [[VQSUBQ_V2_I]] +// int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { -// CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) return vqrdmlshq_lane_s32(a, b, c, 1); } diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll index 3e15bd85cd219..3efa931404c15 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll @@ -1,30 +1,24 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V8a -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mcpu=falkor -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mcpu=saphira -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-V81a-apple +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+rdm | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -mattr=+v8.1a | FileCheck %s declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) -declare i16 @llvm.aarch64.neon.sqrdmulh.i16(i16, i16) declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) -declare i16 @llvm.aarch64.neon.sqadd.i16(i16, i16) declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) -declare i16 @llvm.aarch64.neon.sqsub.i16(i16, i16) ;----------------------------------------------------------------------------- ; RDMA Vector @@ -32,81 +26,81 @@ declare i16 @llvm.aarch64.neon.sqsub.i16(i16, i16) define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmlah_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlah v0.4h, v1.4h, v2.4h +; CHECK-NEXT: ret %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h -; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.4h -; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2 ret <4 x i16> %retval } define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmlah_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlah v0.8h, v1.8h, v2.8h +; CHECK-NEXT: ret %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h -; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.8h -; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2 ret <8 x i16> %retval } define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmlah_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlah v0.2s, v1.2s, v2.2s +; CHECK-NEXT: ret %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s -; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.2s -; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2 ret <2 x i32> %retval } define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmlah_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlah v0.4s, v1.4s, v2.4s +; CHECK-NEXT: ret %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK-V81: sqrdmulh v1.4s, v1.4s, v2.4s -; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.4s -; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2 ret <4 x i32> %retval } define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmlsh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlsh v0.4h, v1.4h, v2.4h +; CHECK-NEXT: ret %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h -; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.4h -; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2 ret <4 x i16> %retval } define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmlsh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlsh v0.8h, v1.8h, v2.8h +; CHECK-NEXT: ret %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h -; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.8h -; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2 ret <8 x i16> %retval } define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmlsh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlsh v0.2s, v1.2s, v2.2s +; CHECK-NEXT: ret %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s -; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.2s -; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2 ret <2 x i32> %retval } define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmlsh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqrdmlsh v0.4s, v1.4s, v2.4s +; CHECK-NEXT: ret %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s -; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.4s -; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2 ret <4 x i32> %retval } @@ -116,97 +110,101 @@ define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { ; CHECK-LABEL: test_sqrdmlah_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqrdmlah v0.4h, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3] -; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.h[3] -; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2[3] ret <4 x i16> %retval } define <8 x i16> @test_sqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) { ; CHECK-LABEL: test_sqrdmlahq_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqrdmlah v0.8h, v1.8h, v2.h[2] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2] -; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.h[2] -; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2[2] ret <8 x i16> %retval } define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { ; CHECK-LABEL: test_sqrdmlah_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqrdmlah v0.2s, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1] -; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.s[1] -; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2[1] ret <2 x i32> %retval } define <4 x i32> @test_sqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) { ; CHECK-LABEL: test_sqrdmlahq_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqrdmlah v0.4s, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0] -; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.s[0] -; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2[0] ret <4 x i32> %retval } define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { ; CHECK-LABEL: test_sqrdmlsh_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqrdmlsh v0.4h, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3] -; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.h[3] -; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2[3] ret <4 x i16> %retval } define <8 x i16> @test_sqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) { ; CHECK-LABEL: test_sqrdmlshq_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqrdmlsh v0.8h, v1.8h, v2.h[2] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2] -; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.h[2] -; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2[2] ret <8 x i16> %retval } define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { ; CHECK-LABEL: test_sqrdmlsh_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqrdmlsh v0.2s, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1] -; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.s[1] -; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2[1] ret <2 x i32> %retval } define <4 x i32> @test_sqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) { ; CHECK-LABEL: test_sqrdmlshq_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqrdmlsh v0.4s, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0] -; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.s[0] -; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2[0] ret <4 x i32> %retval } @@ -217,109 +215,129 @@ entry: define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) { ; CHECK-LABEL: test_sqrdmlah_extracted_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmlah v2.4h, v0.4h, v1.h[1] +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1> %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0 %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod) %retval = extractelement <4 x i16> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1] -; CHECK-V81a: sqrdmlah {{v[2-9]+}}.4h, v0.4h, v1.h[1] -; CHECK-V81a-apple: sqrdmlah.4h {{v[2-9]+}}, v0, v1[1] ret i16 %retval } define i16 @test_sqrdmlahq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) { ; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlah v2.8h, v0.8h, v1.h[1] +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1> %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0 %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod) %retval = extractelement <8 x i16> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1] -; CHECK-V81a: sqrdmlah {{v[2-9]+}}.8h, v0.8h, v1.h[1] -; CHECK-V81a-apple: sqrdmlah.8h {{v[2-9]+}}, v0, v1[1] ret i16 %retval } define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) { ; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmlah v2.2s, v0.2s, v1.s[0] +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) %extract = extractelement <2 x i32> %prod, i64 0 %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract) -; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0] -; CHECK-V81a: sqrdmlah v2.2s, v0.2s, v1.s[0] -; CHECK-V81a-apple: sqrdmlah.2s v2, v0, v1[0] ret i32 %retval } define i32 @test_sqrdmlahq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) { ; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlah v2.4s, v0.4s, v1.s[0] +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) %extract = extractelement <4 x i32> %prod, i64 0 %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract) -; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0] -; CHECK-V81a: sqrdmlah v2.4s, v0.4s, v1.s[0] -; CHECK-V81a-apple: sqrdmlah.4s v2, v0, v1[0] ret i32 %retval } define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) { ; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmlsh v2.4h, v0.4h, v1.h[1] +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1> %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0 %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod) %retval = extractelement <4 x i16> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1] -; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.4h, v0.4h, v1.h[1] -; CHECK-V81a-apple: sqrdmlsh.4h {{v[2-9]+}}, v0, v1[1] ret i16 %retval } define i16 @test_sqrdmlshq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) { ; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlsh v2.8h, v0.8h, v1.h[1] +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1> %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0 %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod) %retval = extractelement <8 x i16> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1] -; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.8h, v0.8h, v1.h[1] -; CHECK-V81a-apple: sqrdmlsh.8h {{v[2-9]+}}, v0, v1[1] ret i16 %retval } define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) { ; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmlsh v2.2s, v0.2s, v1.s[0] +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) %extract = extractelement <2 x i32> %prod, i64 0 %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract) -; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0] -; CHECK-V81a: sqrdmlsh v2.2s, v0.2s, v1.s[0] -; CHECK-V81a-apple: sqrdmlsh.2s v2, v0, v1[0] ret i32 %retval } define i32 @test_sqrdmlshq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) { ; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlsh v2.4s, v0.4s, v1.s[0] +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) %extract = extractelement <4 x i32> %prod, i64 0 %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract) -; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0] -; CHECK-V81a: sqrdmlsh v2.4s, v0.4s, v1.s[0] -; CHECK-V81a-apple: sqrdmlsh.4s v2, v0, v1[0] ret i32 %retval } @@ -329,77 +347,102 @@ entry: define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) { ; CHECK-LABEL: test_sqrdmlah_v1i16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: fmov s1, w2 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlah v2.4h, v0.4h, v1.4h +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0 %y_vec = insertelement <4 x i16> undef, i16 %y, i64 0 %prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec) %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0 %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec) %retval = extractelement <4 x i16> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ret i16 %retval } define i32 @test_sqrdmlah_v1i32(i32 %acc, i32 %x, i32 %y) { ; CHECK-LABEL: test_sqrdmlah_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: fmov s1, w2 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlah v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0 %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0 %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec) %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0 %retval_vec = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec) %retval = extractelement <4 x i32> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -; CHECK-V81a-apple: sqrdmlah.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ret i32 %retval } define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) { ; CHECK-LABEL: test_sqrdmlsh_v1i16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: fmov s1, w2 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlsh v2.4h, v0.4h, v1.4h +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0 %y_vec = insertelement <4 x i16> undef, i16 %y, i64 0 %prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec) %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0 %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec) %retval = extractelement <4 x i16> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -; CHECK-V81a-apple: sqrdmlsh.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ret i16 %retval } define i32 @test_sqrdmlsh_v1i32(i32 %acc, i32 %x, i32 %y) { ; CHECK-LABEL: test_sqrdmlsh_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: fmov s1, w2 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlsh v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0 %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0 %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec) %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0 %retval_vec = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec) %retval = extractelement <4 x i32> %retval_vec, i64 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -; CHECK-V81a-apple: sqrdmlsh.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} ret i32 %retval } + define i32 @test_sqrdmlah_i32(i32 %acc, i32 %mhs, i32 %rhs) { ; CHECK-LABEL: test_sqrdmlah_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w2 +; CHECK-NEXT: sqrdmlah s1, s0, s2 +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: ret %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs) %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod) -; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-V81a-apple: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret i32 %retval } define i32 @test_sqrdmlsh_i32(i32 %acc, i32 %mhs, i32 %rhs) { ; CHECK-LABEL: test_sqrdmlsh_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w2 +; CHECK-NEXT: sqrdmlsh s1, s0, s2 +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: ret %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs) %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod) -; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-V81a-apple: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret i32 %retval } @@ -410,50 +453,63 @@ define i32 @test_sqrdmlsh_i32(i32 %acc, i32 %mhs, i32 %rhs) { define i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %x, <4 x i16> %y_vec) { ; CHECK-LABEL: test_sqrdmlah_extract_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s1, w1 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sqrdmlah v2.4h, v1.4h, v0.h[1] +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret %shuffle = shufflevector <4 x i16> %y_vec, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1> %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0 %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %shuffle) %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0 %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod) %retval = extractelement <4 x i16> %retval_vec, i32 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1] -; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1] -; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, v0[1] ret i16 %retval } define i32 @test_sqrdmlah_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmlah_extract_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s1, w1 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlah s2, s1, v0.s[3] +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret %extract = extractelement <4 x i32> %rhs, i32 3 %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract) %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod) -; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] -; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] -; CHECK-V81a-apple: sqrdmlah.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3] ret i32 %retval } define i16 @test_sqrdmlshq_extract_i16(i16 %acc, i16 %x, <8 x i16> %y_vec) { ; CHECK-LABEL: test_sqrdmlshq_extract_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s1, w1 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlsh v2.8h, v1.8h, v0.h[1] +; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: ret %shuffle = shufflevector <8 x i16> %y_vec, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1> %x_vec = insertelement <8 x i16> undef, i16 %x, i64 0 %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x_vec, <8 x i16> %shuffle) %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0 %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod) %retval = extractelement <8 x i16> %retval_vec, i32 0 -; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1] -; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1] -; CHECK-V81a-apple: sqrdmlsh.8h {{v[0-9]+}}, {{v[0-9]+}}, v0[1] ret i16 %retval } define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqrdmlsh_extract_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s1, w1 +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: sqrdmlsh s2, s1, v0.s[3] +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: ret %extract = extractelement <4 x i32> %rhs, i32 3 %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract) %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod) -; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] -; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] -; CHECK-V81a-apple: sqrdmlsh.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3] ret i32 %retval } diff --git a/llvm/test/CodeGen/ARM/neon-v8.1a.ll b/llvm/test/CodeGen/ARM/neon-v8.1a.ll index 95d2085800810..ca6c24d905e2c 100644 --- a/llvm/test/CodeGen/ARM/neon-v8.1a.ll +++ b/llvm/test/CodeGen/ARM/neon-v8.1a.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s ;----------------------------------------------------------------------------- @@ -18,149 +19,185 @@ declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) -define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { -; CHECK-LABEL: test_vqrdmlah_v4i16: +define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmulah_v4i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlah.s16 d0, d1, d2 +; CHECK-NEXT: bx lr %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <4 x i16> %retval } -define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { -; CHECK-LABEL: test_vqrdmlah_v8i16: +define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmulah_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlah.s16 q0, q1, q2 +; CHECK-NEXT: bx lr %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <8 x i16> %retval } -define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { -; CHECK-LABEL: test_vqrdmlah_v2i32: +define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmulah_v2i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlah.s32 d0, d1, d2 +; CHECK-NEXT: bx lr %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <2 x i32> %retval } -define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { -; CHECK-LABEL: test_vqrdmlah_v4i32: +define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmulah_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlah.s32 q0, q1, q2 +; CHECK-NEXT: bx lr %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <4 x i32> %retval } -define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { -; CHECK-LABEL: test_vqrdmlsh_v4i16: +define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmulsh_v4i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlsh.s16 d0, d1, d2 +; CHECK-NEXT: bx lr %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <4 x i16> %retval } -define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { -; CHECK-LABEL: test_vqrdmlsh_v8i16: +define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmulsh_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlsh.s16 q0, q1, q2 +; CHECK-NEXT: bx lr %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <8 x i16> %retval } -define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { -; CHECK-LABEL: test_vqrdmlsh_v2i32: +define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmulsh_v2i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlsh.s32 d0, d1, d2 +; CHECK-NEXT: bx lr %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <2 x i32> %retval } -define <4 x i32> @test_vqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { -; CHECK-LABEL: test_vqrdmlsh_v4i32: +define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmulsh_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vqrdmlsh.s32 q0, q1, q2 +; CHECK-NEXT: bx lr %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <4 x i32> %retval } ;----------------------------------------------------------------------------- ; RDMA Scalar -define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { -; CHECK-LABEL: test_vqrdmlah_lane_s16: +define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmulah_lane_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrdmlah.s16 d0, d1, d2[3] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] ret <4 x i16> %retval } -define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { -; CHECK-LABEL: test_vqrdmlahq_lane_s16: +define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmulahq_lane_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: @ kill: def $d4 killed $d4 def $q2 +; CHECK-NEXT: vqrdmlah.s16 q0, q1, d4[2] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] ret <8 x i16> %retval } -define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { -; CHECK-LABEL: test_vqrdmlah_lane_s32: +define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_vqrdmulah_lane_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrdmlah.s32 d0, d1, d2[1] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] ret <2 x i32> %retval } -define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { -; CHECK-LABEL: test_vqrdmlahq_lane_s32: +define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_vqrdmulahq_lane_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: @ kill: def $d4 killed $d4 def $q2 +; CHECK-NEXT: vqrdmlah.s32 q0, q1, d4[0] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] ret <4 x i32> %retval } -define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { -; CHECK-LABEL: test_vqrdmlsh_lane_s16: +define arm_aapcs_vfpcc <4 x i16> @test_vqrdmulsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmulsh_lane_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrdmlsh.s16 d0, d1, d2[3] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) -; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] ret <4 x i16> %retval } -define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { -; CHECK-LABEL: test_vqrdmlshq_lane_s16: +define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmulshq_lane_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: @ kill: def $d4 killed $d4 def $q2 +; CHECK-NEXT: vqrdmlsh.s16 q0, q1, d4[2] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) -; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] ret <8 x i16> %retval } -define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { -; CHECK-LABEL: test_vqrdmlsh_lane_s32: +define arm_aapcs_vfpcc <2 x i32> @test_vqrdmulsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_vqrdmulsh_lane_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vqrdmlsh.s32 d0, d1, d2[1] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) -; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] ret <2 x i32> %retval } -define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { -; CHECK-LABEL: test_vqrdmlshq_lane_s32: +define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_vqrdmulshq_lane_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: @ kill: def $d4 killed $d4 def $q2 +; CHECK-NEXT: vqrdmlsh.s32 q0, q1, d4[0] +; CHECK-NEXT: bx lr entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) -; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] ret <4 x i32> %retval } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits