r334489 - [AArch64] Corrected FP16 Intrinsic range checks in Clang + added Sema tests
Author: lukegeeson Date: Tue Jun 12 02:54:27 2018 New Revision: 334489 URL: http://llvm.org/viewvc/llvm-project?rev=334489&view=rev Log: [AArch64] Corrected FP16 Intrinsic range checks in Clang + added Sema tests Summary: This fixes the ranges for the vcvth family of FP16 intrinsics in the clang front end. Previously it was accepting incorrect ranges -Changed builtin range checking in SemaChecking -added tests SemaCheck changes - included in their own file since no similar one exists -modified existing tests to reflect new ranges Reviewers: SjoerdMeijer, javed.absar Reviewed By: SjoerdMeijer Subscribers: kristof.beyls, cfe-commits Differential Revision: https://reviews.llvm.org/D47592 Added: cfe/trunk/test/Sema/aarch64-neon-fp16-ranges.c Modified: cfe/trunk/include/clang/Basic/arm_fp16.td cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c cfe/trunk/utils/TableGen/NeonEmitter.cpp Modified: cfe/trunk/include/clang/Basic/arm_fp16.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_fp16.td?rev=334489&r1=334488&r2=334489&view=diff == --- cfe/trunk/include/clang/Basic/arm_fp16.td (original) +++ cfe/trunk/include/clang/Basic/arm_fp16.td Tue Jun 12 02:54:27 2018 @@ -75,15 +75,15 @@ let ArchGuard = "defined(__ARM_FEATURE_F def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">; def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">; def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">; - - def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">; - def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">; - def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">; - def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">; - def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">; - def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">; - def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">; - + let isVCVT_N = 1 in { +def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">; +def SCALAR_FCVTZSHO : 
SInst<"vcvt_n_s16", "$si", "Sh">; +def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">; +def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">; +def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">; +def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">; +def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">; + } // Comparison def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">; def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">; Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334489&r1=334488&r2=334489&view=diff == --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Tue Jun 12 02:54:27 2018 @@ -1499,10 +1499,10 @@ bool Sema::CheckNeonBuiltinFunctionCall( switch (BuiltinID) { default: return false; -#define GET_NEON_IMMEDIATE_CHECK -#include "clang/Basic/arm_neon.inc" -#include "clang/Basic/arm_fp16.inc" -#undef GET_NEON_IMMEDIATE_CHECK + #define GET_NEON_IMMEDIATE_CHECK + #include "clang/Basic/arm_neon.inc" + #include "clang/Basic/arm_fp16.inc" + #undef GET_NEON_IMMEDIATE_CHECK } return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c?rev=334489&r1=334488&r2=334489&view=diff == --- cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c Tue Jun 12 02:54:27 2018 @@ -486,90 +486,90 @@ uint16_t test_vclth_f16(float16_t a, flo // CHECK-LABEL: test_vcvth_n_f16_s16 // CHECK: [[SEXT:%.*]] = sext i16 %a to i32 -// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 0) +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 1) // CHECK: ret half [[CVT]] float16_t test_vcvth_n_f16_s16(int16_t a) { - return vcvth_n_f16_s16(a, 0); + return vcvth_n_f16_s16(a, 1); } 
// CHECK-LABEL: test_vcvth_n_f16_s32 -// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 0) +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1) // CHECK: ret half [[CVT]] float16_t test_vcvth_n_f16_s32(int32_t a) { - return vcvth_n_f16_s32(a, 0); + return vcvth_n_f16_s32(a, 1); } // CHECK-LABEL: test_vcvth_n_f16_s64 -// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 0) +// CHECK: [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1) // CHECK: ret half [[CVT]] float16_t test_vcvth_n_f16_s64(int
r334693 - [AArch64] Added support for the vcvta_u16_f16 intrinsic for FP16 Armv8.2-A
Author: lukegeeson Date: Thu Jun 14 01:28:56 2018 New Revision: 334693 URL: http://llvm.org/viewvc/llvm-project?rev=334693&view=rev Log: [AArch64] Added support for the vcvta_u16_f16 intrinsic for FP16 Armv8.2-A Added: cfe/trunk/CodeGen/ Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334693&r1=334692&r2=334693&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 14 01:28:56 2018 @@ -3998,6 +3998,7 @@ static const NeonIntrinsicInfo ARMSIMDIn NEONMAP0(vcvt_u32_v), NEONMAP0(vcvt_u64_v), NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), @@ -4882,6 +4883,7 @@ Value *CodeGenFunction::EmitCommonNeonBu : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } case NEON::BI__builtin_neon_vcvta_s16_v: + case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: case NEON::BI__builtin_neon_vcvta_u32_v: @@ -7623,6 +7625,7 @@ Value *CodeGenFunction::EmitAArch64Built return Builder.CreateFPToSI(Ops[0], Ty); } case NEON::BI__builtin_neon_vcvta_s16_v: + case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvtaq_s16_v: case NEON::BI__builtin_neon_vcvtaq_s32_v: Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=334693&r1=334692&r2=334693&view=diff == --- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Thu Jun 14 01:28:56 2018 @@ -164,6 
+164,13 @@ int16x4_t test_vcvta_s16_f16 (float16x4_ return vcvta_s16_f16(a); } +// CHECK-LABEL: test_vcvta_u16_f16 +// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a) +// CHECK: ret <4 x i16> [[VCVT]] +int16x4_t test_vcvta_u16_f16 (float16x4_t a) { + return vcvta_u16_f16(a); +} + // CHECK-LABEL: test_vcvtaq_s16_f16 // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] Modified: cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c?rev=334693&r1=334692&r2=334693&view=diff == --- cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Thu Jun 14 01:28:56 2018 @@ -164,6 +164,13 @@ int16x4_t test_vcvta_s16_f16 (float16x4_ return vcvta_s16_f16(a); } +// CHECK-LABEL: test_vcvta_u16_f16 +// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.fcvtau.v4i16.v4f16(<4 x half> %a) +// CHECK: ret <4 x i16> [[VCVT]] +int16x4_t test_vcvta_u16_f16 (float16x4_t a) { + return vcvta_u16_f16(a); +} + // CHECK-LABEL: test_vcvtaq_s16_f16 // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r334694 - Removed trunk-Codegen directory added in error
Author: lukegeeson Date: Thu Jun 14 01:51:52 2018 New Revision: 334694 URL: http://llvm.org/viewvc/llvm-project?rev=334694&view=rev Log: Removed trunk-Codegen directory added in error Removed: cfe/trunk/CodeGen/ ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r334696 - [AArch64] reverting rC334693 due to build failures
Author: lukegeeson Date: Thu Jun 14 01:59:33 2018 New Revision: 334696 URL: http://llvm.org/viewvc/llvm-project?rev=334696&view=rev Log: [AArch64] reverting rC334693 due to build failures Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334696&r1=334695&r2=334696&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 14 01:59:33 2018 @@ -3998,7 +3998,6 @@ static const NeonIntrinsicInfo ARMSIMDIn NEONMAP0(vcvt_u32_v), NEONMAP0(vcvt_u64_v), NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), - NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), @@ -4883,7 +4882,6 @@ Value *CodeGenFunction::EmitCommonNeonBu : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } case NEON::BI__builtin_neon_vcvta_s16_v: - case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: case NEON::BI__builtin_neon_vcvta_u32_v: @@ -7625,7 +7623,6 @@ Value *CodeGenFunction::EmitAArch64Built return Builder.CreateFPToSI(Ops[0], Ty); } case NEON::BI__builtin_neon_vcvta_s16_v: - case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvtaq_s16_v: case NEON::BI__builtin_neon_vcvtaq_s32_v: Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=334696&r1=334695&r2=334696&view=diff == --- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Thu Jun 14 01:59:33 2018 @@ -164,13 +164,6 @@ int16x4_t test_vcvta_s16_f16 (float16x4_ 
return vcvta_s16_f16(a); } -// CHECK-LABEL: test_vcvta_u16_f16 -// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a) -// CHECK: ret <4 x i16> [[VCVT]] -int16x4_t test_vcvta_u16_f16 (float16x4_t a) { - return vcvta_u16_f16(a); -} - // CHECK-LABEL: test_vcvtaq_s16_f16 // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] Modified: cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c?rev=334696&r1=334695&r2=334696&view=diff == --- cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Thu Jun 14 01:59:33 2018 @@ -164,13 +164,6 @@ int16x4_t test_vcvta_s16_f16 (float16x4_ return vcvta_s16_f16(a); } -// CHECK-LABEL: test_vcvta_u16_f16 -// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.fcvtau.v4i16.v4f16(<4 x half> %a) -// CHECK: ret <4 x i16> [[VCVT]] -int16x4_t test_vcvta_u16_f16 (float16x4_t a) { - return vcvta_u16_f16(a); -} - // CHECK-LABEL: test_vcvtaq_s16_f16 // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r334820 - [AArch64] Reverted rC334696 with Clang VCVTA test fix
Author: lukegeeson Date: Fri Jun 15 03:10:45 2018 New Revision: 334820 URL: http://llvm.org/viewvc/llvm-project?rev=334820&view=rev Log: [AArch64] Reverted rC334696 with Clang VCVTA test fix Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334820&r1=334819&r2=334820&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 15 03:10:45 2018 @@ -4000,6 +4000,7 @@ static const NeonIntrinsicInfo ARMSIMDIn NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), + NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0), @@ -4884,6 +4885,7 @@ Value *CodeGenFunction::EmitCommonNeonBu case NEON::BI__builtin_neon_vcvta_s16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: + case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_u32_v: case NEON::BI__builtin_neon_vcvta_u64_v: case NEON::BI__builtin_neon_vcvtaq_s16_v: @@ -7623,6 +7625,7 @@ Value *CodeGenFunction::EmitAArch64Built return Builder.CreateFPToSI(Ops[0], Ty); } case NEON::BI__builtin_neon_vcvta_s16_v: + case NEON::BI__builtin_neon_vcvta_u16_v: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvtaq_s16_v: case NEON::BI__builtin_neon_vcvtaq_s32_v: Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=334820&r1=334819&r2=334820&view=diff == --- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Fri 
Jun 15 03:10:45 2018 @@ -164,6 +164,13 @@ int16x4_t test_vcvta_s16_f16 (float16x4_ return vcvta_s16_f16(a); } +// CHECK-LABEL: test_vcvta_u16_f16 +// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a) +// CHECK: ret <4 x i16> [[VCVT]] +int16x4_t test_vcvta_u16_f16 (float16x4_t a) { + return vcvta_u16_f16(a); +} + // CHECK-LABEL: test_vcvtaq_s16_f16 // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] Modified: cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c?rev=334820&r1=334819&r2=334820&view=diff == --- cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Fri Jun 15 03:10:45 2018 @@ -164,6 +164,13 @@ int16x4_t test_vcvta_s16_f16 (float16x4_ return vcvta_s16_f16(a); } +// CHECK-LABEL: test_vcvta_u16_f16 +// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a) +// CHECK: ret <4 x i16> [[VCVT]] +int16x4_t test_vcvta_u16_f16 (float16x4_t a) { + return vcvta_u16_f16(a); +} + // CHECK-LABEL: test_vcvtaq_s16_f16 // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
cfe-commits@lists.llvm.org
Author: Luke Geeson Date: 2020-06-16T15:23:30+01:00 New Revision: 10b6567f49778f49ea81ff36269fc0fbc033d7ad URL: https://github.com/llvm/llvm-project/commit/10b6567f49778f49ea81ff36269fc0fbc033d7ad DIFF: https://github.com/llvm/llvm-project/commit/10b6567f49778f49ea81ff36269fc0fbc033d7ad.diff LOG: [AArch64]: BFloat MatMul Intrinsics&CodeGen This patch upstreams support for BFloat Matrix Multiplication Intrinsics and Code Generation from __bf16 to AArch64. This includes IR intrinsics. Unittests are provided as needed. AArch32 Intrinsics + CodeGen will come after this patch. This patch is part of a series implementing the Bfloat16 extension of the Armv8.6-a architecture, as detailed here: https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a The bfloat type, and its properties are specified in the Arm Architecture Reference Manual: https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile The following people contributed to this patch: Luke Geeson - Momchil Velikov - Mikhail Maltsev - Luke Cheeseman Reviewers: SjoerdMeijer, t.p.northover, sdesmalen, labrinea, miyuki, stuij Reviewed By: miyuki, stuij Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits, llvm-commits, miyuki, chill, pbarrio, stuij Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D80752 Change-Id: I174f0fd0f600d04e3799b06a7da88973c6c0703f Added: clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll Modified: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td Removed: diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index ffdf08ea494a..289f5ea47b92 100644 --- 
a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -244,6 +244,22 @@ def OP_SUDOT_LNQ : Op<(call "vusdot", $p0, (cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>; +def OP_BFDOT_LN +: Op<(call "vbfdot", $p0, $p1, + (bitcast $p1, (call_mangled "splat_lane", (bitcast "float32x2_t", $p2), $p3)))>; + +def OP_BFDOT_LNQ +: Op<(call "vbfdot", $p0, $p1, + (bitcast $p1, (call_mangled "splat_lane", (bitcast "float32x4_t", $p2), $p3)))>; + +def OP_BFMLALB_LN +: Op<(call "vbfmlalb", $p0, $p1, + (dup_typed $p1, (call "vget_lane", $p2, $p3)))>; + +def OP_BFMLALT_LN +: Op<(call "vbfmlalt", $p0, $p1, + (dup_typed $p1, (call "vget_lane", $p2, $p3)))>; + //===--===// // Auxiliary Instructions //===--===// @@ -1847,6 +1863,25 @@ let ArchGuard = "defined(__ARM_FEATURE_MATMUL_INT8)" in { } } +let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in { + def VDOT_BF : SInst<"vbfdot", "..BB", "fQf">; + def VDOT_LANE_BF : SOpInst<"vbfdot_lane", "..B(Bq)I", "fQf", OP_BFDOT_LN>; + def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ> { +let isLaneQ = 1; + } + + def VFMMLA_BF : SInst<"vbfmmla", "..BB", "Qf">; + + def VFMLALB_BF : SInst<"vbfmlalb", "..BB", "Qf">; + def VFMLALT_BF : SInst<"vbfmlalt", "..BB", "Qf">; + + def VFMLALB_LANE_BF : SOpInst<"vbfmlalb_lane", "..B(Bq)I", "Qf", OP_BFMLALB_LN>; + def VFMLALB_LANEQ_BF : SOpInst<"vbfmlalb_laneq", "..B(BQ)I", "Qf", OP_BFMLALB_LN>; + + def VFMLALT_LANE_BF : SOpInst<"vbfmlalt_lane", "..B(Bq)I", "Qf", OP_BFMLALT_LN>; + def VFMLALT_LANEQ_BF : SOpInst<"vbfmlalt_laneq", "..B(BQ)I", "Qf", OP_BFMLALT_LN>; +} + // v8.3-A Vector complex addition intrinsics let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 209b5a2b00e3..c3cfed34eeba 100644 --- 
a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4970,6 +4970,11 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), + NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0), + NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0), NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot90_v, aarch64_neon
[clang] 8bf99f1 - [ARM] Add Cortex-A77 Support for Clang and LLVM
Author: Luke Geeson Date: 2020-07-03T13:00:54+01:00 New Revision: 8bf99f1e6f0f9b426d6060361ea6d9d47c1868d1 URL: https://github.com/llvm/llvm-project/commit/8bf99f1e6f0f9b426d6060361ea6d9d47c1868d1 DIFF: https://github.com/llvm/llvm-project/commit/8bf99f1e6f0f9b426d6060361ea6d9d47c1868d1.diff LOG: [ARM] Add Cortex-A77 Support for Clang and LLVM This patch upstreams support for the Arm-v8 Cortex-A77 processor for AArch64 and ARM. In detail: - Adding cortex-a77 as a cpu option for aarch64 and arm targets in clang - Cortex-A77 CPU name and ProcessorModel in llvm details of the CPU can be found here: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a77 and a similar submission to GCC can be found here: https://github.com/gcc-mirror/gcc/commit/e0664b7a63ed8305e9f8539309df7fb3eb13babe The following people contributed to this patch: - Luke Geeson - Mikhail Maltsev Reviewers: t.p.northover, dmgreen, ostannard, SjoerdMeijer Reviewed By: dmgreen Subscribers: dmgreen, kristof.beyls, hiraditya, danielkiss, cfe-commits, llvm-commits, miyuki Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D82887 Added: Modified: clang/test/Driver/aarch64-cpus.c clang/test/Driver/arm-cortex-cpus.c llvm/include/llvm/Support/AArch64TargetParser.def llvm/include/llvm/Support/ARMTargetParser.def llvm/lib/Support/Host.cpp llvm/lib/Target/AArch64/AArch64.td llvm/lib/Target/AArch64/AArch64Subtarget.cpp llvm/lib/Target/AArch64/AArch64Subtarget.h llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/test/CodeGen/AArch64/cpus.ll llvm/test/CodeGen/AArch64/remat.ll llvm/test/MC/AArch64/armv8.2a-dotprod.s llvm/test/MC/ARM/armv8.2a-dotprod-a32.s llvm/test/MC/ARM/armv8.2a-dotprod-t32.s llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt llvm/unittests/Support/TargetParserTest.cpp Removed: diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index f774e5c4d8db..53b546265f6a 100644 --- 
a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -171,6 +171,9 @@ // ARM64-CORTEX-A76: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a76" // ARM64-CORTEX-A76-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target aarch64 -mcpu=cortex-a77 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A77 %s +// CORTEX-A77: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a77" + // RUN: %clang -target aarch64_be -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s // RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s // RUN: %clang -target aarch64_be -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c index c1ce9c08ca87..d99526abe446 100644 --- a/clang/test/Driver/arm-cortex-cpus.c +++ b/clang/test/Driver/arm-cortex-cpus.c @@ -686,10 +686,12 @@ // RUN: %clang -target arm -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s // RUN: %clang -target arm -mcpu=cortex-a76 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s // RUN: %clang -target arm -mcpu=cortex-a76ae -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s +// RUN: %clang -target arm -mcpu=cortex-a77 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s // RUN: %clang -target arm -mcpu=cortex-a55 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s // RUN: %clang -target arm -mcpu=cortex-a75 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s // RUN: %clang -target arm -mcpu=cortex-a76 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s // RUN: %clang -target arm -mcpu=cortex-a76ae -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s +// RUN: %clang -target arm -mcpu=cortex-a77 -mlittle-endian -### -c %s 2>&1 | FileCheck 
-check-prefix=CHECK-CPUV82A %s // // RUN: %clang -target arm -mcpu=exynos-m4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s // RUN: %clang -target arm -mcpu=exynos-m4 -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV82A %s @@ -718,10 +720,12 @@ // RUN: %clang -target armeb -mcpu=cortex-a75 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s // RUN: %clang -target armeb -mcpu=cortex-a76 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s // RUN: %clang -target armeb -mcpu=cortex-a76ae -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s +// RUN: %clang -target armeb -mcpu=cortex-a77 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-CPUV82A %s // RUN: %clang -target arm -mcpu=cortex-a55 -mbig-endian -### -c %s 2>&1 | FileCheck -c
[clang] 954db63 - [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM
Author: Luke Geeson Date: 2020-07-10T18:24:11+01:00 New Revision: 954db63cd149df031d9b660bf68f0fe1de1defb9 URL: https://github.com/llvm/llvm-project/commit/954db63cd149df031d9b660bf68f0fe1de1defb9 DIFF: https://github.com/llvm/llvm-project/commit/954db63cd149df031d9b660bf68f0fe1de1defb9.diff LOG: [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM This patch upstreams support for the Arm-v8 Cortex-A78 and Cortex-X1 processors for AArch64 and ARM. In detail: - Adding cortex-a78 and cortex-x1 as cpu options for aarch64 and arm targets in clang - Adding Cortex-A78 and Cortex-X1 CPU names and ProcessorModels in llvm details of the CPU can be found here: https://www.arm.com/products/cortex-x https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78 The following people contributed to this patch: - Luke Geeson - Mikhail Maltsev Reviewers: t.p.northover, dmgreen Reviewed By: dmgreen Subscribers: dmgreen, kristof.beyls, hiraditya, danielkiss, cfe-commits, llvm-commits, miyuki Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D83206 Added: Modified: clang/test/Driver/aarch64-cpus.c clang/test/Driver/arm-cortex-cpus.c llvm/include/llvm/Support/AArch64TargetParser.def llvm/include/llvm/Support/ARMTargetParser.def llvm/lib/Support/Host.cpp llvm/lib/Target/AArch64/AArch64.td llvm/lib/Target/AArch64/AArch64Subtarget.cpp llvm/lib/Target/AArch64/AArch64Subtarget.h llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMSubtarget.cpp llvm/lib/Target/ARM/ARMSubtarget.h llvm/test/CodeGen/AArch64/cpus.ll llvm/test/CodeGen/AArch64/remat.ll llvm/test/MC/AArch64/armv8.2a-dotprod.s llvm/test/MC/ARM/armv8.2a-dotprod-a32.s llvm/test/MC/ARM/armv8.2a-dotprod-t32.s llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt llvm/unittests/Support/TargetParserTest.cpp Removed: diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index 53b546265f6a..f39241bee8a6 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ 
-173,6 +173,10 @@ // RUN: %clang -target aarch64 -mcpu=cortex-a77 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A77 %s // CORTEX-A77: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a77" +// RUN: %clang -target aarch64 -mcpu=cortex-x1 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXX1 %s +// CORTEXX1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x1" +// RUN: %clang -target aarch64 -mcpu=cortex-a78 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXA78 %s +// CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78" // RUN: %clang -target aarch64_be -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s // RUN: %clang -target aarch64 -mbig-endian -mcpu=exynos-m3 -### -c %s 2>&1 | FileCheck -check-prefix=M3 %s diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c index d99526abe446..6de1040e9420 100644 --- a/clang/test/Driver/arm-cortex-cpus.c +++ b/clang/test/Driver/arm-cortex-cpus.c @@ -840,6 +840,18 @@ // CHECK-CORTEX-A76AE-SOFT: "-target-feature" "+soft-float" // CHECK-CORTEX-A76AE-SOFT: "-target-feature" "+soft-float-abi" +// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-x1 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-X1 %s +// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-x1 -mfpu=crypto-neon-fp-armv8 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-X1-MFPU %s +// CHECK-CORTEX-X1: "-cc1"{{.*}} "-triple" "armv8.2a-{{.*}} "-target-cpu" "cortex-x1" +// CHECK-CORTEX-X1-MFPU: "-cc1"{{.*}} "-target-feature" "+fp-armv8" +// CHECK-CORTEX-X1-MFPU: "-target-feature" "+crypto" + +// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a78 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A78 %s +// RUN: %clang -target armv8a-arm-none-eabi -mcpu=cortex-a78 -mfpu=crypto-neon-fp-armv8 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-A78-MFPU %s +// CHECK-CORTEX-A78: "-cc1"{{.*}} "-triple" "armv8.2a-{{.*}} "-target-cpu" "cortex-a78" +// 
CHECK-CORTEX-A78-MFPU: "-cc1"{{.*}} "-target-feature" "+fp-armv8" +// CHECK-CORTEX-A78-MFPU: "-target-feature" "+crypto" + // RUN: %clang -target arm -mcpu=cortex-m23 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CPUV8MBASE %s // CHECK-CPUV8MBASE: "-cc1"{{.*}} "-triple" "thumbv8m.base- diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 66843c6e1941..13b7cfc4b5cd 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -127,6 +127,12 @@ AARCH64_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AE
[clang] 832cd74 - [AArch64] Armv8.6-a Matrix Mult Assembly + Intrinsics
Author: Luke Geeson Date: 2020-04-24T15:54:06+01:00 New Revision: 832cd749131b1fa59d12486325f19e16eb392a42 URL: https://github.com/llvm/llvm-project/commit/832cd749131b1fa59d12486325f19e16eb392a42 DIFF: https://github.com/llvm/llvm-project/commit/832cd749131b1fa59d12486325f19e16eb392a42.diff LOG: [AArch64] Armv8.6-a Matrix Mult Assembly + Intrinsics This patch upstreams support for the Armv8.6-a Matrix Multiplication Extension. A summary of the features can be found here: https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a This patch includes: - Assembly support for AArch64 only (no SVE or Neon) - Intrinsics Support for AArch64 Armv8.6a Matrix Multiplication Instructions (No bfloat16 matrix multiplication) No IR types or C Types are needed for this extension. This is part of a patch series, starting with BFloat16 support and the other components in the armv8.6a extension (in previous patches linked in phabricator) Based on work by: - Luke Geeson - Oliver Stannard - Luke Cheeseman Reviewers: ostannard, t.p.northover, rengolin, kmclaughlin Reviewed By: kmclaughlin Subscribers: kmclaughlin, kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D77871 Added: clang/test/CodeGen/aarch64-matmul.cpp clang/test/CodeGen/aarch64-v8.6a-neon-intrinsics.c llvm/test/CodeGen/AArch64/aarch64-matmul.ll llvm/test/MC/AArch64/armv8.6a-simd-matmul-error.s llvm/test/MC/AArch64/armv8.6a-simd-matmul.s llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt Modified: clang/include/clang/Basic/arm_neon.td clang/lib/Basic/Targets/AArch64.cpp clang/lib/Basic/Targets/AArch64.h clang/lib/CodeGen/CGBuiltin.cpp llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64Subtarget.h Removed: diff --git 
a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index f949edc378fc..82e44aaec69b 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -221,6 +221,21 @@ def OP_FMLAL_LN_Hi : Op<(call "vfmlal_high", $p0, $p1, def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1, (dup_typed $p1, (call "vget_lane", $p2, $p3)))>; +def OP_USDOT_LN +: Op<(call "vusdot", $p0, $p1, + (cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>; +def OP_USDOT_LNQ +: Op<(call "vusdot", $p0, $p1, + (cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>; + +// sudot splats the second vector and then calls vusdot +def OP_SUDOT_LN +: Op<(call "vusdot", $p0, + (cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>; +def OP_SUDOT_LNQ +: Op<(call "vusdot", $p0, + (cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>; + //===--===// // Auxiliary Instructions //===--===// @@ -1792,6 +1807,23 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { } } +let ArchGuard = "defined(__ARM_FEATURE_MATMUL_INT8)" in { + def VMMLA : SInst<"vmmla", "..(<<)(<<)", "QUiQi">; + def VUSMMLA : SInst<"vusmmla", "..(<; + + def VUSDOT : SInst<"vusdot", "..(<; + + def VUSDOT_LANE : SOpInst<"vusdot_lane", "..(<; + def VSUDOT_LANE : SOpInst<"vsudot_lane", "..(<<)(<; + + let ArchGuard = "defined(__aarch64__)" in { +let isLaneQ = 1 in { + def VUSDOT_LANEQ : SOpInst<"vusdot_laneq", "..(<; + def VSUDOT_LANEQ : SOpInst<"vsudot_laneq", "..(<<)(<; +} + } +} + // v8.3-A Vector complex addition intrinsics let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">; @@ -1808,4 +1840,4 @@ let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in { let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in { def 
VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">; def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">; -} \ No newline at end of file +} diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 8ceb7f2b515e..5357d31ee64c 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -280,6 +280,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasTME) Builder.defineMacro("__ARM_FEATURE_TME", "1"); + if (HasMatMul) +Builder.defineMacro("__ARM_FEATURE_MATM
[clang] 740a1dd - [ARM] Armv8.6-a Matrix Mul cmd line support
Author: Luke Geeson Date: 2020-04-24T15:54:06+01:00 New Revision: 740a1dd050eea93a875ec86780ad6ed4b0310113 URL: https://github.com/llvm/llvm-project/commit/740a1dd050eea93a875ec86780ad6ed4b0310113 DIFF: https://github.com/llvm/llvm-project/commit/740a1dd050eea93a875ec86780ad6ed4b0310113.diff LOG: [ARM] Armv8.6-a Matrix Mul cmd line support This patch upstreams support for the Armv8.6-a Matrix Multiplication Extension. A summary of the features can be found here: https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a This patch includes: - Command line options to enable these features with +i8mm, +f32mm, or +f64mm Note: +f32mm and +f64mm are optional and so are not enabled by default This is part of a patch series, starting with BFloat16 support and the other components in the armv8.6a extension (in previous patches linked in phabricator) Based on work by: - Luke Geeson - Oliver Stannard - Luke Cheeseman Reviewers: t.p.northover, DavidSpickett Reviewed By: DavidSpickett Subscribers: DavidSpickett, ostannard, kristof.beyls, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D77875 Added: clang/test/Driver/arm-matrix-multiply.c Modified: clang/lib/Driver/ToolChains/Arch/AArch64.cpp clang/test/Driver/aarch64-cpus.c llvm/include/llvm/Support/AArch64TargetParser.def llvm/include/llvm/Support/AArch64TargetParser.h llvm/include/llvm/Support/ARMTargetParser.def llvm/include/llvm/Support/ARMTargetParser.h llvm/unittests/Support/TargetParserTest.cpp Removed: diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index b21cfac6e7ed..4c034d40aaf4 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -54,7 +54,8 @@ std::string aarch64::getAArch64TargetCPU(const ArgList &Args, // Decode AArch64 features from string like +[no]featureA+[no]featureB+... 
static bool DecodeAArch64Features(const Driver &D, StringRef text, - std::vector &Features) { + std::vector &Features, + llvm::AArch64::ArchKind ArchKind) { SmallVector Split; text.split(Split, StringRef("+"), -1, false); @@ -66,6 +67,11 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text, D.Diag(clang::diag::err_drv_no_neon_modifier); else return false; + +// +sve implies +f32mm if the base architecture is v8.6A +// it isn't the case in general that sve implies both f64mm and f32mm +if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A) && Feature == "sve") + Features.push_back("+f32mm"); } return true; } @@ -76,6 +82,7 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU, std::vector &Features) { std::pair Split = Mcpu.split("+"); CPU = Split.first; + llvm::AArch64::ArchKind ArchKind = llvm::AArch64::ArchKind::ARMV8A; if (CPU == "native") CPU = llvm::sys::getHostCPUName(); @@ -83,7 +90,7 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU, if (CPU == "generic") { Features.push_back("+neon"); } else { -llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseCPUArch(CPU); +ArchKind = llvm::AArch64::parseCPUArch(CPU); if (!llvm::AArch64::getArchFeatures(ArchKind, Features)) return false; @@ -92,10 +99,11 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU, return false; } - if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features)) -return false; + if (Split.second.size() && + !DecodeAArch64Features(D, Split.second, Features, ArchKind)) + return false; - return true; + return true; } static bool @@ -108,7 +116,8 @@ getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March, llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseArch(Split.first); if (ArchKind == llvm::AArch64::ArchKind::INVALID || !llvm::AArch64::getArchFeatures(ArchKind, Features) || - (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))) + 
(Split.second.size() && + !DecodeAArch64Features(D, Split.second, Features, ArchKind))) return false; return true; diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index 971f3cd83521..cf12a5155689 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -636,6 +636,34 @@ // RUN: %clang -target aarch64 -march=armv8.5a+bf16+sve -### -c %s 2>&1 | FileCheck -check-prefixes=GENERICV85A-BF16-SVE %s // GENERICV85A-BF16-SVE: "-target-feature" "+bf16" "-target-feature" "+sve" +// The 8-bit integer matrix
[clang] 7da1905 - [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics
Author: Luke Geeson Date: 2020-04-24T15:54:06+01:00 New Revision: 7da19051253219d4bee2c50fe13f250201f1f7ec URL: https://github.com/llvm/llvm-project/commit/7da19051253219d4bee2c50fe13f250201f1f7ec DIFF: https://github.com/llvm/llvm-project/commit/7da19051253219d4bee2c50fe13f250201f1f7ec.diff LOG: [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics This patch upstreams support for the Armv8.6-a Matrix Multiplication Extension. A summary of the features can be found here: https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a This patch includes: - Assembly support for AArch32 - Intrinsics Support for AArch32 Neon Intrinsics for Matrix Multiplication Note: these extensions are optional in the 8.6a architecture and so have to be enabled by default No additional IR types or C Types are needed for this extension. This is part of a patch series, starting with BFloat16 support and the other components in the armv8.6a extension (in previous patches linked in phabricator) Based on work by: - Luke Geeson - Oliver Stannard - Luke Cheeseman Reviewers: t.p.northover, miyuki Reviewed By: miyuki Subscribers: miyuki, ostannard, kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D77872 Added: clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c llvm/test/CodeGen/ARM/arm-matmul.ll Modified: clang/lib/Basic/Targets/ARM.cpp clang/lib/Basic/Targets/ARM.h clang/lib/CodeGen/CGBuiltin.cpp llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMInstrNEON.td llvm/lib/Target/ARM/ARMPredicates.td llvm/lib/Target/ARM/ARMSubtarget.h Removed: diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 881516181538..f02a9d373609 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -425,6 +425,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector &Features, // Note that 
SoftFloatABI is initialized in our constructor. HWDiv = 0; DotProd = 0; + HasMatMul = 0; HasFloat16 = true; ARMCDECoprocMask = 0; @@ -491,6 +492,8 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector &Features, FPU |= FPARMV8; MVE |= MVE_INT | MVE_FP; HW_FP |= HW_FP_SP | HW_FP_HP; +} else if (Feature == "+i8mm") { + HasMatMul = 1; } else if (Feature.size() == strlen("+cdecp0") && Feature >= "+cdecp0" && Feature <= "+cdecp7") { unsigned Coproc = Feature.back() - '0'; @@ -820,6 +823,9 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, if (DotProd) Builder.defineMacro("__ARM_FEATURE_DOTPROD", "1"); + if (HasMatMul) +Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1"); + switch (ArchKind) { default: break; diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 725954038602..48d9db2ba166 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -75,6 +75,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { unsigned DSP : 1; unsigned Unaligned : 1; unsigned DotProd : 1; + unsigned HasMatMul : 1; enum { LDREX_B = (1 << 0), /// byte (8-bit) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 80b27b629d1e..3a66583e20e8 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4807,6 +4807,7 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), + NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), @@ -4914,6 +4915,9 @@ static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP0(vtrnq_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), + NEONMAP1(vusdot_v, arm_neon_usdot, 0), + NEONMAP1(vusdotq_v, arm_neon_usdot, 0), + NEONMAP1(vusmmlaq_v, 
arm_neon_usmmla, 0), NEONMAP0(vuzp_v), NEONMAP0(vuzpq_v), NEONMAP0(vzip_v), diff --git a/clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c new file mode 100644 index ..a641197b118a --- /dev/null +++ b/clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +fullfp16 -target-feature +i8mm \ +// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -mem2reg -sroa \ +// RUN: | FileCheck %s + +// REQUIRES: arm-registered-target + +#include + +// CHECK-LABEL: test_vmmlaq_s32 +// CHECK: [[
[clang] 7d594cf - [ARM] Add Cortex-M55 Support for clang and llvm
Author: Luke Geeson Date: 2020-03-02T11:42:26Z New Revision: 7d594cf003d1325a1d85a339c03b720fe63de4c9 URL: https://github.com/llvm/llvm-project/commit/7d594cf003d1325a1d85a339c03b720fe63de4c9 DIFF: https://github.com/llvm/llvm-project/commit/7d594cf003d1325a1d85a339c03b720fe63de4c9.diff LOG: [ARM] Add Cortex-M55 Support for clang and llvm This patch upstreams support for the ARM Armv8.1m cpu Cortex-M55. In detail adding support for: - mcpu option in clang - Arm Target Features in clang - llvm Arm TargetParser definitions details of the CPU can be found here: https://developer.arm.com/ip-products/processors/cortex-m/cortex-m55 Reviewers: chill Reviewed By: chill Subscribers: dmgreen, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74966 Added: Modified: clang/test/CodeGen/arm-target-features.c clang/test/Driver/arm-cortex-cpus.c clang/test/Preprocessor/arm-target-features.c llvm/include/llvm/Support/ARMTargetParser.def llvm/lib/Support/Host.cpp llvm/lib/Target/ARM/ARM.td llvm/test/CodeGen/ARM/build-attributes.ll llvm/unittests/Support/TargetParserTest.cpp Removed: diff --git a/clang/test/CodeGen/arm-target-features.c b/clang/test/CodeGen/arm-target-features.c index 11fe4e505439..160d254c1302 100644 --- a/clang/test/CodeGen/arm-target-features.c +++ b/clang/test/CodeGen/arm-target-features.c @@ -107,4 +107,7 @@ // RUN: %clang_cc1 -triple thumb-linux-gnueabi -target-cpu cortex-m33 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ARMV8M-MAIN-LINUX // CHECK-ARMV8M-MAIN-LINUX: "target-features"="+armv8-m.main,+dsp,+fp-armv8d16sp,+fp16,+hwdiv,+thumb-mode,+vfp2sp,+vfp3d16sp,+vfp4d16sp" +// RUN: %clang_cc1 -triple thumb-linux-gnueabi -target-cpu cortex-m55 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ARMV81M-MAIN-LINUX +// CHECK-ARMV81M-MAIN-LINUX: 
"target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp" + void foo() {} diff --git a/clang/test/Driver/arm-cortex-cpus.c b/clang/test/Driver/arm-cortex-cpus.c index bb2f4ec44943..12129e4ee25f 100644 --- a/clang/test/Driver/arm-cortex-cpus.c +++ b/clang/test/Driver/arm-cortex-cpus.c @@ -820,6 +820,9 @@ // CHECK-CORTEX-M33: "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "cortex-m33" // CHECK-CORTEX-M35P: "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "cortex-m35p" +// RUN: %clang -target arm -mcpu=cortex-m55 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CORTEX-M55 %s +// CHECK-CORTEX-M55: "-cc1"{{.*}} "-triple" "thumbv8.1m.main-{{.*}} "-target-cpu" "cortex-m55" + // == Check whether -mcpu accepts mixed-case values. // RUN: %clang -target arm-linux-gnueabi -mcpu=Cortex-a5 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-CPUV7A %s // RUN: %clang -target arm-linux-gnueabi -mcpu=cortex-A7 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CASE-INSENSITIVE-CPUV7A %s diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index 401e0a41a769..3cee4d0fc2b6 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -674,6 +674,7 @@ // RUN: %clang -target armv8m.main-none-linux-gnu -mcmse -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_CMSE %s // RUN: %clang -target arm-none-linux-gnu -mcpu=cortex-m33 -mcmse -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_CMSE %s // RUN: %clang -target arm -mcpu=cortex-m23 -mcmse -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_CMSE %s +// RUN: %clang -target arm-none-linux-gnu -mcpu=cortex-m55 -mcmse -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_CMSE %s // V8M_CMSE-NOT: __ARM_FEATURE_CMSE 
1 // V8M_CMSE: #define __ARM_FEATURE_CMSE 3 @@ -726,6 +727,20 @@ // M33-ALLOW-FP-INSTR: #define __ARM_FP 0x6 // M33-ALLOW-FP-INSTR: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 +// Test whether predefines are as expected when targeting cortex-m55 (softfp FP ABI as default). +// RUN: %clang -target arm-eabi -mcpu=cortex-m55 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M55 %s +// M55: #define __ARM_ARCH 8 +// M55: #define __ARM_ARCH_8_1M_MAIN__ 1 +// M55: #define __ARM_ARCH_EXT_IDIV__ 1 +// M55-NOT: __ARM_ARCH_ISA_ARM +// M55: #define __ARM_ARCH_ISA_THUMB 2 +// M55: #define __ARM_ARCH_PROFILE 'M' +// M55-NOT: __ARM_FEATURE_CRC32 +// M55: #define __ARM_FEATURE_DSP 1 +// M55: #define __ARM_FEATURE_MVE 3 +// M55: #define __ARM_FP 0xe +// M55: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 + // Test whether predefines are as expected when targeting krait (soft FP as default). // RUN: %clang -targe
[clang] 4518aab - [AArch64] Add Cortex-A34 Support for clang and llvm
Author: Luke Geeson Date: 2020-02-18T14:56:16Z New Revision: 4518aab289a090a668af0ca4ecde976e18fb2b1e URL: https://github.com/llvm/llvm-project/commit/4518aab289a090a668af0ca4ecde976e18fb2b1e DIFF: https://github.com/llvm/llvm-project/commit/4518aab289a090a668af0ca4ecde976e18fb2b1e.diff LOG: [AArch64] Add Cortex-A34 Support for clang and llvm This patch upstreams support for the AArch64 Armv8-A cpu Cortex-A34. In detail adding support for: - mcpu option in clang - AArch64 Target Features in clang - llvm AArch64 TargetParser definitions details of the cpu can be found here: https://developer.arm.com/ip-products/processors/cortex-a/cortex-a34 Reviewers: SjoerdMeijer Reviewed By: SjoerdMeijer Subscribers: SjoerdMeijer, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74483 Change-Id: Ida101fc544ca183a0a0e61a1277c8957855fde0b Added: Modified: clang/test/Driver/aarch64-cpus.c clang/test/Preprocessor/aarch64-target-features.c llvm/include/llvm/Support/AArch64TargetParser.def llvm/lib/Support/Host.cpp llvm/lib/Target/AArch64/AArch64.td llvm/test/CodeGen/AArch64/cpus.ll llvm/test/CodeGen/AArch64/remat.ll llvm/unittests/Support/TargetParserTest.cpp Removed: diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index c7dc5f63d678..f1b53d98e150 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -45,6 +45,22 @@ // ARM64-CA35: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a35" // ARM64-CA35-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target aarch64 -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34 %s +// RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34 %s +// RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34 %s +// RUN: %clang -target aarch64 -mtune=cortex-a34 
-### -c %s 2>&1 | FileCheck -check-prefix=CA34-TUNE %s +// RUN: %clang -target aarch64 -mlittle-endian -mtune=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34-TUNE %s +// RUN: %clang -target aarch64_be -mlittle-endian -mtune=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34-TUNE %s +// CA34: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a34" +// CA34-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" + +// RUN: %clang -target arm64 -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA34 %s +// RUN: %clang -target arm64 -mlittle-endian -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA34 %s +// RUN: %clang -target arm64 -mtune=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA34-TUNE %s +// RUN: %clang -target arm64 -mlittle-endian -mtune=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-CA34-TUNE %s +// ARM64-CA34: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "cortex-a34" +// ARM64-CA34-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" + // RUN: %clang -target aarch64 -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s // RUN: %clang -target aarch64 -mlittle-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s // RUN: %clang -target aarch64_be -mlittle-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53 %s @@ -267,6 +283,15 @@ // CA35-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a35" // CA35-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" +// RUN: %clang -target aarch64_be -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34-BE %s +// RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34-BE %s +// RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34-BE %s +// RUN: %clang -target aarch64_be -mtune=cortex-a34 -### -c %s 2>&1 | 
FileCheck -check-prefix=CA34-BE-TUNE %s +// RUN: %clang -target aarch64 -mbig-endian -mtune=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34-BE-TUNE %s +// RUN: %clang -target aarch64_be -mbig-endian -mtune=cortex-a34 -### -c %s 2>&1 | FileCheck -check-prefix=CA34-BE-TUNE %s +// CA34-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "cortex-a34" +// CA34-BE-TUNE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" + // RUN: %clang -target aarch64_be -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a53 -### -c %s 2>&1 | FileCheck -check-prefix=CA53-BE %s diff --git a/clang/