https://github.com/ilinpv created https://github.com/llvm/llvm-project/pull/90928
For arch64 features, such as Branch Target Identification or MTE (Memory Tagging Extension), compatible with targets that lack their support we may encounter scenarios where a binary compiled with MTE for example is executed on both MTE and non-MTE hardware and we still need to detect at runtime whether the MTE feature is available to choose the appropriate function version. So, we cannot optimize the function multi versioning resolver by removing checks for these features enabled for the target during compilation. >From 36a72bf75ce1897ab58d422954c54dc40270f942 Mon Sep 17 00:00:00 2001 From: Pavel Iliin <pavel.il...@arm.com> Date: Wed, 1 May 2024 18:43:47 +0100 Subject: [PATCH] [FMV][AArch64] Don't optimize backward compatible features in resolver. For arch64 features, such as Branch Target Identification or MTE (Memory Tagging Extension), compatible with targets that lack their support we may encounter scenarios where a binary compiled with MTE for example is executed on both MTE and non-MTE hardware and we still need to detect at runtime whether the MTE feature is available to choose the appropriate function version. So, we cannot optimize the function multi versioning resolver by removing checks for these features enabled for the target during compilation. --- clang/lib/CodeGen/CodeGenFunction.cpp | 10 +++++-- .../test/CodeGen/attr-target-clones-aarch64.c | 29 +++++++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 87766a758311d5..39943ed2a415e8 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2759,8 +2759,14 @@ llvm::Value *CodeGenFunction::FormAArch64ResolverCondition( const MultiVersionResolverOption &RO) { llvm::SmallVector<StringRef, 8> CondFeatures; for (const StringRef &Feature : RO.Conditions.Features) { - // Form condition for features which are not yet enabled in target - if (!getContext().getTargetInfo().hasFeature(Feature)) + // Optimize the Function Multi Versioning resolver by creating conditions + // only for features that are not enabled in the target. The exception is + // for features whose extension instructions are executed as NOP on targets + // without extension support. + if (!getContext().getTargetInfo().hasFeature(Feature) || + Feature.equals("bti") || Feature.equals("memtag") || + Feature.equals("memtag2") || Feature.equals("memtag3") || + Feature.equals("dgh")) CondFeatures.push_back(Feature); } if (!CondFeatures.empty()) { diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c index f75d8a69ebf02f..603d067864b45a 100644 --- a/clang/test/CodeGen/attr-target-clones-aarch64.c +++ b/clang/test/CodeGen/attr-target-clones-aarch64.c @@ -526,8 +526,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -604,7 +604,24 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // CHECK-MTE-BTI-LABEL: @ftc_dup3.resolver( // CHECK-MTE-BTI-NEXT: resolver_entry: +// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() +// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mbti +// CHECK-MTE-BTI: resolver_else: +// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17592186044416 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17592186044416 +// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK-MTE-BTI: resolver_return1: +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mmemtag2 +// CHECK-MTE-BTI: resolver_else2: +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3.default // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone @@ -712,7 +729,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve // CHECK-MTE-BTI: resolver_else: +// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK-MTE-BTI: resolver_return1: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti +// CHECK-MTE-BTI: resolver_else2: +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3.default // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits