erichkeane updated this revision to Diff 164251.
erichkeane marked 9 inline comments as done.
erichkeane added a comment.

Address Aaron's review comments.


https://reviews.llvm.org/D51650

Files:
  include/clang/AST/Decl.h
  include/clang/Basic/Attr.td
  include/clang/Basic/AttrDocs.td
  include/clang/Basic/DiagnosticSemaKinds.td
  include/clang/Sema/Sema.h
  lib/AST/Decl.cpp
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  lib/Sema/SemaDecl.cpp
  lib/Sema/SemaDeclAttr.cpp
  test/CodeGen/attr-cpuspecific.c
  test/CodeGen/attr-target-clones.c
  test/Misc/pragma-attribute-supported-attributes-list.test
  test/Sema/attr-target-clones.c

Index: test/Sema/attr-target-clones.c
===================================================================
--- /dev/null
+++ test/Sema/attr-target-clones.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu  -fsyntax-only -verify %s
+
+// expected-error@+1 {{'target_clones' multiversioning requires a default target}}
+void __attribute__((target_clones("sse4.2", "arch=sandybridge")))
+no_default(void);
+
+// expected-error@+2 {{'target_clones' and 'target' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target("sse4.2"), target_clones("arch=sandybridge")))
+ignored_attr(void);
+// expected-error@+2 {{'target' and 'target_clones' attributes are not compatible}}
+// expected-note@+1 {{conflicting attribute is here}}
+void __attribute__((target_clones("arch=sandybridge,default"), target("sse4.2")))
+ignored_attr2(void);
+
+int redecl(void);
+int __attribute__((target_clones("sse4.2", "default"))) redecl(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2", "default"))) redecl2(void);
+int __attribute__((target_clones("sse4.2", "default"))) redecl2(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2", "default"))) redecl3(void);
+int redecl3(void);
+
+int __attribute__((target_clones("sse4.2", "arch=atom", "default"))) redecl4(void);
+// expected-error@+3 {{'target_clones' attribute does not match previous declaration}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("sse4.2", "arch=sandybridge", "default")))
+redecl4(void) { return 1; }
+
+int __attribute__((target("sse4.2"))) redef2(void) { return 1; }
+// expected-error@+2 {{multiversioning attributes cannot be combined}}
+// expected-note@-2 {{previous declaration is here}}
+int __attribute__((target_clones("sse4.2", "default"))) redef2(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; }
+// expected-error@+2 {{redefinition of 'redef3'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target_clones("sse4.2,default"))) redef3(void) { return 1; }
+
+int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; }
+// expected-error@+2 {{redefinition of 'redef4'}}
+// expected-note@-2 {{previous definition is here}}
+int __attribute__((target_clones("sse4.2,default"))) redef4(void) { return 1; }
+
+// No error here... duplicates are allowed because they alter name mangling.
+int __attribute__((target_clones("arch=atom,arch=atom", "arch=atom,default")))
+dupes(void) { return 1; }
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("")))
+empty_target_1(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones(",default")))
+empty_target_2(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,")))
+empty_target_3(void);
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default, ,avx2")))
+empty_target_4(void);
+
+// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string;}}
+void __attribute__((target_clones("default,avx2", "")))
+empty_target_5(void);
+
+int mv_after_use(void);
+int useage() {
+  return mv_after_use();
+}
+// expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
+int __attribute__((target_clones("sse4.2", "default"))) mv_after_use(void) { return 1; }
Index: test/Misc/pragma-attribute-supported-attributes-list.test
===================================================================
--- test/Misc/pragma-attribute-supported-attributes-list.test
+++ test/Misc/pragma-attribute-supported-attributes-list.test
@@ -2,7 +2,7 @@
 
 // The number of supported attributes should never go down!
 
-// CHECK: #pragma clang attribute supports 128 attributes:
+// CHECK: #pragma clang attribute supports 129 attributes:
 // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
@@ -120,6 +120,7 @@
 // CHECK-NEXT: SwiftIndirectResult (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: TLSModel (SubjectMatchRule_variable_is_thread_local)
 // CHECK-NEXT: Target (SubjectMatchRule_function)
+// CHECK-NEXT: TargetClones (SubjectMatchRule_function)
 // CHECK-NEXT: TestTypestate (SubjectMatchRule_function_is_member)
 // CHECK-NEXT: TrivialABI (SubjectMatchRule_record)
 // CHECK-NEXT: VecReturn (SubjectMatchRule_record)
Index: test/CodeGen/attr-target-clones.c
===================================================================
--- /dev/null
+++ test/CodeGen/attr-target-clones.c
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver
+// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver
+// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver
+// CHECK: @unused.ifunc = ifunc void (), void ()* ()* @unused.resolver
+
+int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; }
+// CHECK: define i32 @foo.sse4.2.0()
+// CHECK: define i32 @foo.default.1()
+// CHECK: define i32 ()* @foo.resolver()
+// CHECK: ret i32 ()* @foo.sse4.2.0
+// CHECK: ret i32 ()* @foo.default.1
+
+int bar() {
+  // CHECK: define i32 @bar()
+  return foo();
+  // CHECK: call i32 @foo.ifunc()
+}
+
+inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
+foo_inline(void) { return 0; }
+// CHECK: define available_externally i32 @foo_inline.arch_sandybridge.0() #[[SB:[0-9]+]]
+// CHECK: define available_externally i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]]
+// CHECK: define available_externally i32 @foo_inline.default.2() #[[DEF:[0-9]+]]
+// CHECK: define i32 ()* @foo_inline.resolver()
+// CHECK: ret i32 ()* @foo_inline.arch_sandybridge.0
+// CHECK: ret i32 ()* @foo_inline.sse4.2.1
+// CHECK: ret i32 ()* @foo_inline.default.2
+
+int bar2() {
+  // CHECK: define i32 @bar2()
+  return foo_inline();
+  // CHECK: call i32 @foo_inline.ifunc()
+}
+
+inline __attribute__((target_clones("default,default ,sse4.2"))) void foo_decls(void) {}
+// CHECK: define available_externally void @foo_decls.sse4.2.0()
+// CHECK: define available_externally void @foo_decls.default.1()
+// CHECK: define void ()* @foo_decls.resolver()
+// CHECK: ret void ()* @foo_decls.sse4.2.0
+// CHECK: ret void ()* @foo_decls.default.1
+
+void bar3() {
+  // CHECK: define void @bar3()
+  foo_decls();
+  // CHECK: call void @foo_decls.ifunc()
+}
+
+void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {}
+// CHECK: define void @unused.arch_ivybridge.0()
+// CHECK: define void @unused.default.1()
+// CHECK: define void ()* @unused.resolver()
+// CHECK: ret void ()* @unused.arch_ivybridge.0
+// CHECK: ret void ()* @unused.default.1
+
+// CHECK: attributes #[[SSE42]] =
+// CHECK-SAME: "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK: attributes #[[DEF]] =
+// CHECK-SAME: "target-features"="+mmx,+sse,+sse2,+x87"
+// CHECK: attributes #[[SB]] =
+// CHECK-SAME: "target-features"="+aes,+avx,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
Index: test/CodeGen/attr-cpuspecific.c
===================================================================
--- test/CodeGen/attr-cpuspecific.c
+++ test/CodeGen/attr-cpuspecific.c
@@ -11,8 +11,10 @@
 void SingleVersion(void){}
 // CHECK: define void @SingleVersion.S() #[[S:[0-9]+]]
 
-__attribute__((cpu_specific(ivybridge)))
-void NotCalled(void){}
+__attribute__((cpu_specific(ivybridge))) inline void InlineSingleVersion(void) {}
+// CHECK: define available_externally void @InlineSingleVersion.S() #[[S:[0-9]+]]
+
+__attribute__((cpu_specific(ivybridge))) void NotCalled(void) {}
 // CHECK: define void @NotCalled.S() #[[S]]
 
 // Done before any of the implementations.
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -2963,33 +2963,39 @@
 bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
   enum FirstParam { Unsupported, Duplicate };
   enum SecondParam { None, Architecture };
+  enum FourthParam { Target, TargetClones };
   for (auto Str : {"tune=", "fpmath="})
     if (AttrStr.find(Str) != StringRef::npos)
       return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
-             << Unsupported << None << Str;
+             << Unsupported << None << Str << Target;
 
   TargetAttr::ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr);
 
   if (!ParsedAttrs.Architecture.empty() &&
       !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture))
     return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
-           << Unsupported << Architecture << ParsedAttrs.Architecture;
+           << Unsupported << Architecture << ParsedAttrs.Architecture << Target;
 
   if (ParsedAttrs.DuplicateArchitecture)
     return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
-           << Duplicate << None << "arch=";
+           << Duplicate << None << "arch=" << Target;
 
   for (const auto &Feature : ParsedAttrs.Features) {
     auto CurFeature = StringRef(Feature).drop_front(); // remove + or -.
     if (!Context.getTargetInfo().isValidFeatureName(CurFeature))
       return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
-             << Unsupported << None << CurFeature;
+             << Unsupported << None << CurFeature << Target;
   }
 
   return false;
 }
 
 static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (checkAttrMutualExclusion<TargetClonesAttr>(S, D, AL) ||
+      checkAttrMutualExclusion<CPUDispatchAttr>(S, D, AL) ||
+      checkAttrMutualExclusion<CPUSpecificAttr>(S, D, AL))
+    return;
+
   StringRef Str;
   SourceLocation LiteralLoc;
   if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) ||
@@ -3002,6 +3008,78 @@
   D->addAttr(NewAttr);
 }
 
+/// Check every comma-separated entry of one 'target_clones' string literal.
+/// Supported entries are appended to \p Strings; "default" only sets
+/// \p HasDefault. Returns true after diagnosing an unsupported entry.
+bool Sema::checkTargetClonesAttr(SourceLocation LiteralLoc, StringRef Str,
+                                 bool &HasDefault,
+                                 SmallVectorImpl<StringRef> &Strings) {
+  enum FirstParam { Unsupported, Duplicate };
+  enum SecondParam { None, Architecture };
+  enum FourthParam { Target, TargetClones };
+  const TargetInfo &TI = Context.getTargetInfo();
+
+  // An empty literal and a leading comma both mean an empty first entry.
+  if (Str.empty() || Str.front() == ',')
+    return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+           << Unsupported << None << "" << TargetClones;
+
+  while (!Str.empty()) {
+    // Step over the separator that ended the previous entry.
+    if (Str.front() == ',')
+      Str = Str.drop_front();
+    StringRef Entry = Str.take_front(Str.find(','));
+    Str = Str.drop_front(Entry.size());
+    Entry = Entry.trim();
+    if (Entry == "default") {
+      HasDefault = true;
+      continue;
+    }
+    StringRef CPU = Entry;
+    if (CPU.consume_front("arch=")) {
+      if (!TI.isValidCPUName(CPU))
+        return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+               << Unsupported << Architecture << CPU << TargetClones;
+    } else if (!TI.isValidFeatureName(Entry))
+      return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+             << Unsupported << None << Entry << TargetClones;
+    Strings.push_back(Entry);
+  }
+  return false;
+}
+
+/// Handle 'target_clones': check every string argument, insist on a "default"
+/// version, then mark the function multiversioned and attach the attribute.
+static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (checkAttrMutualExclusion<TargetAttr>(S, D, AL) ||
+      checkAttrMutualExclusion<CPUDispatchAttr>(S, D, AL) ||
+      checkAttrMutualExclusion<CPUSpecificAttr>(S, D, AL))
+    return;
+
+  bool SawDefault = false;
+  SmallVector<StringRef, 2> Versions;
+  for (unsigned ArgNo = 0; ArgNo != AL.getNumArgs(); ++ArgNo) {
+    SourceLocation ArgLoc;
+    StringRef ArgStr;
+    if (!S.checkStringLiteralArgumentAttr(AL, ArgNo, ArgStr, &ArgLoc))
+      return;
+    if (S.checkTargetClonesAttr(ArgLoc, ArgStr, SawDefault, Versions))
+      return;
+  }
+
+  if (!SawDefault) {
+    S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default);
+    return;
+  }
+  // "default" is stored exactly once, always as the last version.
+  Versions.push_back("default");
+
+  cast<FunctionDecl>(D)->setIsMultiVersion(true);
+  D->addAttr(::new (S.Context) TargetClonesAttr(
+      AL.getRange(), S.Context, Versions.data(), Versions.size(),
+      AL.getAttributeSpellingListIndex()));
+}
+
 static void handleMinVectorWidthAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   Expr *E = AL.getArgAsExpr(0);
   uint32_t VecWidth;
@@ -6308,6 +6386,9 @@
   case ParsedAttr::AT_Target:
     handleTargetAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_TargetClones:
+    handleTargetClonesAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_MinVectorWidth:
     handleMinVectorWidthAttr(S, D, AL);
     break;
Index: lib/Sema/SemaDecl.cpp
===================================================================
--- lib/Sema/SemaDecl.cpp
+++ lib/Sema/SemaDecl.cpp
@@ -9327,7 +9327,7 @@
 }
 
 namespace MultiVersioning {
-enum Type { None, Target, CPUSpecific, CPUDispatch};
+enum Type { None, Target, CPUSpecific, CPUDispatch, TargetClones };
 } // MultiVersionType
 
 static MultiVersioning::Type
@@ -9338,6 +9338,8 @@
     return MultiVersioning::CPUDispatch;
   if (FD->hasAttr<CPUSpecificAttr>())
     return MultiVersioning::CPUSpecific;
+  if (FD->hasAttr<TargetClonesAttr>())
+    return MultiVersioning::TargetClones;
   return MultiVersioning::None;
 }
 /// Check the target attribute of the function for MultiVersion
@@ -9399,9 +9401,7 @@
     Linkage = 5
   };
 
-  bool IsCPUSpecificCPUDispatchMVType =
-      MVType == MultiVersioning::CPUDispatch ||
-      MVType == MultiVersioning::CPUSpecific;
+  unsigned MVTypeForDiag = MVType - 1;
 
   if (OldFD && !OldFD->getType()->getAs<FunctionProtoType>()) {
     S.Diag(OldFD->getLocation(), diag::err_multiversion_noproto);
@@ -9424,56 +9424,56 @@
   if (CausesMV && OldFD &&
       std::distance(OldFD->attr_begin(), OldFD->attr_end()) != 1) {
     S.Diag(OldFD->getLocation(), diag::err_multiversion_no_other_attrs)
-        << IsCPUSpecificCPUDispatchMVType;
+        << MVTypeForDiag;
     S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
     return true;
   }
 
-  if (std::distance(NewFD->attr_begin(), NewFD->attr_end()) != 1)
+  if (std::distance(NewFD->attr_begin(), NewFD->attr_end()) > 1)
     return S.Diag(NewFD->getLocation(), diag::err_multiversion_no_other_attrs)
-           << IsCPUSpecificCPUDispatchMVType;
+           << MVTypeForDiag;
 
   if (NewFD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
     return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
-           << IsCPUSpecificCPUDispatchMVType << FuncTemplates;
+           << MVTypeForDiag << FuncTemplates;
 
   if (const auto *NewCXXFD = dyn_cast<CXXMethodDecl>(NewFD)) {
     if (NewCXXFD->isVirtual())
       return S.Diag(NewCXXFD->getLocation(),
                     diag::err_multiversion_doesnt_support)
-             << IsCPUSpecificCPUDispatchMVType << VirtFuncs;
+             << MVTypeForDiag << VirtFuncs;
 
     if (const auto *NewCXXCtor = dyn_cast<CXXConstructorDecl>(NewFD))
       return S.Diag(NewCXXCtor->getLocation(),
                     diag::err_multiversion_doesnt_support)
-             << IsCPUSpecificCPUDispatchMVType << Constructors;
+             << MVTypeForDiag << Constructors;
 
     if (const auto *NewCXXDtor = dyn_cast<CXXDestructorDecl>(NewFD))
       return S.Diag(NewCXXDtor->getLocation(),
                     diag::err_multiversion_doesnt_support)
-             << IsCPUSpecificCPUDispatchMVType << Destructors;
+             << MVTypeForDiag << Destructors;
   }
 
   if (NewFD->isDeleted())
     return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
-           << IsCPUSpecificCPUDispatchMVType << DeletedFuncs;
+           << MVTypeForDiag << DeletedFuncs;
 
   if (NewFD->isDefaulted())
     return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
-           << IsCPUSpecificCPUDispatchMVType << DefaultedFuncs;
+           << MVTypeForDiag << DefaultedFuncs;
 
   if (NewFD->isConstexpr() && (MVType == MultiVersioning::CPUDispatch ||
                                MVType == MultiVersioning::CPUSpecific))
     return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
-           << IsCPUSpecificCPUDispatchMVType << ConstexprFuncs;
+           << MVTypeForDiag << ConstexprFuncs;
 
   QualType NewQType = S.getASTContext().getCanonicalType(NewFD->getType());
   const auto *NewType = cast<FunctionType>(NewQType);
   QualType NewReturnType = NewType->getReturnType();
 
   if (NewReturnType->isUndeducedType())
     return S.Diag(NewFD->getLocation(), diag::err_multiversion_doesnt_support)
-           << IsCPUSpecificCPUDispatchMVType << DeducedReturn;
+           << MVTypeForDiag << DeducedReturn;
 
   // Only allow transition to MultiVersion if it hasn't been used.
   if (OldFD && CausesMV && OldFD->isUsed(false))
@@ -9628,15 +9628,18 @@
     Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD,
     MultiVersioning::Type NewMVType, const TargetAttr *NewTA,
     const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec,
-    bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious,
-    LookupResult &Previous) {
+    const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl,
+    bool &MergeTypeWithPrevious, LookupResult &Previous) {
 
   MultiVersioning::Type OldMVType = getMultiVersionType(OldFD);
+
   // Disallow mixing of multiversioning types.
-  if ((OldMVType == MultiVersioning::Target &&
-       NewMVType != MultiVersioning::Target) ||
-      (NewMVType == MultiVersioning::Target &&
-       OldMVType != MultiVersioning::Target)) {
+  if (OldMVType != MultiVersioning::None &&
+      NewMVType != MultiVersioning::None && OldMVType != NewMVType &&
+      !(OldMVType == MultiVersioning::CPUDispatch &&
+        NewMVType == MultiVersioning::CPUSpecific) &&
+      !(OldMVType == MultiVersioning::CPUSpecific &&
+        NewMVType == MultiVersioning::CPUDispatch)) {
     S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed);
     S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
     NewFD->setInvalidDecl();
@@ -9678,6 +9681,25 @@
         NewFD->setInvalidDecl();
         return true;
       }
+    } else if (NewMVType == MultiVersioning::TargetClones) {
+      const auto *CurClones = CurFD->getAttr<TargetClonesAttr>();
+      Redeclaration = true;
+      OldDecl = CurFD;
+      MergeTypeWithPrevious = true;
+      NewFD->setIsMultiVersion();
+
+      if (CurClones && NewClones &&
+          (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() ||
+           !std::equal(CurClones->featuresStrs_begin(),
+                       CurClones->featuresStrs_end(),
+                       NewClones->featuresStrs_begin()))) {
+        S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match);
+        S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
+        NewFD->setInvalidDecl();
+        return true;
+      }
+
+      return false;
     } else {
       const auto *CurCPUSpec = CurFD->getAttr<CPUSpecificAttr>();
       const auto *CurCPUDisp = CurFD->getAttr<CPUDispatchAttr>();
@@ -9760,7 +9782,6 @@
   return false;
 }
 
-
 /// Check the validity of a mulitversion function declaration.
 /// Also sets the multiversion'ness' of the function itself.
 ///
@@ -9774,10 +9795,12 @@
   const auto *NewTA = NewFD->getAttr<TargetAttr>();
   const auto *NewCPUDisp = NewFD->getAttr<CPUDispatchAttr>();
   const auto *NewCPUSpec = NewFD->getAttr<CPUSpecificAttr>();
+  const auto *NewTargetClones = NewFD->getAttr<TargetClonesAttr>();
 
   // Mixing Multiversioning types is prohibited.
-  if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) ||
-      (NewCPUDisp && NewCPUSpec)) {
+  if ((static_cast<bool>(NewTA) + static_cast<bool>(NewCPUDisp) +
+       static_cast<bool>(NewCPUSpec) + static_cast<bool>(NewTargetClones)) >
+      1) {
     S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed);
     NewFD->setInvalidDecl();
     return true;
@@ -9788,9 +9811,8 @@
   // Main isn't allowed to become a multiversion function, however it IS
   // permitted to have 'main' be marked with the 'target' optimization hint.
   if (NewFD->isMain()) {
-    if ((MVType == MultiVersioning::Target && NewTA->isDefaultVersion()) ||
-        MVType == MultiVersioning::CPUDispatch ||
-        MVType == MultiVersioning::CPUSpecific) {
+    if (MVType != MultiVersioning::None &&
+        (MVType != MultiVersioning::Target || NewTA->isDefaultVersion())) {
       S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main);
       NewFD->setInvalidDecl();
       return true;
@@ -9814,7 +9836,10 @@
   if (!OldFD->isMultiVersion() && MVType == MultiVersioning::None)
     return false;
 
-  if (OldFD->isMultiVersion() && MVType == MultiVersioning::None) {
+  // MultiVersioned redeclarations aren't allowed to omit the attribute except
+  // for target_clones.
+  if (OldFD->isMultiVersion() && MVType == MultiVersioning::None &&
+      getMultiVersionType(OldFD) != MultiVersioning::TargetClones) {
     S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl)
         << (getMultiVersionType(OldFD) != MultiVersioning::Target);
     NewFD->setInvalidDecl();
@@ -9826,8 +9851,18 @@
     return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA,
                                             Redeclaration, OldDecl,
                                             MergeTypeWithPrevious, Previous);
-  // Previous declarations lack CPUDispatch/CPUSpecific.
-  if (!OldFD->isMultiVersion()) {
+
+  if (!OldFD->isMultiVersion() && MVType == MultiVersioning::TargetClones) {
+    if (OldFD->isUsed(false)) {
+      NewFD->setInvalidDecl();
+      return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used);
+    }
+    OldFD->setIsMultiVersion();
+  }
+
+  // Check if previous declarations lack CPUDispatch/CPUSpecific.
+  if (!OldFD->isMultiVersion() && (MVType == MultiVersioning::CPUDispatch ||
+                                   MVType == MultiVersioning::CPUSpecific)) {
     S.Diag(OldFD->getLocation(), diag::err_multiversion_required_in_redecl)
         << 1;
     S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here);
@@ -9839,8 +9874,8 @@
   // appropriate attribute in the current function decl.  Resolve that these are
   // still compatible with previous declarations.
   return CheckMultiVersionAdditionalDecl(
-      S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration,
-      OldDecl, MergeTypeWithPrevious, Previous);
+      S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewTargetClones,
+      Redeclaration, OldDecl, MergeTypeWithPrevious, Previous);
 }
 
 /// Perform semantic checking of a new function declaration.
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -1314,6 +1314,7 @@
   void EmitAliasDefinition(GlobalDecl GD);
   void emitIFuncDefinition(GlobalDecl GD);
   void emitCPUDispatchDefinition(GlobalDecl GD);
+  void EmitTargetClonesResolver(GlobalDecl GD);
   void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
   void EmitObjCIvarInitializations(ObjCImplementationDecl *D);
 
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -917,6 +917,19 @@
   }
 }
 
+/// Append ".<version>.<index>" for the attribute's current version; an
+/// "arch=<cpu>" version is written as "arch_<cpu>".
+static void AppendTargetClonesMangling(const CodeGenModule &CGM,
+                                       const TargetClonesAttr *Attr,
+                                       raw_ostream &Out) {
+  StringRef Version = Attr->getCurFeatureStr();
+  // consume_front strips the "arch=" prefix when it is present.
+  const bool IsArch = Version.consume_front("arch=");
+
+  Out << '.' << (IsArch ? "arch_" : "") << Version;
+  Out << '.' << Attr->ActiveArgIndex;
+}
+
 static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
                                       const NamedDecl *ND,
                                       bool OmitMultiVersionMangling = false) {
@@ -950,6 +963,8 @@
       if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())
         AppendCPUSpecificCPUDispatchMangling(
             CGM, FD->getAttr<CPUSpecificAttr>(), Out);
+      else if (FD->isTargetClonesMultiVersion())
+        AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(), Out);
       else
         AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
     }
@@ -1013,12 +1028,19 @@
   // Since CPUSpecific can require multiple emits per decl, store the manglings
   // separately.
   if (FD &&
-      (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) {
+      (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion() ||
+       FD->isTargetClonesMultiVersion())) {
     const auto *SD = FD->getAttr<CPUSpecificAttr>();
+    const auto *TC = FD->getAttr<TargetClonesAttr>();
 
-    std::pair<GlobalDecl, unsigned> SpecCanonicalGD{
-        CanonicalGD,
-        SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()};
+    unsigned VersionID = std::numeric_limits<unsigned>::max();
+
+    if (SD)
+      VersionID = SD->ActiveArgIndex;
+    else if (TC)
+      VersionID = TC->ActiveArgIndex;
+
+    std::pair<GlobalDecl, unsigned> SpecCanonicalGD{CanonicalGD, VersionID};
 
     auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD);
     if (FoundName != CPUSpecificMangledDeclNames.end())
@@ -1376,9 +1398,10 @@
   const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
   FD = FD ? FD->getMostRecentDecl() : FD;
   const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
-  const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
   bool AddedAttr = false;
-  if (TD || SD) {
+
+  if (FD && (TD || FD->hasAttr<CPUSpecificAttr>() ||
+             FD->hasAttr<TargetClonesAttr>())) {
     llvm::StringMap<bool> FeatureMap;
     getFunctionFeatureMap(FeatureMap, FD);
 
@@ -2111,6 +2134,9 @@
   if (Global->hasAttr<CPUDispatchAttr>())
     return emitCPUDispatchDefinition(GD);
 
+  if (Global->hasAttr<CPUSpecificAttr>() || Global->hasAttr<TargetClonesAttr>())
+    return EmitGlobalFunctionDefinition(GD, nullptr);
+
   // If this is CUDA, be selective about which declarations we emit.
   if (LangOpts.CUDA) {
     if (LangOpts.CUDAIsDevice) {
@@ -2526,6 +2552,51 @@
   CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options);
 }
 
+/// Emit the ifunc and the resolver function for a target_clones function.
+void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) {
+  const auto *FD = cast<FunctionDecl>(GD.getDecl());
+  assert(FD && "Not a FunctionDecl?");
+  const auto *ClonesAttr = FD->getAttr<TargetClonesAttr>();
+  assert(ClonesAttr && "Not a target_clones Function?");
+  llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType());
+
+  // Force emission of the IFunc.
+  GetOrCreateMultiVersionIFunc(GD, DeclTy, FD);
+  // getMangledNameImpl returns std::string by value, so keep an owning copy; a
+  // StringRef bound to that temporary would dangle before it is used.
+  std::string ResolverName =
+      getMangledNameImpl(*this, GD, FD, /*OmitMVMangling*/ true) + ".resolver";
+  llvm::Type *ResolverType = llvm::FunctionType::get(
+      llvm::PointerType::get(DeclTy,
+                             Context.getTargetAddressSpace(FD->getType())),
+      false);
+  auto *ResolverFunc = cast<llvm::Function>(
+      GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
+                              /*ForVTable=*/false));
+
+  SmallVector<CodeGenFunction::TargetClonesMultiVersionResolverOption, 10>
+      Options;
+  const TargetInfo &Target = getTarget();
+  // Step the attribute's active index through every version (reset below).
+  ClonesAttr->ActiveArgIndex = 0;
+  while (ClonesAttr->ActiveArgIndex < ClonesAttr->featuresStrs_size()) {
+    std::string MangledName = getMangledName(GD);
+    llvm::Constant *Func = GetOrCreateLLVMFunction(
+        MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false,
+        /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+    Options.emplace_back(Target, cast<llvm::Function>(Func),
+                         ClonesAttr->getCurFeatureStr());
+    ClonesAttr->AdvanceActiveArgIndex();
+  }
+  ClonesAttr->ActiveArgIndex = 0;
+
+  std::stable_sort(
+      Options.begin(), Options.end(),
+      std::greater<CodeGenFunction::TargetClonesMultiVersionResolverOption>());
+  CodeGenFunction CGF(*this);
+  CGF.EmitTargetClonesMultiVersionResolver(ResolverFunc, Options);
+}
+
 /// If an ifunc for the specified mangled name is not in the module, create and
 /// return an llvm IFunc Function with the specified type.
 llvm::Constant *
@@ -2540,7 +2611,8 @@
   // Since this is the first time we've created this IFunc, make sure
   // that we put this multiversioned function into the list to be
   // replaced later if necessary (target multiversioning only).
-  if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
+  if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion() &&
+      !FD->isTargetClonesMultiVersion())
     MultiVersionFuncs.push_back(GD);
 
   std::string ResolverName = MangledName + ".resolver";
@@ -3877,6 +3949,16 @@
       ++Spec->ActiveArgIndex;
       EmitGlobalFunctionDefinition(GD, nullptr);
     }
+  } else if (D->isTargetClonesMultiVersion()) {
+    auto *Clone = D->getAttr<TargetClonesAttr>();
+    // If there is another specific version we need to emit, do so here. Then,
+    // the last thing we do is emit the resolver.
+    Clone->AdvanceActiveArgIndex();
+    if (Clone->ActiveArgIndex < Clone->featuresStrs_size())
+      EmitGlobalFunctionDefinition(GD, nullptr);
+    else
+      EmitTargetClonesResolver(GD);
+    Clone->ActiveArgIndex = 0;
   }
 }
 
@@ -5264,6 +5346,15 @@
                                              FeaturesTmp);
     std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
     Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
+  } else if (const auto *Clones = FD->getAttr<TargetClonesAttr>()) {
+    StringRef Cur = Clones->getCurFeatureStr();
+    std::vector<std::string> Features;
+
+    if (Cur.startswith("arch="))
+      TargetCPU = Cur.substr(sizeof("arch=") - 1);
+    else if (Cur != "default")
+      Features.push_back((StringRef{"+"} + Cur).str());
+    Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
   } else {
     Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU,
                           Target.getTargetOpts().Features);
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -4313,6 +4313,27 @@
       ArrayRef<CPUDispatchMultiVersionResolverOption> Options);
   static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
 
+  struct TargetClonesMultiVersionResolverOption {
+    llvm::Function *Function;
+    StringRef FeatureStr;
+    unsigned Priority;
+    TargetClonesMultiVersionResolverOption(const TargetInfo &TargInfo,
+                                           llvm::Function *F, StringRef Feature)
+        : Function(F), FeatureStr(Feature), Priority(0u) {
+      if (FeatureStr.startswith("arch="))
+        Priority = TargInfo.multiVersionSortPriority(
+            FeatureStr.drop_front(sizeof("arch=") - 1));
+      else if (FeatureStr != "default")
+        Priority = TargInfo.multiVersionSortPriority(FeatureStr);
+    }
+    bool operator>(const TargetClonesMultiVersionResolverOption &Other) const {
+      return Priority > Other.Priority;
+    }
+  };
+  void EmitTargetClonesMultiVersionResolver(
+      llvm::Function *Resolver,
+      ArrayRef<TargetClonesMultiVersionResolverOption> Options);
+
 private:
   QualType getVarArgType(const Expr *Arg);
 
@@ -4332,6 +4353,8 @@
   llvm::Value *EmitX86CpuInit();
   llvm::Value *
   FormResolverCondition(const TargetMultiVersionResolverOption &RO);
+  llvm::Value *
+  FormResolverCondition(const TargetClonesMultiVersionResolverOption &RO);
 };
 
 inline DominatingLLVMValue::saved_type
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -2382,6 +2382,17 @@
   return TrueCondition;
 }
 
+llvm::Value *CodeGenFunction::FormResolverCondition(
+    const TargetClonesMultiVersionResolverOption &RO) {
+  if (RO.FeatureStr.startswith("arch="))
+    return EmitX86CpuIs(RO.FeatureStr.drop_front(sizeof("arch=") - 1));
+
+  if (RO.FeatureStr == "default")
+    return nullptr;
+
+  return EmitX86CpuSupports(RO.FeatureStr);
+}
+
 void CodeGenFunction::EmitTargetMultiVersionResolver(
     llvm::Function *Resolver,
     ArrayRef<TargetMultiVersionResolverOption> Options) {
@@ -2456,6 +2467,36 @@
   Builder.ClearInsertionPoint();
 }
 
+void CodeGenFunction::EmitTargetClonesMultiVersionResolver(
+    llvm::Function *Resolver,
+    ArrayRef<TargetClonesMultiVersionResolverOption> Options) {
+  assert((getContext().getTargetInfo().getTriple().getArch() ==
+              llvm::Triple::x86 ||
+          getContext().getTargetInfo().getTriple().getArch() ==
+              llvm::Triple::x86_64) &&
+         "Only implemented for x86 targets");
+  llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
+  Builder.SetInsertPoint(CurBlock);
+  EmitX86CpuInit();
+
+  for (const TargetClonesMultiVersionResolverOption &RO : Options) {
+    Builder.SetInsertPoint(CurBlock);
+    llvm::Value *TrueCondition = FormResolverCondition(RO);
+
+    if (TrueCondition) {
+      llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver);
+      llvm::IRBuilder<> RetBuilder(RetBlock);
+      RetBuilder.CreateRet(RO.Function);
+      CurBlock = createBasicBlock("ro_else", Resolver);
+      Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock);
+    } else {
+      // Emit the default version and end emission.
+      Builder.CreateRet(RO.Function);
+      return;
+    }
+  }
+}
+
 llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) {
   if (CGDebugInfo *DI = getDebugInfo())
     return DI->SourceLocToDebugLoc(Location);
Index: lib/AST/Decl.cpp
===================================================================
--- lib/AST/Decl.cpp
+++ lib/AST/Decl.cpp
@@ -2928,6 +2928,9 @@
 bool FunctionDecl::isCPUSpecificMultiVersion() const {
   return isMultiVersion() && hasAttr<CPUSpecificAttr>();
 }
+bool FunctionDecl::isTargetClonesMultiVersion() const {
+  return isMultiVersion() && hasAttr<TargetClonesAttr>();
+}
 
 void
 FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) {
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -3363,6 +3363,9 @@
                                       SourceLocation *ArgLocation = nullptr);
   bool checkSectionName(SourceLocation LiteralLoc, StringRef Str);
   bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str);
+  bool checkTargetClonesAttr(SourceLocation LiteralLoc, StringRef Str,
+                             bool &HasDefault,
+                             SmallVectorImpl<StringRef> &Strings);
   bool checkMSInheritanceAttrOnDefinition(
       CXXRecordDecl *RD, SourceRange Range, bool BestCase,
       MSInheritanceAttr::Spelling SemanticSpelling);
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -2502,7 +2502,8 @@
   "%0 attribute requires OpenCL version %1%select{| or above}2">;
 def warn_unsupported_target_attribute
     : Warning<"%select{unsupported|duplicate}0%select{| architecture}1 '%2' in"
-              " the 'target' attribute string; 'target' attribute ignored">,
+              " the '%select{target|target_clones}3' attribute string; "
+              "'%select{target|target_clones}3' attribute ignored">,
       InGroup<IgnoredAttributes>;
 def err_attribute_unsupported
     : Error<"%0 attribute is not supported for this target">;
@@ -9425,8 +9426,8 @@
 def err_multiversion_noproto : Error<
   "multiversioned function must have a prototype">;
 def err_multiversion_no_other_attrs : Error<
-  "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined"
-  " with other attributes">;
+  "attribute '%select{target|cpu_specific|cpu_dispatch|target_clones}0'"
+  " multiversioning cannot be combined with other attributes">;
 def err_multiversion_diff : Error<
   "multiversioned function declaration has a different %select{calling convention"
   "|return type|constexpr specification|inline specification|storage class|"
@@ -9453,6 +9454,11 @@
   "body of cpu_dispatch function will be ignored">,
   InGroup<FunctionMultiVersioning>;
 
+def err_target_clone_must_have_default : Error <
+  "'target_clones' multiversioning requires a default target">;
+def err_target_clone_doesnt_match : Error <
+  "'target_clones' attribute does not match previous declaration">;
+
 // three-way comparison operator diagnostics
 def err_implied_comparison_category_type_not_found : Error<
   "cannot deduce return type of 'operator<=>' because type '%0' was not found; "
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -1594,6 +1594,40 @@
 }];
 }
 
+def TargetClonesDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the ``target_clones("OPTIONS")`` attribute. This attribute may be
+attached to a function declaration and causes function multiversioning, where
+multiple versions of the function will be emitted with different code
+generation options.  Additionally, these versions will be resolved at runtime
+based on the priority of their attribute options. All ``target_clones``
+functions are considered multiversioned functions.
+
+All multiversioned functions must contain a ``default`` (fallback)
+implementation; otherwise, usages of the function are considered invalid.
+Additionally, a function may not become multiversioned after its first use.
+
+The options to ``target_clones`` can either be a target-specific architecture
+(specified as ``arch=CPU``), or one of a list of subtarget features.
+
+Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2",
+"avx", "xop" and largely correspond to the machine-specific options handled by
+the front end.
+
+
+Note that unlike the ``target`` syntax, every option listed creates a new
+version, regardless of whether it is split on a comma inside or outside a string.
+The following will emit 4 versions of the function.
+
+  .. code-block:: c++
+
+    __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default")))
+    void foo() {}
+
+  }];
+}
+
 def MinVectorWidthDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -2022,6 +2022,31 @@
   }];
 }
 
+def TargetClones : InheritableAttr {
+  let Spellings = [GCC<"target_clones">];
+  let Args = [VariadicStringArgument<"featuresStrs">];
+  let Documentation = [TargetClonesDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+  let AdditionalMembers = [{
+    mutable unsigned ActiveArgIndex = 0;
+    void AdvanceActiveArgIndex() const {
+      ++ActiveArgIndex;
+      while(ActiveArgIndex < featuresStrs_size()) {
+        if (std::find(featuresStrs_begin(),
+                      featuresStrs_begin() + ActiveArgIndex,
+                      *(featuresStrs_begin() + ActiveArgIndex))
+            == (featuresStrs_begin() + ActiveArgIndex))
+          return;
+        ++ActiveArgIndex;
+      }
+    }
+
+    StringRef getCurFeatureStr() const {
+      return *(featuresStrs_begin() + ActiveArgIndex);
+    }
+  }];
+}
+
 def MinVectorWidth : InheritableAttr {
   let Spellings = [Clang<"min_vector_width">];
   let Args = [UnsignedArgument<"VectorWidth">];
Index: include/clang/AST/Decl.h
===================================================================
--- include/clang/AST/Decl.h
+++ include/clang/AST/Decl.h
@@ -2225,6 +2225,9 @@
   /// True if this function is a multiversioned processor specific function as a
   /// part of the cpu_specific/cpu_dispatch functionality.
   bool isCPUSpecificMultiVersion() const;
+  /// True if this function is a multiversioned function specified with the
+  /// attribute target_clones.
+  bool isTargetClonesMultiVersion() const;
 
   void setPreviousDeclaration(FunctionDecl * PrevDecl);
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to