gchatelet updated this revision to Diff 198436.
gchatelet added a comment.
Herald added a subscriber: jdoerfert.

- Add documentation.
- Fix permissive HasNoRuntimeAttribute


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61634/new/

https://reviews.llvm.org/D61634

Files:
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Basic/AttrDocs.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/IR/IRBuilder.cpp
  llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
  llvm/test/CodeGen/X86/memcpy.ll

Index: llvm/test/CodeGen/X86/memcpy.ll
===================================================================
--- llvm/test/CodeGen/X86/memcpy.ll
+++ llvm/test/CodeGen/X86/memcpy.ll
@@ -7,7 +7,7 @@
 
 
 ; Variable memcpy's should lower to calls.
-define i8* @test1(i8* %a, i8* %b, i64 %n) nounwind {
+define void @test1(i8* %a, i8* %b, i64 %n) nounwind {
 ; LINUX-LABEL: test1:
 ; LINUX:       # %bb.0: # %entry
 ; LINUX-NEXT:    jmp memcpy # TAILCALL
@@ -17,11 +17,11 @@
 ; DARWIN-NEXT:    jmp _memcpy ## TAILCALL
 entry:
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 )
-	ret i8* %a
+  ret void
 }
 
 ; Variable memcpy's should lower to calls.
-define i8* @test2(i64* %a, i64* %b, i64 %n) nounwind {
+define void @test2(i64* %a, i64* %b, i64 %n) nounwind {
 ; LINUX-LABEL: test2:
 ; LINUX:       # %bb.0: # %entry
 ; LINUX-NEXT:    jmp memcpy # TAILCALL
@@ -33,7 +33,25 @@
 	%tmp14 = bitcast i64* %a to i8*
 	%tmp25 = bitcast i64* %b to i8*
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp14, i8* align 8 %tmp25, i64 %n, i1 0 )
-	ret i8* %tmp14
+  ret void
+}
+
+; Variable length memcpy's with disabled runtime should lower to repmovsb.
+define void @memcpy_no_runtime(i8* %a, i8* %b, i64 %n) nounwind {
+; LINUX-LABEL: memcpy_no_runtime:
+; LINUX:       # %bb.0: # %entry
+; LINUX-NEXT:    movq %rdx, %rcx
+; LINUX-NEXT:    rep;movsb (%rsi), %es:(%rdi)
+; LINUX-NEXT:    retq
+;
+; DARWIN-LABEL: memcpy_no_runtime:
+; DARWIN:       ## %bb.0: ## %entry
+; DARWIN-NEXT:    movq %rdx, %rcx
+; DARWIN-NEXT:    rep;movsb (%rsi), %es:(%rdi)
+; DARWIN-NEXT:    retq
+entry:
+	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0 ) "no_runtime_for" = "memcpy"
+  ret void
 }
 
 ; Large constant memcpy's should lower to a call when optimizing for size.
Index: llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -314,5 +314,9 @@
                                   Size.getValueType(), Align, isVolatile,
                                   AlwaysInline, DstPtrInfo, SrcPtrInfo);
 
+  /// Handle runtime sizes through repmovsb when we AlwaysInline.
+  if (AlwaysInline)
+    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
   return SDValue();
 }
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -96,6 +96,17 @@
   return II;
 }
 
+static void ForwardNoRuntimeAttribute(const Function *F,
+                                                 StringRef FunctionName,
+                                                 CallInst *CI) {
+  if (F->hasFnAttribute("no_runtime_for")) {
+    const Attribute A = F->getFnAttribute("no_runtime_for");
+    if (A.getValueAsString().contains(FunctionName)) {
+      CI->addAttribute(AttributeList::FunctionIndex, A);
+    }
+  }
+}
+
 CallInst *IRBuilderBase::
 CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
              bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
@@ -103,7 +114,8 @@
   Ptr = getCastedInt8PtrValue(Ptr);
   Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
   Type *Tys[] = { Ptr->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -121,6 +133,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardNoRuntimeAttribute(F, "memset", CI);
+
   return CI;
 }
 
@@ -165,7 +179,8 @@
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -190,6 +205,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardNoRuntimeAttribute(F, "memcpy", CI);
+
   return CI;
 }
 
@@ -245,7 +262,8 @@
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -266,6 +284,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardNoRuntimeAttribute(F, "memmove", CI);
+
   return CI;
 }
 
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5547,6 +5547,16 @@
   }
 }
 
+static bool HasNoRuntimeAttribute(const CallInst &I, StringRef FunctionName) {
+  if (!I.hasFnAttr("no_runtime_for"))
+    return false;
+  SmallVector<StringRef, 4> pieces;
+  I.getAttribute(AttributeList::FunctionIndex, "no_runtime_for")
+      .getValueAsString()
+      .split(pieces, ",");
+  return is_contained(pieces, FunctionName);
+}
+
 /// Lower the call to the specified intrinsic function. If we want to emit this
 /// as a call to a named external function, return the name. Otherwise, lower it
 /// and return null.
@@ -5622,8 +5632,9 @@
     bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
     // FIXME: Support passing different dest/src alignments to the memcpy DAG
     // node.
+    bool isAlwaysInline = HasNoRuntimeAttribute(I, "memcpy");
     SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
-                               false, isTC,
+                               isAlwaysInline, isTC,
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1)));
     updateDAGForMaybeTailCall(MC);
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -1104,6 +1104,30 @@
       cast<NamedDecl>(D), AL.getAttributeSpellingListIndex()));
 }
 
+static void handleNoRuntimeFor(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (D->hasAttr<NoRuntimeForAttr>()) {
+    S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
+    return;
+  }
+
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
+    return;
+
+  std::vector<StringRef> FunctionNames;
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+    StringRef FunctionName;
+    SourceLocation LiteralLoc;
+    if (!S.checkStringLiteralArgumentAttr(AL, I, FunctionName, &LiteralLoc))
+      return;
+    // Check valid function name.
+    FunctionNames.push_back(FunctionName);
+  }
+
+  D->addAttr(::new (S.Context) NoRuntimeForAttr(
+      AL.getRange(), S.Context, FunctionNames.data(), FunctionNames.size(),
+      AL.getAttributeSpellingListIndex()));
+}
+
 static void handlePassObjectSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (D->hasAttr<PassObjectSizeAttr>()) {
     S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
@@ -6734,6 +6758,9 @@
   case ParsedAttr::AT_DiagnoseIf:
     handleDiagnoseIfAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_NoRuntimeFor:
+    handleNoRuntimeFor(S, D, AL);
+    break;
   case ParsedAttr::AT_ExtVectorType:
     handleExtVectorTypeAttr(S, D, AL);
     break;
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1846,6 +1846,16 @@
       FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
     if (TargetDecl->hasAttr<ConvergentAttr>())
       FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+    if (const auto *Attr = TargetDecl->getAttr<NoRuntimeForAttr>()) {
+      llvm::SmallVector<StringRef, 4> FunctionNames(Attr->functionNames_begin(),
+                                                    Attr->functionNames_end());
+      llvm::sort(FunctionNames);
+      FunctionNames.erase(
+          std::unique(FunctionNames.begin(), FunctionNames.end()),
+          FunctionNames.end());
+      FuncAttrs.addAttribute("no_runtime_for",
+                             llvm::join(FunctionNames, ","));
+    }
 
     if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
       AddAttributesFromFunctionProtoType(
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -3738,7 +3738,7 @@
 def WebAssemblyImportModuleDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-Clang supports the ``__attribute__((import_module(<module_name>)))`` 
+Clang supports the ``__attribute__((import_module(<module_name>)))``
 attribute for the WebAssembly target. This attribute may be attached to a
 function declaration, where it modifies how the symbol is to be imported
 within the WebAssembly linking environment.
@@ -3755,7 +3755,7 @@
 def WebAssemblyImportNameDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-Clang supports the ``__attribute__((import_name(<name>)))`` 
+Clang supports the ``__attribute__((import_name(<name>)))``
 attribute for the WebAssembly target. This attribute may be attached to a
 function declaration, where it modifies how the symbol is to be imported
 within the WebAssembly linking environment.
@@ -3926,7 +3926,7 @@
 (`start_routine`) is called zero or more times by the `pthread_create` function,
 and that the fourth parameter (`arg`) is passed along. Note that the callback
 behavior of `pthread_create` is automatically recognized by Clang. In addition,
-the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for 
+the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for
 `#pragma omp target teams` and `#pragma omp parallel`, respectively, are also
 automatically recognized as broker functions. Further functions might be added
 in the future.
@@ -4105,3 +4105,30 @@
 ``__attribute__((malloc))``.
 }];
 }
+
+def NoRuntimeForDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``no_runtime_for`` attribute prevents the compiler from synthesizing calls
+to specific runtime functions.
+It is useful when designing runtime functions like ``memcpy`` or Objective-C
+runtime where the compiler would otherwise be able to replace the implementation
+by a call to the runtime library: resulting in a chicken and egg problem.
+
+.. code-block:: c++
+
+  extern "C" void *memcpy(char *dst, const char *src, size_t count)
+      __attribute__((no_runtime_for("memcpy"))) {
+    #pragma clang loop vectorize(enable) interleave(enable) unroll(disable)
+    for (;count >= 4;count -= 4, dst += 4, src += 4)
+      __builtin_memcpy(dst, src, 4);
+    switch (count) {
+      case 1: __builtin_memcpy(dst, src, 1); break;
+      case 2: __builtin_memcpy(dst, src, 2); break;
+      case 3: __builtin_memcpy(dst, src, 3); break;
+      default: break;
+    }
+    return dst;
+  }
+  }];
+}
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -3237,3 +3237,10 @@
   let Subjects = SubjectList<[NonParmVar, Function, Block, ObjCMethod]>;
   let Documentation = [ObjCExternallyRetainedDocs];
 }
+
+def NoRuntimeFor : InheritableAttr {
+  let Spellings = [Clang<"no_runtime_for">];
+  let Args = [VariadicStringArgument<"FunctionNames">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [NoRuntimeForDocs];
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to