mzolotukhin created this revision.
mzolotukhin added reviewers: rsmith, aaron.ballman, doug.gregor, t.p.northover,
ab, mcrosier, hfinkel, majnemer.
mzolotukhin added a subscriber: cfe-commits.
Herald added a subscriber: aemerson.
Currently clang provides no general way to generate non-temporal loads/stores.
There are some architecture-specific builtins for doing so (e.g. on x86), but
there is no way to generate a non-temporal store on, e.g., AArch64. This patch
adds generic builtins which are expanded to a simple load/store with the
'!nontemporal' metadata attached in IR.
Previously I tried to tackle the same issue by introducing
__attribute__((nontemporal)) (see D12221), but was convinced that builtins
would be a better fit for this use case.
Does the patch look good?
Thanks,
Michael
http://reviews.llvm.org/D12313
Files:
include/clang/Basic/Builtins.def
include/clang/Basic/DiagnosticSemaKinds.td
include/clang/Sema/Sema.h
lib/CodeGen/CGBuiltin.cpp
lib/Sema/SemaChecking.cpp
test/CodeGen/Nontemporal.c
Index: test/CodeGen/Nontemporal.c
===================================================================
--- /dev/null
+++ test/CodeGen/Nontemporal.c
@@ -0,0 +1,56 @@
+// Test frontend handling of nontemporal builtins.
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+float f1, f2;
+double d1, d2;
+
+/* Exercises the explicitly sized variants (_1/_4/_8/_f) as well as the
+   generic form, including the implicit conversions performed between the
+   loaded/stored value and the destination type (e.g. float -> i32).  */
+void test_explicit_datasize (void) // CHECK-LABEL: define void @test_explicit_datasize
+{
+ __builtin_nontemporal_store(f1, &f2); // CHECK: store float{{.*}}!nontemporal
+ __builtin_nontemporal_store_f(f1, &f2); // CHECK: store float{{.*}}!nontemporal
+ __builtin_nontemporal_store_1(sc, &f2); // CHECK: store i8{{.*}}!nontemporal
+ __builtin_nontemporal_store_4(d1, &f2); // CHECK: store i32{{.*}}!nontemporal
+
+ f2 = __builtin_nontemporal_load(&f1); // CHECK: load float{{.*}}!nontemporal
+ f2 = __builtin_nontemporal_load_f(&f1); // CHECK: load float{{.*}}!nontemporal
+ sc = __builtin_nontemporal_load_1(&f2); // CHECK: load i8{{.*}}!nontemporal
+ d1 = __builtin_nontemporal_load_1(&f2); // CHECK: [[D1:%[a-z0-9._]+]] = load i8{{.*}}!nontemporal
+ // CHECK: sitofp i8 [[D1]] to double
+ si = __builtin_nontemporal_load_4(&f2); // CHECK: load i32{{.*}}!nontemporal
+ sll = __builtin_nontemporal_load_8(&f2); // CHECK: load i64{{.*}}!nontemporal
+ si = __builtin_nontemporal_load(&f1); // CHECK: [[SI:%[a-z0-9._]+]] = load float{{.*}}!nontemporal
+ // CHECK: fptosi float [[SI]] to i32
+}
+
+/* Exercises the generic builtin with every supported scalar width (1, 2, 4,
+   and 8 bytes, plus float and double) to verify the correct size-specific
+   builtin is selected from the pointee type of the address argument.  */
+void test_all_sizes (void) // CHECK-LABEL: define void @test_all_sizes
+{
+ __builtin_nontemporal_store(1, &uc); // CHECK: store i8{{.*}}!nontemporal
+ __builtin_nontemporal_store(1, &sc); // CHECK: store i8{{.*}}!nontemporal
+ __builtin_nontemporal_store(1, &us); // CHECK: store i16{{.*}}!nontemporal
+ __builtin_nontemporal_store(1, &ss); // CHECK: store i16{{.*}}!nontemporal
+ __builtin_nontemporal_store(1, &ui); // CHECK: store i32{{.*}}!nontemporal
+ __builtin_nontemporal_store(1, &si); // CHECK: store i32{{.*}}!nontemporal
+ __builtin_nontemporal_store(1, &ull); // CHECK: store i64{{.*}}!nontemporal
+ __builtin_nontemporal_store(1, &sll); // CHECK: store i64{{.*}}!nontemporal
+ __builtin_nontemporal_store(1.0, &f1); // CHECK: store float{{.*}}!nontemporal
+ __builtin_nontemporal_store(1.0, &d1); // CHECK: store double{{.*}}!nontemporal
+
+ uc = __builtin_nontemporal_load(&sc); // CHECK: load i8{{.*}}!nontemporal
+ sc = __builtin_nontemporal_load(&uc); // CHECK: load i8{{.*}}!nontemporal
+ us = __builtin_nontemporal_load(&ss); // CHECK: load i16{{.*}}!nontemporal
+ ss = __builtin_nontemporal_load(&us); // CHECK: load i16{{.*}}!nontemporal
+ ui = __builtin_nontemporal_load(&si); // CHECK: load i32{{.*}}!nontemporal
+ si = __builtin_nontemporal_load(&ui); // CHECK: load i32{{.*}}!nontemporal
+ ull = __builtin_nontemporal_load(&sll); // CHECK: load i64{{.*}}!nontemporal
+ sll = __builtin_nontemporal_load(&ull); // CHECK: load i64{{.*}}!nontemporal
+ f1 = __builtin_nontemporal_load(&f2); // CHECK: load float{{.*}}!nontemporal
+ d1 = __builtin_nontemporal_load(&d2); // CHECK: load double{{.*}}!nontemporal
+}
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -440,6 +440,9 @@
case Builtin::BI__sync_swap_8:
case Builtin::BI__sync_swap_16:
return SemaBuiltinAtomicOverloaded(TheCallResult);
+ case Builtin::BI__builtin_nontemporal_load:
+ case Builtin::BI__builtin_nontemporal_store:
+ return SemaBuiltinNontemporalOverloaded(TheCallResult);
#define BUILTIN(ID, TYPE, ATTRS)
#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
case Builtin::BI##ID: \
@@ -2209,6 +2212,152 @@
return TheCallResult;
}
+/// SemaBuiltinNontemporalOverloaded - We have a call to
+/// __builtin_nontemporal_store or __builtin_nontemporal_load, which is an
+/// overloaded function based on the pointer type of its last argument.
+/// The main ActOnCallExpr routines have already promoted the types of
+/// arguments because all of these calls are prototyped as void(...).
+///
+/// This function goes through and does final semantic checking for these
+/// builtins: it validates the address argument, selects the size-specific
+/// builtin that matches the pointee type, and rewrites the call to use it.
+ExprResult Sema::SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult) {
+  CallExpr *TheCall = (CallExpr *)TheCallResult.get();
+  DeclRefExpr *DRE =
+      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+  FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl());
+  unsigned BuiltinID = FDecl->getBuiltinID();
+  assert((BuiltinID == Builtin::BI__builtin_nontemporal_store ||
+          BuiltinID == Builtin::BI__builtin_nontemporal_load) &&
+         "Unexpected nontemporal load/store builtin!");
+  bool isStore = BuiltinID == Builtin::BI__builtin_nontemporal_store;
+  unsigned numArgs = isStore ? 2 : 1;
+
+  // Ensure that we have enough arguments to do type inference from: a store
+  // takes (value, address), a load takes just (address).  Report the real
+  // minimum (numArgs), not a hard-coded 1, so the diagnostic is correct for
+  // stores as well.
+  if (TheCall->getNumArgs() < numArgs) {
+    Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least)
+        << 0 << numArgs << TheCall->getNumArgs()
+        << TheCall->getCallee()->getSourceRange();
+    return ExprError();
+  }
+
+  // Inspect the last argument of the nontemporal builtin. This should always
+  // be a pointer type, whose element is an integral scalar, float scalar, or
+  // pointer type.
+  // Because it is a pointer type, we don't have to worry about any implicit
+  // casts here.
+  Expr *AddressArg = TheCall->getArg(numArgs - 1);
+  ExprResult AddressArgResult =
+      DefaultFunctionArrayLvalueConversion(AddressArg);
+  if (AddressArgResult.isInvalid())
+    return ExprError();
+  AddressArg = AddressArgResult.get();
+  TheCall->setArg(numArgs - 1, AddressArg);
+
+  const PointerType *pointerType = AddressArg->getType()->getAs<PointerType>();
+  if (!pointerType) {
+    Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_must_be_pointer)
+        << AddressArg->getType() << AddressArg->getSourceRange();
+    return ExprError();
+  }
+
+  QualType ValType = pointerType->getPointeeType();
+  if (!ValType->isIntegerType() && !ValType->isAnyPointerType() &&
+      !ValType->isBlockPointerType() && !ValType->isFloatingType()) {
+    Diag(DRE->getLocStart(),
+         diag::err_nontemporal_builtin_must_be_pointer_intfltptr)
+        << AddressArg->getType() << AddressArg->getSourceRange();
+    return ExprError();
+  }
+
+  // Strip any qualifiers off ValType.
+  ValType = ValType.getUnqualifiedType();
+
+#define BUILTIN_ROW(x) \
+  { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \
+    Builtin::BI##x##_8, Builtin::BI##x##_16, \
+    Builtin::BI##x##_f, Builtin::BI##x##_d }
+
+  static const unsigned BuiltinIndices[][7] = {
+    BUILTIN_ROW(__builtin_nontemporal_store),
+    BUILTIN_ROW(__builtin_nontemporal_load)
+  };
+#undef BUILTIN_ROW
+
+  // Determine the index of the size-specific builtin.  Floating-point types
+  // get the dedicated _f/_d variants so the value is not reinterpreted as an
+  // integer of the same width.
+  unsigned SizeIndex;
+  if (ValType->isFloatingType()) {
+    switch (Context.getTypeSizeInChars(ValType).getQuantity()) {
+    case 4: SizeIndex = 5; break;
+    case 8: SizeIndex = 6; break;
+    default:
+      Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_pointer_size)
+          << AddressArg->getType() << AddressArg->getSourceRange();
+      return ExprError();
+    }
+  } else {
+    switch (Context.getTypeSizeInChars(ValType).getQuantity()) {
+    case 1: SizeIndex = 0; break;
+    case 2: SizeIndex = 1; break;
+    case 4: SizeIndex = 2; break;
+    case 8: SizeIndex = 3; break;
+    case 16: SizeIndex = 4; break;
+    default:
+      Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_pointer_size)
+          << AddressArg->getType() << AddressArg->getSourceRange();
+      return ExprError();
+    }
+  }
+
+  // Get the decl for the concrete builtin from this, we can tell what the
+  // concrete integer type we should convert to is.
+  unsigned NewBuiltinID = BuiltinIndices[isStore ? 0 : 1][SizeIndex];
+  const char *NewBuiltinName = Context.BuiltinInfo.getName(NewBuiltinID);
+
+  // Perform builtin lookup to avoid redeclaring it.
+  DeclarationName DN(&Context.Idents.get(NewBuiltinName));
+  LookupResult Res(*this, DN, DRE->getLocStart(), LookupOrdinaryName);
+  LookupName(Res, TUScope, /*AllowBuiltinCreation=*/true);
+  assert(Res.getFoundDecl());
+  FunctionDecl *NewBuiltinDecl = dyn_cast<FunctionDecl>(Res.getFoundDecl());
+  if (!NewBuiltinDecl)
+    return ExprError();
+
+  // For stores convert the value (the first argument) to the type of the
+  // pointer (the second argument).
+  if (isStore) {
+    ExprResult Arg = TheCall->getArg(0);
+    InitializedEntity Entity = InitializedEntity::InitializeParameter(
+        Context, ValType, /*consume*/ false);
+    Arg = PerformCopyInitialization(Entity, SourceLocation(), Arg);
+    if (Arg.isInvalid())
+      return ExprError();
+
+    TheCall->setArg(0, Arg.get());
+  }
+
+  // Create a new DeclRefExpr to refer to the new decl.  Note that Sema's
+  // Context member is used throughout; no local ASTContext alias is needed.
+  DeclRefExpr *NewDRE = DeclRefExpr::Create(
+      Context, DRE->getQualifierLoc(), SourceLocation(), NewBuiltinDecl,
+      /*enclosing*/ false, DRE->getLocation(), Context.BuiltinFnTy,
+      DRE->getValueKind());
+
+  // Set the callee in the CallExpr.
+  // FIXME: This loses syntactic information.
+  QualType CalleePtrTy = Context.getPointerType(NewBuiltinDecl->getType());
+  ExprResult PromotedCall =
+      ImpCastExprToType(NewDRE, CalleePtrTy, CK_BuiltinFnToFnPtr);
+  TheCall->setCallee(PromotedCall.get());
+
+  // For loads change the result type of the call to match the original value
+  // type. This is arbitrary, but the codegen for these builtins is designed to
+  // handle it gracefully.
+  if (!isStore)
+    TheCall->setType(ValType);
+
+  return TheCallResult;
+}
+
/// CheckObjCString - Checks that the argument to the builtin
/// CFString constructor is correct
/// Note: It might also make sense to do the UTF-16 conversion here (would
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -111,6 +111,44 @@
return EmitFromInt(CGF, Result, T, ValueType);
}
+// Emit a store of the first argument through the second (pointer) argument,
+// tagged with !nontemporal metadata so the backend may select a
+// cache-bypassing store instruction.  Returns nullptr: the builtin produces
+// no value.
+static Value *MakeNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
+
+  llvm::Type *ValueTy = Val->getType();
+  // Use the alignment of the *pointee* type.  Querying the pointer argument's
+  // own type would yield the pointer width (e.g. 8 on x86-64) and over-align
+  // narrow stores such as an i8 store to a char object.
+  unsigned Align = CGF.getContext()
+                       .getTypeAlignInChars(
+                           E->getArg(1)->getType()->getPointeeType())
+                       .getQuantity();
+
+  // !nontemporal takes a single i32 1 operand per the LLVM LangRef.
+  llvm::MDNode *Node =
+      llvm::MDNode::get(CGF.getLLVMContext(),
+                        llvm::ConstantAsMetadata::get(CGF.Builder.getInt32(1)));
+
+  // Convert the type of the pointer to a pointer to the stored type.
+  Value *BC = CGF.Builder.CreateBitCast(
+      Address, llvm::PointerType::getUnqual(ValueTy), "cast");
+  StoreInst *SI = CGF.Builder.CreateStore(Val, BC);
+  SI->setMetadata(CGF.CGM.getModule().getMDKindID("nontemporal"), Node);
+  SI->setAlignment(Align);
+  return nullptr;
+}
+
+// Emit a load through the (pointer) argument, tagged with !nontemporal
+// metadata so the backend may select a cache-bypassing load instruction.
+static Value *MakeNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
+  // Use the alignment of the *pointee* type.  Querying the pointer argument's
+  // own type would yield the pointer width (e.g. 8 on x86-64) and over-align
+  // narrow loads such as an i8 load from a char object.
+  unsigned Align = CGF.getContext()
+                       .getTypeAlignInChars(
+                           E->getArg(0)->getType()->getPointeeType())
+                       .getQuantity();
+
+  // !nontemporal takes a single i32 1 operand per the LLVM LangRef.
+  llvm::MDNode *Node =
+      llvm::MDNode::get(CGF.getLLVMContext(),
+                        llvm::ConstantAsMetadata::get(CGF.Builder.getInt32(1)));
+
+  LoadInst *LI = CGF.Builder.CreateLoad(Address, "ntload");
+  LI->setMetadata(CGF.CGM.getModule().getMDKindID("nontemporal"), Node);
+  LI->setAlignment(Align);
+  return LI;
+}
+
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
llvm::AtomicRMWInst::BinOp Kind,
const CallExpr *E) {
@@ -1004,6 +1042,8 @@
case Builtin::BI__sync_lock_test_and_set:
case Builtin::BI__sync_lock_release:
case Builtin::BI__sync_swap:
+ case Builtin::BI__builtin_nontemporal_load:
+ case Builtin::BI__builtin_nontemporal_store:
llvm_unreachable("Shouldn't make it through sema");
case Builtin::BI__sync_fetch_and_add_1:
case Builtin::BI__sync_fetch_and_add_2:
@@ -1153,6 +1193,22 @@
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_nontemporal_load_1:
+ case Builtin::BI__builtin_nontemporal_load_2:
+ case Builtin::BI__builtin_nontemporal_load_4:
+ case Builtin::BI__builtin_nontemporal_load_8:
+ case Builtin::BI__builtin_nontemporal_load_16:
+ case Builtin::BI__builtin_nontemporal_load_f:
+ case Builtin::BI__builtin_nontemporal_load_d:
+ return RValue::get(MakeNontemporalLoad(*this, E));
+ case Builtin::BI__builtin_nontemporal_store_1:
+ case Builtin::BI__builtin_nontemporal_store_2:
+ case Builtin::BI__builtin_nontemporal_store_4:
+ case Builtin::BI__builtin_nontemporal_store_8:
+ case Builtin::BI__builtin_nontemporal_store_16:
+ case Builtin::BI__builtin_nontemporal_store_f:
+ case Builtin::BI__builtin_nontemporal_store_d:
+ return RValue::get(MakeNontemporalStore(*this, E));
case Builtin::BI__c11_atomic_is_lock_free:
case Builtin::BI__atomic_is_lock_free: {
// Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -8831,6 +8831,7 @@
bool SemaBuiltinLongjmp(CallExpr *TheCall);
bool SemaBuiltinSetjmp(CallExpr *TheCall);
ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult);
+ ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult);
ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult,
AtomicExpr::AtomicOp Op);
bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum,
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -6193,6 +6193,15 @@
"atomic %select{load|store}0 requires runtime support that is not "
"available for this target">;
+def err_nontemporal_builtin_must_be_pointer : Error<
+ "address argument to nontemporal builtin must be a pointer (%0 invalid)">;
+def err_nontemporal_builtin_must_be_pointer_intfltptr : Error<
+ "address argument to nontemporal builtin must be a pointer to integer, float "
+ "or pointer (%0 invalid)">;
+def err_nontemporal_builtin_pointer_size : Error<
+ "address argument to nontemporal builtin must be a pointer to 1,2,4,8 or 16 "
+ "byte type (%0 invalid)">;
+
def err_deleted_function_use : Error<"attempt to use a deleted function">;
def err_kern_type_not_void_return : Error<
Index: include/clang/Basic/Builtins.def
===================================================================
--- include/clang/Basic/Builtins.def
+++ include/clang/Basic/Builtins.def
@@ -1244,6 +1244,25 @@
BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn")
BUILTIN(__builtin___get_unsafe_stack_ptr, "v*", "Fn")
+// Nontemporal loads/stores builtins
+BUILTIN(__builtin_nontemporal_store, "v.", "t")
+BUILTIN(__builtin_nontemporal_store_1, "vcc*.", "")
+BUILTIN(__builtin_nontemporal_store_2, "vss*.", "")
+BUILTIN(__builtin_nontemporal_store_4, "vii*.", "")
+BUILTIN(__builtin_nontemporal_store_8, "vLLiLLi*.", "")
+BUILTIN(__builtin_nontemporal_store_16, "vLLLiLLLi*.", "")
+BUILTIN(__builtin_nontemporal_store_f, "vff*.", "")
+BUILTIN(__builtin_nontemporal_store_d, "vdd*.", "")
+
+BUILTIN(__builtin_nontemporal_load, "v.", "t")
+BUILTIN(__builtin_nontemporal_load_1, "cc*.", "")
+BUILTIN(__builtin_nontemporal_load_2, "ss*.", "")
+BUILTIN(__builtin_nontemporal_load_4, "ii*.", "")
+BUILTIN(__builtin_nontemporal_load_8, "LLiLLi*.", "")
+BUILTIN(__builtin_nontemporal_load_16, "LLLiLLLi*.", "")
+BUILTIN(__builtin_nontemporal_load_f, "ff*.", "")
+BUILTIN(__builtin_nontemporal_load_d, "dd*.", "")
+
#undef BUILTIN
#undef LIBBUILTIN
#undef LANGBUILTIN
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits