jlebar updated this revision to Diff 121620.
jlebar added a comment.

Fix tests.


https://reviews.llvm.org/D39638

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-nvptx-ptx50.cu
  llvm/include/llvm/IR/IntrinsicsNVVM.td
  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
  llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
  llvm/test/CodeGen/NVPTX/atomics-sm60.ll

Index: llvm/test/CodeGen/NVPTX/atomics-sm60.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
+
+; CHECK-LABEL: .func test(
+define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
+; CHECK: atom.add.f64
+  %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %dp0, double %d)
+; CHECK: atom.global.add.f64
+  %r2 = call double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* %dp1, double %d)
+; CHECK: atom.shared.add.f64
+  %r3 = call double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* %dp3, double %d)
+  ret void
+}
+
+declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* nocapture, double) #1
+
+attributes #1 = { argmemonly nounwind }
Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1095,6 +1095,12 @@
   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
 def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
   (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
+def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+  (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
 
 defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
   atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1121,6 +1127,13 @@
 defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
   atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
 
+defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
+  atomic_load_add_f64_g, f64imm, fpimm, hasAtomAddF64>;
+defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
+  atomic_load_add_f64_s, f64imm, fpimm, hasAtomAddF64>;
+defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
+  atomic_load_add_f64_gen, f64imm, fpimm, hasAtomAddF64>;
+
 // atom_sub
 
 def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
Index: llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3449,6 +3449,7 @@
   }
 
   case Intrinsic::nvvm_atomic_load_add_f32:
+  case Intrinsic::nvvm_atomic_load_add_f64:
   case Intrinsic::nvvm_atomic_load_inc_32:
   case Intrinsic::nvvm_atomic_load_dec_32:
 
Index: llvm/include/llvm/IR/IntrinsicsNVVM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -683,10 +683,15 @@
       Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
 
 
-// Atomic not available as an llvm intrinsic.
+// Atomics not available as llvm intrinsics.
   def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
           [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
                                       [IntrArgMemOnly, NoCapture<0>]>;
+  // Atomic add of f64 requires sm_60.
+  def int_nvvm_atomic_load_add_f64 : Intrinsic<[llvm_double_ty],
+          [LLVMAnyPointerType<llvm_double_ty>, llvm_double_ty],
+                                      [IntrArgMemOnly, NoCapture<0>]>;
+
   def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
           [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
                                       [IntrArgMemOnly, NoCapture<0>]>;
Index: clang/test/CodeGen/builtins-nvptx-ptx50.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-nvptx-ptx50.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
+// RUN:            -fcuda-is-device -S -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK %s
+//
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_50 \
+// RUN:   -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+// We have to keep all builtins that depend on a particular target feature in
+// the same function, because codegen stops after the very first function that
+// encounters an error, so -verify would not be able to find errors in
+// subsequent functions.
+
+// CHECK-LABEL: test_fn
+__device__ void test_fn(double d, double* double_ptr) {
+  // CHECK: call double @llvm.nvvm.atomic.load.add.f64.p0f64
+  // expected-error@+1 {{'__nvvm_atom_add_gen_d' needs target feature satom}}
+  __nvvm_atom_add_gen_d(double_ptr, d);
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -9580,6 +9580,16 @@
     return Builder.CreateCall(FnALAF32, {Ptr, Val});
   }
 
+  case NVPTX::BI__nvvm_atom_add_gen_d: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    // atomicrmw only deals with integer arguments, so we need to use
+    // LLVM's nvvm_atomic_load_add_f64 intrinsic.
+    Value *FnALAF64 =
+        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
+    return Builder.CreateCall(FnALAF64, {Ptr, Val});
+  }
+
   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
     Value *Ptr = EmitScalarExpr(E->getArg(0));
     Value *Val = EmitScalarExpr(E->getArg(1));
Index: clang/include/clang/Basic/BuiltinsNVPTX.def
===================================================================
--- clang/include/clang/Basic/BuiltinsNVPTX.def
+++ clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -481,7 +481,7 @@
 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", "satom")
 BUILTIN(__nvvm_atom_add_g_d, "ddD*1d", "n")
 BUILTIN(__nvvm_atom_add_s_d, "ddD*3d", "n")
-BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n")
+TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", "satom")
 TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", "satom")
 TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", "satom")
 
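For reference, a minimal usage sketch (not part of this patch; the kernel and
variable names are hypothetical) showing how the now-gated builtin is meant to
be driven from CUDA source, assuming an ordinary clang CUDA compile along the
lines of clang++ --cuda-gpu-arch=sm_60:

  // Double-precision atomic accumulation into *sum. On sm_60 the builtin
  // lowers to @llvm.nvvm.atomic.load.add.f64, which the NVPTX backend
  // selects to atom.add.f64; on sm_50 and earlier the call is rejected
  // with the "needs target feature satom" diagnostic tested above.
  __global__ void accumulate(double *sum, const double *vals, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
      __nvvm_atom_add_gen_d(sum, vals[i]);
  }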