tra created this revision.
Herald added subscribers: hiraditya, sanjoy, jholewinski.

https://reviews.llvm.org/D40872

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/Headers/__clang_cuda_intrinsics.h
  llvm/include/llvm/IR/IntrinsicsNVVM.td
  llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
  llvm/test/CodeGen/NVPTX/fns.ll

Index: llvm/test/CodeGen/NVPTX/fns.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/fns.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s
+
+declare i32 @llvm.nvvm.fns(i32, i32, i32)
+
+; CHECK-LABEL: .func{{.*}}fns
+define i32 @fns(i32 %mask, i32 %base, i32 %offset) {
+  ; CHECK: ld.param.u32 	[[MASK:%r[0-9]+]], [fns_param_0];
+  ; CHECK: ld.param.u32 	[[BASE:%r[0-9]+]], [fns_param_1];
+  ; CHECK: ld.param.u32 	[[OFFSET:%r[0-9]+]], [fns_param_2];
+
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], [[BASE]], [[OFFSET]];
+  %r0 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], [[BASE]], 0;
+  %r1 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 0);
+  %r01 = add i32 %r0, %r1;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], 1, [[OFFSET]];
+  %r2 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, [[MASK]], 1, 0;
+  %r3 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 0);
+  %r23 = add i32 %r2, %r3;
+  %r0123 = add i32 %r01, %r23;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, [[BASE]], [[OFFSET]];
+  %r4 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, [[BASE]], 0;
+  %r5 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 0);
+  %r45 = add i32 %r4, %r5;
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, 1, [[OFFSET]];
+  %r6 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 %offset);
+  ; CHECK:  fns.b32 	{{%r[0-9]+}}, 2, 1, 0;
+  %r7 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 0);
+  %r67 = add i32 %r6, %r7;
+  %r4567 = add i32 %r45, %r67;
+  %r = add i32 %r0123, %r4567;
+  ret i32 %r;
+}
+
Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -979,6 +979,33 @@
 def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
   Float64Regs, int_nvvm_bitcast_d2ll>;
 
+//
+// FNS
+//
+
+class INT_FNS_MBO<dag ins, dag Operands>
+  : NVPTXInst<(outs Int32Regs:$dst), ins,
+               "fns.b32 \t$dst, $mask, $base, $offset;",
+               [(set Int32Regs:$dst, Operands )]>,
+    Requires<[hasPTX60, hasSM30]>;
+
+def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
+                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
+                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
+def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
+                     (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
+def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
+                     (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
+def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
+                     (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
+def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
+                     (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
+def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
+                     (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
+def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
+                     (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
+
 //-----------------------------------
 // Atomic Functions
 //-----------------------------------
Index: llvm/include/llvm/IR/IntrinsicsNVVM.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -682,6 +682,11 @@
   def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
       Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
 
+// FNS
+
+  def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">,
+      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+                [IntrNoMem]>;
 
 // Atomics not available as llvm intrinsics.
   def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
Index: clang/lib/Headers/__clang_cuda_intrinsics.h
===================================================================
--- clang/lib/Headers/__clang_cuda_intrinsics.h
+++ clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -206,6 +206,10 @@
 
 inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }
 
+inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
+  return __nvvm_fns(mask, base, offset);
+}
+
 #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
 
 // Define __match* builtins CUDA-9 headers expect to see.
Index: clang/include/clang/Basic/BuiltinsNVPTX.def
===================================================================
--- clang/include/clang/Basic/BuiltinsNVPTX.def
+++ clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -371,6 +371,9 @@
 BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
 BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
 
+// FNS
+TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", "ptx60")
+
 // Sync
 
 BUILTIN(__syncthreads, "v", "")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D40872: [NVPTX, CUD... Artem Belevich via Phabricator via cfe-commits

Reply via email to