yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall, arsenm, rampitec, b-sumner.
Herald added subscribers: kerbowa, nhaehnle, jvesely.
yaxunl requested review of this revision.
Herald added a subscriber: wdng.

Currently some amdgcn builtins are defined with long int type,
which causes invalid IR on Windows since long int is 32 bit
on Windows whereas these builtins have 64 bit arguments.

As a comparison, generic clang builtins with 64 bit int arguments
or return use long long int to avoid this issue, since long long int
is 64 bit on Linux and Windows.

This patch uses long long int instead of long int to define 64 bit int
arguments or return for amdgcn builtins.


https://reviews.llvm.org/D103563

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/test/CodeGenCUDA/builtins-amdgcn.cu

Index: clang/test/CodeGenCUDA/builtins-amdgcn.cu
===================================================================
--- clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -1,4 +1,11 @@
-// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:  -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
+// RUN:  -o - | FileCheck %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:  -aux-triple x86_64-pc-windows-msvc -fcuda-is-device -emit-llvm %s \
+// RUN:  -o - | FileCheck %s
+
 #include "Inputs/cuda.h"
 
 // CHECK-LABEL: @_Z16use_dispatch_ptrPi(
@@ -22,3 +29,32 @@
 __global__ void endpgm() {
   __builtin_amdgcn_endpgm();
 }
+
+// Check the 64 bit argument is correctly passed to the intrinsic without truncation or assertion.
+
+// CHECK-LABEL: @_Z14test_uicmp_i64
+// CHECK:  store i64* %out, i64** %out.addr.ascast
+// CHECK-NEXT:  store i64 %a, i64* %a.addr.ascast
+// CHECK-NEXT:  store i64 %b, i64* %b.addr.ascast
+// CHECK-NEXT:  %[[V0:.*]] = load i64, i64* %a.addr.ascast
+// CHECK-NEXT:  %[[V1:.*]] = load i64, i64* %b.addr.ascast
+// CHECK-NEXT:  %[[V2:.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %0, i64 %1, i32 35)
+// CHECK-NEXT:  %[[V3:.*]] = load i64*, i64** %out.addr.ascast
+// CHECK-NEXT:  store i64 %[[V2]], i64* %[[V3]]
+// CHECK-NEXT:  ret void
+__global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
+{
+  *out = __builtin_amdgcn_uicmpl(a, b, 30+5);
+}
+
+// Check the 64 bit return value is correctly returned without truncation or assertion.
+
+// CHECK-LABEL: @_Z14test_s_memtime
+// CHECK: %[[V1:.*]] = call i64 @llvm.amdgcn.s.memtime()
+// CHECK-NEXT: %[[PTR:.*]] = load i64*, i64** %out.addr.ascast
+// CHECK-NEXT:  store i64 %[[V1]], i64* %[[PTR]]
+// CHECK-NEXT:  ret void
+__global__ void test_s_memtime(unsigned long long* out)
+{
+  *out = __builtin_amdgcn_s_memtime();
+}
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -9,6 +9,10 @@
 // This file defines the AMDGPU-specific builtin function database. Users of
 // this file must define the BUILTIN macro to make use of this information.
 //
+// Note: (unsigned) long int type should be avoided in builtin definitions
+// since it has different size on Linux (64 bit) and Windows (32 bit). Use
+// (unsigned) long long int type instead, which is 64 bit on both Linux and
+// Windows.
 //===----------------------------------------------------------------------===//
 
 // The format of this database matches clang/Basic/Builtins.def.
@@ -44,14 +48,14 @@
 BUILTIN(__builtin_amdgcn_mbcnt_hi, "UiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
 
-TARGET_BUILTIN(__builtin_amdgcn_s_memtime, "LUi", "n", "s-memtime-inst")
+TARGET_BUILTIN(__builtin_amdgcn_s_memtime, "LLUi", "n", "s-memtime-inst")
 
 //===----------------------------------------------------------------------===//
 // Instruction builtins.
 //===----------------------------------------------------------------------===//
 BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
 BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
-BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
+BUILTIN(__builtin_amdgcn_s_getpc, "LLUi", "n")
 BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")
 BUILTIN(__builtin_amdgcn_s_sendmsghalt, "vIiUi", "n")
@@ -111,12 +115,12 @@
 BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
-BUILTIN(__builtin_amdgcn_uicmp, "LUiUiUiIi", "nc")
-BUILTIN(__builtin_amdgcn_uicmpl, "LUiLUiLUiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmp, "LUiiiIi", "nc")
-BUILTIN(__builtin_amdgcn_sicmpl, "LUiLiLiIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmp, "LUiddIi", "nc")
-BUILTIN(__builtin_amdgcn_fcmpf, "LUiffIi", "nc")
+BUILTIN(__builtin_amdgcn_uicmp, "LLUiUiUiIi", "nc")
+BUILTIN(__builtin_amdgcn_uicmpl, "LLUiLLUiLLUiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmp, "LLUiiiIi", "nc")
+BUILTIN(__builtin_amdgcn_sicmpl, "LLUiLLiLLiIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmp, "LLUiddIi", "nc")
+BUILTIN(__builtin_amdgcn_fcmpf, "LLUiffIi", "nc")
 BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc")
 BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc")
 BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc")
@@ -142,9 +146,9 @@
 BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
 BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
-BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "LUiLUiUiLUi", "nc")
-BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "LUiLUiUiLUi", "nc")
-BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiLUiUiV4Ui", "nc")
+BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "LLUiLLUiUiLLUi", "nc")
+BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "LLUiLLUiUiLLUi", "nc")
+BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiLLUiUiV4Ui", "nc")
 
 //===----------------------------------------------------------------------===//
 // CI+ only builtins.
@@ -179,7 +183,7 @@
 TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
+TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LLUi", "n", "s-memrealtime")
 TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
 TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
 TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
@@ -213,7 +217,7 @@
 //===----------------------------------------------------------------------===//
 // Special builtins.
 //===----------------------------------------------------------------------===//
-BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")
+BUILTIN(__builtin_amdgcn_read_exec, "LLUi", "nc")
 BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc")
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to