https://github.com/JonChesterfield updated https://github.com/llvm/llvm-project/pull/131164
>From be94c9af7eaa8bc05ac9bdb80dadc285575c1472 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfi...@gmail.com>
Date: Thu, 13 Mar 2025 15:44:52 +0000
Subject: [PATCH] [Headers] Create stub spirv64intrin.h

---
 clang/lib/Headers/amdgpuintrin.h |   2 +-
 clang/lib/Headers/gpuintrin.h    |   2 +
 clang/lib/Headers/spirvintrin.h  | 132 ++++++++++++++++++
 clang/test/Headers/gpuintrin.c   | 223 +++++++++++++++++++++++++++++++
 4 files changed, 358 insertions(+), 1 deletion(-)
 create mode 100644 clang/lib/Headers/spirvintrin.h

diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index f7fb8e2814180..817cfeec896c4 100644
--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -1,4 +1,4 @@
-//===-- amdgpuintrin.h - AMDPGU intrinsic functions -----------------------===//
+//===-- amdgpuintrin.h - AMDGPU intrinsic functions -----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 0fb3916acac61..a3ce535188a48 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -60,6 +60,8 @@ _Pragma("omp end declare target");
 #include <nvptxintrin.h>
 #elif defined(__AMDGPU__)
 #include <amdgpuintrin.h>
+#elif defined(__SPIRV64__)
+#include <spirvintrin.h>
 #elif !defined(_OPENMP)
 #error "This header is only meant to be used on GPU architectures."
 #endif
diff --git a/clang/lib/Headers/spirvintrin.h b/clang/lib/Headers/spirvintrin.h
new file mode 100644
index 0000000000000..135be89bab406
--- /dev/null
+++ b/clang/lib/Headers/spirvintrin.h
@@ -0,0 +1,132 @@
+//===-- spirvintrin.h - SPIRV intrinsic functions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __SPIRVINTRIN_H
+#define __SPIRVINTRIN_H
+
+#ifndef __SPIRV64__
+// 32-bit SPIRV is currently a stretch goal.
+#error "This file is intended for SPIRV64 targets or offloading to SPIRV64"
+#endif
+
+#ifndef __GPUINTRIN_H
+#error "Never use <spirvintrin.h> directly; include <gpuintrin.h> instead"
+#endif
+
+// This is the skeleton of the SPIRV implementation of gpuintrin.
+// Address spaces and the kernel attribute are not yet implemented.
+// The target-specific functions are declarations waiting for clang support.
+
+#if defined(_OPENMP)
+#error "OpenMP is not yet available on SPIRV through the gpuintrin header"
+#endif
+
+// Type aliases to the address spaces used by the SPIRV backend.
+#define __gpu_private
+#define __gpu_constant
+#define __gpu_local
+#define __gpu_global
+#define __gpu_generic
+
+// Attribute to declare a function as a kernel.
+#define __gpu_kernel
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_x(void);
+
+// Returns the number of workgroups in the 'y' dimension of the grid.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_y(void);
+
+// Returns the number of workgroups in the 'z' dimension of the grid.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_blocks_z(void);
+
+// Returns the 'x' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_x(void);
+
+// Returns the 'y' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_y(void);
+
+// Returns the 'z' dimension of the current workgroup's id.
+_DEFAULT_FN_ATTRS uint32_t __gpu_block_id_z(void);
+
+// Returns the number of workitems in the 'x' dimension.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_x(void);
+
+// Returns the number of workitems in the 'y' dimension.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_y(void);
+
+// Returns the number of workitems in the 'z' dimension.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_threads_z(void);
+
+// Returns the 'x' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_x(void);
+
+// Returns the 'y' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_y(void);
+
+// Returns the 'z' dimension id of the workitem in the current workgroup.
+_DEFAULT_FN_ATTRS uint32_t __gpu_thread_id_z(void);
+
+// Returns the size of the wave.
+_DEFAULT_FN_ATTRS uint32_t __gpu_num_lanes(void);
+
+// Returns the id of the thread inside of a wave executing together.
+_DEFAULT_FN_ATTRS uint32_t __gpu_lane_id(void);
+
+// Returns the bit-mask of active threads in the current wave.
+_DEFAULT_FN_ATTRS uint64_t __gpu_lane_mask(void);
+
+// Copies the value from the first active thread in the wave to the rest.
+_DEFAULT_FN_ATTRS uint32_t __gpu_read_first_lane_u32(uint64_t __lane_mask,
+                                                     uint32_t __x);
+
+// Returns a bitmask of threads in the current lane for which \p x is true.
+_DEFAULT_FN_ATTRS uint64_t __gpu_ballot(uint64_t __lane_mask, bool __x);
+
+// Waits for all the threads in the block to converge and issues a fence.
+_DEFAULT_FN_ATTRS void __gpu_sync_threads(void);
+
+// Waits for all threads in the wave to converge.
+_DEFAULT_FN_ATTRS void __gpu_sync_lane(uint64_t __lane_mask);
+
+// Shuffles the lanes inside the wave according to the given index.
+_DEFAULT_FN_ATTRS uint32_t __gpu_shuffle_idx_u32(uint64_t __lane_mask,
+                                                 uint32_t __idx, uint32_t __x,
+                                                 uint32_t __width);
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
+  return __gpu_match_any_u32_impl(__lane_mask, __x);
+}
+
+// Returns a bitmask marking all lanes that have the same value of __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
+  return __gpu_match_any_u64_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
+  return __gpu_match_all_u32_impl(__lane_mask, __x);
+}
+
+// Returns the current lane mask if every lane contains __x.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_match_all_u64(uint64_t __lane_mask, uint64_t __x) {
+  return __gpu_match_all_u64_impl(__lane_mask, __x);
+}
+
+// Terminates execution of the associated wave.
+_DEFAULT_FN_ATTRS [[noreturn]] void __gpu_exit(void);
+
+// Suspend the thread briefly to assist the scheduler during busy loops.
+_DEFAULT_FN_ATTRS void __gpu_thread_suspend(void);
+
+#endif // __SPIRVINTRIN_H
diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index 9a15ce277ba87..eaf001be19ac9 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -9,6 +9,11 @@
 // RUN:   -target-feature +ptx62 \
 // RUN:   -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \
 // RUN:   | FileCheck %s --check-prefix=NVPTX
+//
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
+// RUN:   -internal-isystem %S/../../lib/Headers/ \
+// RUN:   -triple spirv64-- -emit-llvm %s -o - \
+// RUN:   | FileCheck %s --check-prefix=SPIRV64
 
 #include <gpuintrin.h>
 
@@ -978,6 +983,224 @@ __gpu_kernel void foo() {
 // NVPTX-NEXT: call void @llvm.nvvm.exit()
 // NVPTX-NEXT: ret void
 //
+//
+// SPIRV64-LABEL: define spir_func void @foo(
+// SPIRV64-SAME: ) #[[ATTR0:[0-9]+]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x()
+// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i32 @__gpu_num_blocks_y()
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_z()
+// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_num_blocks(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_x()
+// SPIRV64-NEXT: [[CALL5:%.*]] = call spir_func i32 @__gpu_block_id_y()
+// SPIRV64-NEXT: [[CALL6:%.*]] = call spir_func i32 @__gpu_block_id_z()
+// SPIRV64-NEXT: [[CALL7:%.*]] = call spir_func i32 @__gpu_block_id(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL8:%.*]] = call spir_func i32 @__gpu_num_threads_x()
+// SPIRV64-NEXT: [[CALL9:%.*]] = call spir_func i32 @__gpu_num_threads_y()
+// SPIRV64-NEXT: [[CALL10:%.*]] = call spir_func i32 @__gpu_num_threads_z()
+// SPIRV64-NEXT: [[CALL11:%.*]] = call spir_func i32 @__gpu_num_threads(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL12:%.*]] = call spir_func i32 @__gpu_thread_id_x()
+// SPIRV64-NEXT: [[CALL13:%.*]] = call spir_func i32 @__gpu_thread_id_y()
+// SPIRV64-NEXT: [[CALL14:%.*]] = call spir_func i32 @__gpu_thread_id_z()
+// SPIRV64-NEXT: [[CALL15:%.*]] = call spir_func i32 @__gpu_thread_id(i32 noundef 0)
+// SPIRV64-NEXT: [[CALL16:%.*]] = call spir_func i32 @__gpu_num_lanes()
+// SPIRV64-NEXT: [[CALL17:%.*]] = call spir_func i32 @__gpu_lane_id()
+// SPIRV64-NEXT: [[CALL18:%.*]] = call spir_func i64 @__gpu_lane_mask()
+// SPIRV64-NEXT: [[CALL19:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1)
+// SPIRV64-NEXT: [[CALL20:%.*]] = call spir_func i64 @__gpu_read_first_lane_u64(i64 noundef -1, i64 noundef -1)
+// SPIRV64-NEXT: [[CALL21:%.*]] = call spir_func i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true)
+// SPIRV64-NEXT: call spir_func void @__gpu_sync_threads()
+// SPIRV64-NEXT: call spir_func void @__gpu_sync_lane(i64 noundef -1)
+// SPIRV64-NEXT: [[CALL22:%.*]] = call spir_func i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0)
+// SPIRV64-NEXT: [[CALL23:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef -1)
+// SPIRV64-NEXT: [[CALL24:%.*]] = call spir_func zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1)
+// SPIRV64-NEXT: call spir_func void @__gpu_exit() #[[ATTR4:[0-9]+]]
+// SPIRV64-NEXT: unreachable
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_blocks(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_blocks_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_blocks_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_blocks_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_block_id(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_block_id_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_block_id_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_block_id_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_num_threads(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_num_threads_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_num_threads_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_num_threads_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i32 @__gpu_thread_id(
+// SPIRV64-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
+// SPIRV64-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
+// SPIRV64-NEXT: i32 0, label %[[SW_BB:.*]]
+// SPIRV64-NEXT: i32 1, label %[[SW_BB1:.*]]
+// SPIRV64-NEXT: i32 2, label %[[SW_BB3:.*]]
+// SPIRV64-NEXT: ]
+// SPIRV64: [[SW_BB]]:
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_thread_id_x()
+// SPIRV64-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN:.*]]
+// SPIRV64: [[SW_BB1]]:
+// SPIRV64-NEXT: [[CALL2:%.*]] = call spir_func i32 @__gpu_thread_id_y()
+// SPIRV64-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_BB3]]:
+// SPIRV64-NEXT: [[CALL4:%.*]] = call spir_func i32 @__gpu_thread_id_z()
+// SPIRV64-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: br label %[[RETURN]]
+// SPIRV64: [[SW_DEFAULT]]:
+// SPIRV64-NEXT: unreachable
+// SPIRV64: [[RETURN]]:
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
+// SPIRV64-NEXT: ret i32 [[TMP1]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i64 @__gpu_read_first_lane_u64(
+// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]], i64 noundef [[__X:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: [[__X_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: [[__HI:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: [[__LO:%.*]] = alloca i32, align 4
+// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: store i64 [[__X]], ptr [[__X_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__X_ADDR]], align 8
+// SPIRV64-NEXT: [[SHR:%.*]] = lshr i64 [[TMP0]], 32
+// SPIRV64-NEXT: [[CONV:%.*]] = trunc i64 [[SHR]] to i32
+// SPIRV64-NEXT: store i32 [[CONV]], ptr [[__HI]], align 4
+// SPIRV64-NEXT: [[TMP1:%.*]] = load i64, ptr [[__X_ADDR]], align 8
+// SPIRV64-NEXT: [[AND:%.*]] = and i64 [[TMP1]], 4294967295
+// SPIRV64-NEXT: [[CONV1:%.*]] = trunc i64 [[AND]] to i32
+// SPIRV64-NEXT: store i32 [[CONV1]], ptr [[__LO]], align 4
+// SPIRV64-NEXT: [[TMP2:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP3:%.*]] = load i32, ptr [[__HI]], align 4
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP2]], i32 noundef [[TMP3]])
+// SPIRV64-NEXT: [[CONV2:%.*]] = zext i32 [[CALL]] to i64
+// SPIRV64-NEXT: [[SHL:%.*]] = shl i64 [[CONV2]], 32
+// SPIRV64-NEXT: [[TMP4:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP5:%.*]] = load i32, ptr [[__LO]], align 4
+// SPIRV64-NEXT: [[CALL3:%.*]] = call spir_func i32 @__gpu_read_first_lane_u32(i64 noundef [[TMP4]], i32 noundef [[TMP5]])
+// SPIRV64-NEXT: [[CONV4:%.*]] = zext i32 [[CALL3]] to i64
+// SPIRV64-NEXT: [[AND5:%.*]] = and i64 [[CONV4]], 4294967295
+// SPIRV64-NEXT: [[OR:%.*]] = or i64 [[SHL]], [[AND5]]
+// SPIRV64-NEXT: ret i64 [[OR]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func i64 @__gpu_first_lane_id(
+// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP0]], i1 true)
+// SPIRV64-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 1
+// SPIRV64-NEXT: [[ISZERO:%.*]] = icmp eq i64 [[TMP0]], 0
+// SPIRV64-NEXT: [[FFS:%.*]] = select i1 [[ISZERO]], i64 0, i64 [[TMP2]]
+// SPIRV64-NEXT: [[CAST:%.*]] = trunc i64 [[FFS]] to i32
+// SPIRV64-NEXT: [[SUB:%.*]] = sub nsw i32 [[CAST]], 1
+// SPIRV64-NEXT: [[CONV:%.*]] = sext i32 [[SUB]] to i64
+// SPIRV64-NEXT: ret i64 [[CONV]]
+//
+//
+// SPIRV64-LABEL: define internal spir_func zeroext i1 @__gpu_is_first_in_lane(
+// SPIRV64-SAME: i64 noundef [[__LANE_MASK:%.*]]) #[[ATTR0]] {
+// SPIRV64-NEXT: [[ENTRY:.*:]]
+// SPIRV64-NEXT: [[__LANE_MASK_ADDR:%.*]] = alloca i64, align 8
+// SPIRV64-NEXT: store i64 [[__LANE_MASK]], ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[CALL:%.*]] = call spir_func i32 @__gpu_lane_id()
+// SPIRV64-NEXT: [[CONV:%.*]] = zext i32 [[CALL]] to i64
+// SPIRV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
+// SPIRV64-NEXT: [[CALL1:%.*]] = call spir_func i64 @__gpu_first_lane_id(i64 noundef [[TMP0]])
+// SPIRV64-NEXT: [[CMP:%.*]] = icmp eq i64 [[CONV]], [[CALL1]]
+// SPIRV64-NEXT: ret i1 [[CMP]]
+//
 //.
 // AMDGPU: [[RNG3]] = !{i32 1, i32 0}
 // AMDGPU: [[META4]] = !{}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
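For context, a minimal sketch of how a translation unit might use the portable interface this stub declares. It is not part of the patch: the kernel name, parameters, and compile line are illustrative only, and because the SPIRV64 entry points above are plain declarations awaiting clang support, such a file would compile against the header but not yet link for spirv64.

```c
// Minimal sketch, assuming only what <gpuintrin.h> declares.
// Illustrative compile line: clang --target=spirv64 -emit-llvm -c scale.c
#include <gpuintrin.h>
#include <stdint.h>

// Scales a buffer, broadcasting the factor from the first active lane.
__gpu_kernel void scale(uint32_t *out, uint32_t n, uint32_t factor) {
  // Global workitem id: workgroup id * workgroup size + local id.
  uint32_t id =
      __gpu_block_id_x() * __gpu_num_threads_x() + __gpu_thread_id_x();

  // Every active lane reads the value held by the first active lane.
  uint32_t f = __gpu_read_first_lane_u32(__gpu_lane_mask(), factor);

  if (id < n)
    out[id] *= f;

  // Converge the workgroup before returning.
  __gpu_sync_threads();
}
```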