hliao added a comment. With this revision, the following sample could be compiled with CUDA SDK and almost the same PTX code is generated.
#include <cuda.h> texture<float, cudaTextureType2D, cudaReadModeElementType> tex; #if defined(__clang__) struct v4f { float x, y, z, w; }; __device__ v4f tex_2d_ld(texture<float, cudaTextureType2D, cudaReadModeElementType>, float, float) asm("llvm.nvvm.tex.unified.2d.v4f32.f32"); template <typename T> static inline __device__ T tex2D(texture<T, cudaTextureType2D, cudaReadModeElementType> t, float x, float y) { return tex_2d_ld(t, x, y).x; } #endif __device__ float foo(float x, float y) { return tex2D(tex, x, y); } Note that, clang-based one needs defining texture fetch functions as they could not be reused from CUDA SDK. That part is enclosed with `#if defined(__clang__)`. Here's the PTX code generated from NVCC. `` kernel.ptx // // Generated by NVIDIA NVVM Compiler // // Compiler Build ID: CL-27506705 // Cuda compilation tools, release 10.2, V10.2.89 // Based on LLVM 3.4svn // .version 6.5 .target sm_30 .address_size 64 // .globl _Z3fooff .visible .global .texref tex; .visible .func (.param .b32 func_retval0) _Z3fooff( .param .b32 _Z3fooff_param_0, .param .b32 _Z3fooff_param_1 ) { .reg .f32 %f<7>; .reg .b64 %rd<2>; ld.param.f32 %f1, [_Z3fooff_param_0]; ld.param.f32 %f2, [_Z3fooff_param_1]; tex.2d.v4.f32.f32 {%f3, %f4, %f5, %f6}, [tex, {%f1, %f2}]; st.param.f32 [func_retval0+0], %f3; ret; } Here's the PTX code generated from Clang and LLVM backend. `clang --cuda-device-only --cuda-gpu-arch=sm_30 -O2 -S kernel.cu` kernel-cuda-nvptx64-nvidia-cuda-sm_30.s // // Generated by LLVM NVPTX Back-End // .version 6.4 .target sm_30 .address_size 64 // .globl _Z3fooff .visible .global .texref tex; .visible .func (.param .b32 func_retval0) _Z3fooff( .param .b32 _Z3fooff_param_0, .param .b32 _Z3fooff_param_1 ) { .reg .f32 %f<7>; .reg .b64 %rd<2>; ld.param.f32 %f1, [_Z3fooff_param_0]; ld.param.f32 %f2, [_Z3fooff_param_1]; mov.u64 %rd1, tex; tex.2d.v4.f32.f32 {%f3, %f4, %f5, %f6}, [%rd1, {%f1, %f2}]; st.param.f32 [func_retval0+0], %f3; ret; } Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76365/new/ https://reviews.llvm.org/D76365 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits