https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/133696
This came up during a discussion on #129679 and has been split out as a preparatory commit.

An example of the AMDGPU codegen is:

    define <2 x float> @_Z10native_expDv2_f(<2 x float> %x) {
      %0 = extractelement <2 x float> %x, i64 0
      %mul.i4 = fmul afn float %0, 0x3FF7154760000000
      %1 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i4)
      %vecinit = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %x, i64 1
      %mul.i = fmul afn float %2, 0x3FF7154760000000
      %3 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i)
      %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
      ret <2 x float> %vecinit2
    }

>From 7fd1ba5a68aa2580202e31e53d8c50e32bca91e2 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 31 Mar 2025 11:37:43 +0100
Subject: [PATCH] [libclc][amdgpu] Implement native_exp via builtin

This came up during a discussion on #129679 and has been split out as a preparatory commit.

An example of the AMDGPU codegen is:

    define <2 x float> @_Z10native_expDv2_f(<2 x float> %x) {
      %0 = extractelement <2 x float> %x, i64 0
      %mul.i4 = fmul afn float %0, 0x3FF7154760000000
      %1 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i4)
      %vecinit = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %x, i64 1
      %mul.i = fmul afn float %2, 0x3FF7154760000000
      %3 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i)
      %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
      ret <2 x float> %vecinit2
    }
---
 libclc/amdgpu/lib/math/native_exp.cl  |  9 ++++++---
 libclc/amdgpu/lib/math/native_exp.inc | 11 -----------
 2 files changed, 6 insertions(+), 14 deletions(-)
 delete mode 100644 libclc/amdgpu/lib/math/native_exp.inc

diff --git a/libclc/amdgpu/lib/math/native_exp.cl b/libclc/amdgpu/lib/math/native_exp.cl
index e62b79d4ec9fa..f6ce043946488 100644
--- a/libclc/amdgpu/lib/math/native_exp.cl
+++ b/libclc/amdgpu/lib/math/native_exp.cl
@@ -7,7 +7,10 @@
 //===----------------------------------------------------------------------===//

 #include <clc/clc.h>
+#include <clc/clcmacro.h>

-#define __CLC_BODY <native_exp.inc>
-#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+_CLC_OVERLOAD _CLC_DEF float native_exp(float val) {
+  return __builtin_amdgcn_exp2f(val * M_LOG2E_F);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, native_exp, float)
diff --git a/libclc/amdgpu/lib/math/native_exp.inc b/libclc/amdgpu/lib/math/native_exp.inc
deleted file mode 100644
index d7dbd888d2988..0000000000000
--- a/libclc/amdgpu/lib/math/native_exp.inc
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp(__CLC_GENTYPE val) {
-  return native_exp2(val * M_LOG2E_F);
-}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits