[libclc] [libclc][amdgpu] Implement native_exp via builtin (PR #133696)

Fraser Cormack via cfe-commits Mon, 31 Mar 2025 05:01:42 -0700

https://github.com/frasercrmck created 
https://github.com/llvm/llvm-project/pull/133696


This change came up during a discussion on #129679 and has been split out of 
that PR as a preparatory commit.

An example of the AMDGPU codegen is:

    define <2 x float> @_Z10native_expDv2_f(<2 x float> %x) {
      %0 = extractelement <2 x float> %x, i64 0
      %mul.i4 = fmul afn float %0, 0x3FF7154760000000
      %1 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i4)
      %vecinit = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %x, i64 1
      %mul.i = fmul afn float %2, 0x3FF7154760000000
      %3 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i)
      %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
      ret <2 x float> %vecinit2
    }

From 7fd1ba5a68aa2580202e31e53d8c50e32bca91e2 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 31 Mar 2025 11:37:43 +0100
Subject: [PATCH] [libclc][amdgpu] Implement native_exp via builtin

This change came up during a discussion on #129679 and has been split out
of that PR as a preparatory commit.

An example of the AMDGPU codegen is:

    define <2 x float> @_Z10native_expDv2_f(<2 x float> %x) {
      %0 = extractelement <2 x float> %x, i64 0
      %mul.i4 = fmul afn float %0, 0x3FF7154760000000
      %1 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i4)
      %vecinit = insertelement <2 x float> poison, float %1, i64 0
      %2 = extractelement <2 x float> %x, i64 1
      %mul.i = fmul afn float %2, 0x3FF7154760000000
      %3 = tail call afn float @llvm.amdgcn.exp2.f32(float %mul.i)
      %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1
      ret <2 x float> %vecinit2
    }
---
 libclc/amdgpu/lib/math/native_exp.cl  |  9 ++++++---
 libclc/amdgpu/lib/math/native_exp.inc | 11 -----------
 2 files changed, 6 insertions(+), 14 deletions(-)
 delete mode 100644 libclc/amdgpu/lib/math/native_exp.inc

diff --git a/libclc/amdgpu/lib/math/native_exp.cl b/libclc/amdgpu/lib/math/native_exp.cl
index e62b79d4ec9fa..f6ce043946488 100644
--- a/libclc/amdgpu/lib/math/native_exp.cl
+++ b/libclc/amdgpu/lib/math/native_exp.cl
@@ -7,7 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include <clc/clc.h>
+#include <clc/clcmacro.h>
 
-#define __CLC_BODY <native_exp.inc>
-#define __FLOAT_ONLY
-#include <clc/math/gentype.inc>
+_CLC_OVERLOAD _CLC_DEF float native_exp(float val) {
+  return __builtin_amdgcn_exp2f(val * M_LOG2E_F);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, native_exp, float)
diff --git a/libclc/amdgpu/lib/math/native_exp.inc b/libclc/amdgpu/lib/math/native_exp.inc
deleted file mode 100644
index d7dbd888d2988..0000000000000
--- a/libclc/amdgpu/lib/math/native_exp.inc
+++ /dev/null
@@ -1,11 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_exp(__CLC_GENTYPE val) {
-  return native_exp2(val * M_LOG2E_F);
-}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] [libclc][amdgpu] Implement native_exp via builtin (PR #133696)

Reply via email to