https://github.com/wenju-he created https://github.com/llvm/llvm-project/pull/142922
llvm-diff shows no change to amdgcn--amdhsa.bc From 42c4efab1a163624d7a3b327742d7e38bb7197b3 Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Thu, 5 Jun 2025 01:14:38 -0700 Subject: [PATCH] [NFC][libclc] Simplify clc_dot and dot implementation llvm-diff shows no change to amdgcn--amdhsa.bc --- libclc/clc/lib/generic/geometric/clc_dot.cl | 58 +------------------ libclc/clc/lib/generic/geometric/clc_dot.inc | 25 ++++++++ libclc/opencl/lib/generic/geometric/dot.cl | 60 +------------------- 3 files changed, 31 insertions(+), 112 deletions(-) create mode 100644 libclc/clc/lib/generic/geometric/clc_dot.inc diff --git a/libclc/clc/lib/generic/geometric/clc_dot.cl b/libclc/clc/lib/generic/geometric/clc_dot.cl index 2c82798d71a50..a30f9bc6e301a 100644 --- a/libclc/clc/lib/generic/geometric/clc_dot.cl +++ b/libclc/clc/lib/generic/geometric/clc_dot.cl @@ -7,59 +7,7 @@ //===----------------------------------------------------------------------===// #include <clc/internal/clc.h> +#include <clc/math/clc_fma.h> -_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; } - -_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) { - return p0.x * p1.x + p0.y * p1.y; -} - -_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) { - return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; -} - -_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) { - return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; -} - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) { - return p0 * p1; -} - -_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) { - return p0.x * p1.x + p0.y * p1.y; -} - -_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) { - return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; -} - -_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) { - return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; -} - 
-#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; } - -_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) { - return p0.x * p1.x + p0.y * p1.y; -} - -_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) { - return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; -} - -_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) { - return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; -} - -#endif +#define __CLC_BODY <clc_dot.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/geometric/clc_dot.inc b/libclc/clc/lib/generic/geometric/clc_dot.inc new file mode 100644 index 0000000000000..0c89493bf7332 --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_dot.inc @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \ + __CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4) + +_CLC_OVERLOAD _CLC_DEF __CLC_SCALAR_GENTYPE __clc_dot(__CLC_GENTYPE x, + __CLC_GENTYPE y) { +#if __CLC_VECSIZE_OR_1 == 1 + return x * y; +#elif __CLC_VECSIZE_OR_1 == 2 + return x.s0 * y.s0 + x.s1 * y.s1; +#elif __CLC_VECSIZE_OR_1 == 3 + return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2; +#else + return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2 + x.s3 * y.s3; +#endif +} + +#endif diff --git a/libclc/opencl/lib/generic/geometric/dot.cl b/libclc/opencl/lib/generic/geometric/dot.cl index 72d30ea53f8cf..785767f27aacf 100644 --- a/libclc/opencl/lib/generic/geometric/dot.cl +++ b/libclc/opencl/lib/generic/geometric/dot.cl @@ -9,60 +9,6 @@ #include <clc/geometric/clc_dot.h> #include <clc/opencl/clc.h> -_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) { - return __clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) { - return __clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) { - return __clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) { - return __clc_dot(p0, p1); -} - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) { - return __clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) { - return __clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) { - return __clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) { - return __clc_dot(p0, p1); -} - -#endif - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); } - -_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) { - return 
__clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) { - return __clc_dot(p0, p1); -} - -_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) { - return __clc_dot(p0, p1); -} - -#endif +#define FUNCTION dot +#define __CLC_BODY <clc/geometric/binary_def.inc> +#include <clc/math/gentype.inc> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits