https://github.com/frasercrmck updated 
https://github.com/llvm/llvm-project/pull/133226

>From 497781f9fe1d1750d37a36948be285cdb6c14b04 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Tue, 25 Mar 2025 18:19:38 +0000
Subject: [PATCH 1/2] [libclc] Move atan2/atan2pi to the CLC library

---
 libclc/clc/include/clc/math/clc_atan2.h     |  20 ++
 libclc/clc/include/clc/math/clc_atan2pi.h   |  20 ++
 libclc/clc/include/clc/math/tables.h        |   3 +-
 libclc/clc/lib/generic/SOURCES              |   2 +
 libclc/clc/lib/generic/math/clc_atan2.cl    |  26 ++
 libclc/clc/lib/generic/math/clc_atan2.inc   | 243 +++++++++++++++++++
 libclc/clc/lib/generic/math/clc_atan2pi.cl  |  26 ++
 libclc/clc/lib/generic/math/clc_atan2pi.inc | 227 ++++++++++++++++++
 libclc/clc/lib/generic/math/clc_tables.cl   | 177 ++++++++++++++
 libclc/generic/lib/math/atan2.cl            | 222 +----------------
 libclc/generic/lib/math/atan2pi.cl          | 206 +---------------
 libclc/generic/lib/math/tables.cl           | 253 --------------------
 12 files changed, 751 insertions(+), 674 deletions(-)
 create mode 100644 libclc/clc/include/clc/math/clc_atan2.h
 create mode 100644 libclc/clc/include/clc/math/clc_atan2pi.h
 create mode 100644 libclc/clc/lib/generic/math/clc_atan2.cl
 create mode 100644 libclc/clc/lib/generic/math/clc_atan2.inc
 create mode 100644 libclc/clc/lib/generic/math/clc_atan2pi.cl
 create mode 100644 libclc/clc/lib/generic/math/clc_atan2pi.inc

diff --git a/libclc/clc/include/clc/math/clc_atan2.h b/libclc/clc/include/clc/math/clc_atan2.h
new file mode 100644
index 0000000000000..85b99d0279ee7
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_atan2.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_ATAN2_H__
+#define __CLC_MATH_CLC_ATAN2_H__
+
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_atan2
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_ATAN2_H__
diff --git a/libclc/clc/include/clc/math/clc_atan2pi.h b/libclc/clc/include/clc/math/clc_atan2pi.h
new file mode 100644
index 0000000000000..af41165f7dcf2
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_atan2pi.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_ATAN2PI_H__
+#define __CLC_MATH_CLC_ATAN2PI_H__
+
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+#define __CLC_FUNCTION __clc_atan2pi
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_ATAN2PI_H__
diff --git a/libclc/clc/include/clc/math/tables.h b/libclc/clc/include/clc/math/tables.h
index b801ba65bb945..6a0cd80f9c8cb 100644
--- a/libclc/clc/include/clc/math/tables.h
+++ b/libclc/clc/include/clc/math/tables.h
@@ -79,7 +79,8 @@ CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl);
 TABLE_FUNCTION_DECL(double2, ln_tbl);
 CLC_TABLE_FUNCTION_DECL(double, ln_tbl_lo);
 CLC_TABLE_FUNCTION_DECL(double, ln_tbl_hi);
-TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
+CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head);
+CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail);
 TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
 TABLE_FUNCTION_DECL(double2, sinh_tbl);
 TABLE_FUNCTION_DECL(double2, cosh_tbl);
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index dd94d97303944..d851065bb2e23 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -24,6 +24,8 @@ math/clc_asin.cl
 math/clc_asinh.cl
 math/clc_asinpi.cl
 math/clc_atan.cl
+math/clc_atan2.cl
+math/clc_atan2pi.cl
 math/clc_atanh.cl
 math/clc_atanpi.cl
 math/clc_ceil.cl
diff --git a/libclc/clc/lib/generic/math/clc_atan2.cl b/libclc/clc/lib/generic/math/clc_atan2.cl
new file mode 100644
index 0000000000000..b10bf32333a32
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_atan2.cl
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_copysign.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_ldexp.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/relational/clc_select.h>
+#include <clc/shared/clc_max.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_atan2.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_atan2.inc b/libclc/clc/lib/generic/math/clc_atan2.inc
new file mode 100644
index 0000000000000..0917f3adf2d90
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_atan2.inc
@@ -0,0 +1,243 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+// Float atan2(y, x): approximates atan(v/u) with v <= u, then fixes up octants.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, __CLC_GENTYPE x) {
+  const __CLC_GENTYPE pi = 0x1.921fb6p+1f;
+  const __CLC_GENTYPE piby2 = 0x1.921fb6p+0f;
+  const __CLC_GENTYPE piby4 = 0x1.921fb6p-1f;
+  const __CLC_GENTYPE threepiby4 = 0x1.2d97c8p+1f;
+
+  __CLC_GENTYPE ax = __clc_fabs(x);
+  __CLC_GENTYPE ay = __clc_fabs(y);
+  __CLC_GENTYPE v = __clc_min(ax, ay);
+  __CLC_GENTYPE u = __clc_max(ax, ay);
+
+  // Scale since u could be large, as in "regular" divide
+  __CLC_GENTYPE s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
+  __CLC_GENTYPE vbyu = s * MATH_DIVIDE(v, s * u);
+
+  __CLC_GENTYPE vbyu2 = vbyu * vbyu;
+
+#define USE_2_2_APPROXIMATION
+#if defined USE_2_2_APPROXIMATION
+  __CLC_GENTYPE p =
+      __clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f),
+                -0x1.5554d0p-2f) *
+      vbyu2 * vbyu;
+  __CLC_GENTYPE q =
+      __clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
+#else
+  __CLC_GENTYPE p =
+      __clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.55cd22p-5f, -0x1.26cf76p-2f),
+                -0x1.55554ep-2f) *
+      vbyu2 * vbyu;
+  __CLC_GENTYPE q = __clc_mad(
+      vbyu2,
+      __clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.9f1304p-5f, 0x1.2656fap-1f),
+                0x1.76b4b8p+0f),
+      1.0f);
+#endif
+
+  // Octant 0 result: a = vbyu + p * (1 / q)
+  __CLC_GENTYPE a = __clc_mad(p, MATH_RECIP(q), vbyu);
+
+  // Fix up 3 other octants
+  __CLC_GENTYPE at = piby2 - a;
+  a = ay > ax ? at : a;
+  at = pi - a;
+  a = x < 0.0F ? at : a;
+
+  // y == 0 => 0 or pi, chosen by the integer view of x so -0.0 counts as < 0
+  at = __CLC_AS_INTN(x) < 0 ? pi : 0.0f;
+  a = y == 0.0f ? at : a;
+
+  // x and y are both +-Inf: pi/4 for x > 0, 3*pi/4 otherwise
+  at = x > 0.0f ? piby4 : threepiby4;
+  a = __clc_select(a, at, __clc_isinf(x) && __clc_isinf(y));
+
+  // x or y is NaN
+  a = __clc_select(a, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isnan(y));
+
+  // The result takes the sign of y
+  return __clc_copysign(a, y);
+}
+
+#elif __CLC_FPSIZE == 64
+// Double atan2(y, x): atan(v/u) by table or polynomial, then quadrant fix-up.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, __CLC_GENTYPE x) {
+  const __CLC_GENTYPE pi = 3.1415926535897932e+00;    /* 0x400921fb54442d18 */
+  const __CLC_GENTYPE piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
+  const __CLC_GENTYPE piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
+  // 0x4002d97c7f3321d2
+  const __CLC_GENTYPE three_piby4 = 2.3561944901923449e+00;
+  const __CLC_GENTYPE pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */
+  const __CLC_GENTYPE pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */
+  // 0x3ff921fb54442d18
+  const __CLC_GENTYPE piby2_head = 1.5707963267948965e+00;
+  // 0x3c91a62633145c07
+  const __CLC_GENTYPE piby2_tail = 6.1232339957367660e-17;
+
+  __CLC_GENTYPE x2 = x; // unscaled x, used by the special-case checks below
+  // Important to capture -0.0 in xneg and yneg, so comparison done as integer
+  __CLC_LONGN xneg = __CLC_AS_LONGN(x) < 0;
+  __CLC_INTN xexp =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x) >> EXPSHIFTBITS_DP64) & 0x7ff;
+
+  __CLC_GENTYPE y2 = y; // unscaled y, used by the special-case checks below
+  __CLC_LONGN yneg = __CLC_AS_LONGN(y) < 0;
+  __CLC_INTN yexp =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y) >> EXPSHIFTBITS_DP64) & 0x7ff;
+
+  __CLC_LONGN cond2 = __CLC_CONVERT_LONGN(xexp < 1021 && yexp < 1021);
+  __CLC_LONGN diffexp = __CLC_CONVERT_LONGN(yexp - xexp);
+
+  // Scale up both x and y if they are both below 1/4
+  __CLC_GENTYPE x1 = __clc_ldexp(x, 1024);
+  __CLC_INTN xexp1 =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x1) >> EXPSHIFTBITS_DP64) & 0x7ff;
+  __CLC_GENTYPE y1 = __clc_ldexp(y, 1024);
+  __CLC_INTN yexp1 =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y1) >> EXPSHIFTBITS_DP64) & 0x7ff;
+  __CLC_LONGN diffexp1 = __CLC_CONVERT_LONGN(yexp1 - xexp1);
+
+  diffexp = __clc_select(diffexp, diffexp1, cond2);
+  x = cond2 ? x1 : x;
+  y = cond2 ? y1 : y;
+
+  // General case: take absolute values of arguments
+  __CLC_GENTYPE u = __clc_fabs(x);
+  __CLC_GENTYPE v = __clc_fabs(y);
+
+  // Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
+  __CLC_LONGN swap_vu = u < v;
+  __CLC_GENTYPE uu = u;
+  u = swap_vu ? v : u;
+  v = swap_vu ? uu : v;
+
+  __CLC_GENTYPE vbyu = v / u;
+  __CLC_GENTYPE q1, q2; // head and tail of the result; summed further down
+
+  // General values of v/u. Use a look-up table and series expansion.
+
+  {
+    __CLC_GENTYPE val = vbyu > 0.0625 ? vbyu : 0.063;
+    __CLC_INTN index = __CLC_CONVERT_INTN(__clc_fma(256.0, val, 0.5));
+    q1 = USE_TABLE(atan_jby256_tbl_head, index - 16);
+    q2 = USE_TABLE(atan_jby256_tbl_tail, index - 16);
+    __CLC_GENTYPE c = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-8;
+
+    // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
+    // m is minus the unbiased exponent of u; each value gets a single ldexp
+    __CLC_INTN m =
+        -(__CLC_CONVERT_INTN(__CLC_AS_ULONGN(u) >> EXPSHIFTBITS_DP64) -
+          EXPBIAS_DP64);
+    __CLC_GENTYPE um = __clc_ldexp(u, m);
+    __CLC_GENTYPE vm = __clc_ldexp(v, m);
+
+    // 26 leading bits of u
+    __CLC_GENTYPE u1 = __CLC_AS_GENTYPE(__CLC_AS_ULONGN(um) & 0xfffffffff8000000UL);
+    __CLC_GENTYPE u2 = um - u1;
+
+    __CLC_GENTYPE r = MATH_DIVIDE(__clc_fma(-c, u2, __clc_fma(-c, u1, vm)),
+                                  __clc_fma(c, vm, um));
+
+    // Polynomial approximation to atan(r)
+    __CLC_GENTYPE s = r * r;
+    q2 = q2 + __clc_fma((s * __clc_fma(-s, 0.19999918038989143496,
+                                       0.33333333333224095522)),
+                        -r, r);
+  }
+
+  __CLC_GENTYPE q3, q4; // tiny-ratio path: the result is vbyu itself
+  {
+    q3 = 0.0;
+    q4 = vbyu;
+  }
+
+  __CLC_GENTYPE q5, q6; // small-ratio path: polynomial in vbyu
+  {
+    __CLC_GENTYPE u1 = __CLC_AS_GENTYPE(__CLC_AS_ULONGN(u) & 0xffffffff00000000UL);
+    __CLC_GENTYPE u2 = u - u1;
+    __CLC_GENTYPE vu1 = __CLC_AS_GENTYPE(__CLC_AS_ULONGN(vbyu) & 0xffffffff00000000UL);
+    __CLC_GENTYPE vu2 = vbyu - vu1;
+
+    q5 = 0.0;
+    __CLC_GENTYPE s = vbyu * vbyu;
+    q6 = vbyu +
+         __clc_fma(
+             -vbyu * s,
+             __clc_fma(
+                 -s,
+                 __clc_fma(-s,
+                           __clc_fma(-s,
+                                     __clc_fma(-s, 0.90029810285449784439E-01,
+                                               0.11110736283514525407),
+                                     0.14285713561807169030),
+                           0.19999999999393223405),
+                 0.33333333333333170500),
+             MATH_DIVIDE(__clc_fma(-u, vu2,
+                                   __clc_fma(-u2, vu1, __clc_fma(-u1, vu1, v))),
+                         u));
+  }
+
+  q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
+  q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
+
+  q1 = vbyu > 0.0625 ? q1 : q3;
+  q2 = vbyu > 0.0625 ? q2 : q4;
+
+  // Tidy-up according to which quadrant the arguments lie in
+  __CLC_GENTYPE res1, res2, res3, res4;
+  q1 = swap_vu ? piby2_head - q1 : q1;
+  q2 = swap_vu ? piby2_tail - q2 : q2;
+  q1 = xneg ? pi_head - q1 : q1;
+  q2 = xneg ? pi_tail - q2 : q2;
+  q1 = q1 + q2;
+  res4 = yneg ? -q1 : q1;
+
+  res1 = yneg ? -three_piby4 : three_piby4;
+  res2 = yneg ? -piby4 : piby4;
+  res3 = xneg ? res1 : res2;
+
+  res3 = __clc_select(res4, res3,
+                      __CLC_CONVERT_LONGN(__clc_isinf(x2) && __clc_isinf(y2)));
+  res1 = yneg ? -pi : pi;
+
+  // abs(x)/abs(y) > 2^56 and x < 0
+  res3 = (diffexp < -56 && xneg) ? res1 : res3;
+
+  res4 = MATH_DIVIDE(y, x);
+  // x positive and dominant over y by a factor of 2^28
+  res3 = diffexp < -28 && xneg == 0 ? res4 : res3;
+
+  // abs(y)/abs(x) > 2^56
+  res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2
+  res3 = diffexp > 56 ? res4 : res3;
+
+  res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y
+  res4 = xneg ? res1 : y2;
+
+  // Zero y gives +-0 for positive x and +-pi for negative x
+  res3 = y2 == 0.0 ? res4 : res3;
+  res3 = __clc_isnan(y2) ? y2 : res3;
+  res3 = __clc_isnan(x2) ? x2 : res3;
+
+  return res3;
+}
+
+#elif __CLC_FPSIZE == 16
+// Half atan2(y, x): forwards to the float overload and converts the result.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y,
+                                                 __CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(
+      __clc_atan2(__CLC_CONVERT_FLOATN(y), __CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_atan2pi.cl b/libclc/clc/lib/generic/math/clc_atan2pi.cl
new file mode 100644
index 0000000000000..7f75c11d15d7b
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_atan2pi.cl
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_copysign.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_ldexp.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/relational/clc_select.h>
+#include <clc/shared/clc_max.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_atan2pi.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_atan2pi.inc b/libclc/clc/lib/generic/math/clc_atan2pi.inc
new file mode 100644
index 0000000000000..79b2551e077cc
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_atan2pi.inc
@@ -0,0 +1,227 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+// Float atan2pi(y, x): the atan2 result divided by pi, i.e. in units of pi.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2pi(__CLC_GENTYPE y,
+                                                   __CLC_GENTYPE x) {
+  const __CLC_GENTYPE pi = 0x1.921fb6p+1f;
+
+  __CLC_GENTYPE ax = __clc_fabs(x);
+  __CLC_GENTYPE ay = __clc_fabs(y);
+  __CLC_GENTYPE v = __clc_min(ax, ay);
+  __CLC_GENTYPE u = __clc_max(ax, ay);
+
+  // Scale since u could be large, as in "regular" divide
+  __CLC_GENTYPE s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
+  __CLC_GENTYPE vbyu = s * MATH_DIVIDE(v, s * u);
+
+  __CLC_GENTYPE vbyu2 = vbyu * vbyu;
+
+  __CLC_GENTYPE p =
+      __clc_mad(vbyu2, __clc_mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f),
+                -0x1.5554d0p-2f) *
+      vbyu2 * vbyu;
+  __CLC_GENTYPE q =
+      __clc_mad(vbyu2, __clc_mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
+
+  // Octant 0 result, already divided by pi: (vbyu + p * (1 / q)) / pi
+  __CLC_GENTYPE a = MATH_DIVIDE(__clc_mad(p, MATH_RECIP(q), vbyu), pi);
+
+  // Fix up 3 other octants (0.5 and 1.0 stand for pi/2 and pi)
+  __CLC_GENTYPE at = 0.5f - a;
+  a = ay > ax ? at : a;
+  at = 1.0f - a;
+  a = x < 0.0F ? at : a;
+
+  // y == 0 => 0 or 1, chosen by the integer view of x so -0.0 counts as < 0
+  at = __CLC_AS_INTN(x) < 0 ? 1.0f : 0.0f;
+  a = y == 0.0f ? at : a;
+
+  // x and y are both +-Inf: 0.25 for x > 0, 0.75 otherwise
+  at = x > 0.0f ? 0.25f : 0.75f;
+  a = __clc_select(a, at, __clc_isinf(x) && __clc_isinf(y));
+
+  // x or y is NaN
+  a = __clc_select(a, __CLC_GENTYPE_NAN, __clc_isnan(x) || __clc_isnan(y));
+
+  // The result takes the sign of y
+  return __clc_copysign(a, y);
+}
+
+#elif __CLC_FPSIZE == 64
+// Double atan2pi(y, x): the atan2 computation with results in units of pi.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2pi(__CLC_GENTYPE y,
+                                                   __CLC_GENTYPE x) {
+  const __CLC_GENTYPE pi = 3.1415926535897932e+00;      /* 0x400921fb54442d18 */
+  const __CLC_GENTYPE pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */
+  const __CLC_GENTYPE pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */
+  // 0x3ff921fb54442d18
+  const __CLC_GENTYPE piby2_head = 1.5707963267948965e+00;
+  // 0x3c91a62633145c07
+  const __CLC_GENTYPE piby2_tail = 6.1232339957367660e-17;
+
+  __CLC_GENTYPE x2 = x; // unscaled x, used by the special-case checks below
+  __CLC_LONGN xneg = __CLC_AS_LONGN(x) < 0;
+  __CLC_INTN xexp =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x) >> EXPSHIFTBITS_DP64) & 0x7ff;
+
+  __CLC_GENTYPE y2 = y; // unscaled y, used by the special-case checks below
+  __CLC_LONGN yneg = __CLC_AS_LONGN(y) < 0;
+  __CLC_INTN yexp =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y) >> EXPSHIFTBITS_DP64) & 0x7ff;
+
+  __CLC_LONGN cond2 = __CLC_CONVERT_LONGN(xexp < 1021 & yexp < 1021);
+  __CLC_LONGN diffexp = __CLC_CONVERT_LONGN(yexp - xexp);
+
+  // Scale up both x and y if they are both below 1/4
+  __CLC_GENTYPE x1 = __clc_ldexp(x, 1024);
+  __CLC_INTN xexp1 =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(x1) >> EXPSHIFTBITS_DP64) & 0x7ff;
+  __CLC_GENTYPE y1 = __clc_ldexp(y, 1024);
+  __CLC_INTN yexp1 =
+      __CLC_CONVERT_INTN(__CLC_AS_ULONGN(y1) >> EXPSHIFTBITS_DP64) & 0x7ff;
+  __CLC_LONGN diffexp1 = __CLC_CONVERT_LONGN(yexp1 - xexp1);
+
+  diffexp = __clc_select(diffexp, diffexp1, cond2);
+  x = cond2 ? x1 : x;
+  y = cond2 ? y1 : y;
+
+  // General case: take absolute values of arguments
+  __CLC_GENTYPE u = __clc_fabs(x);
+  __CLC_GENTYPE v = __clc_fabs(y);
+
+  // Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
+  __CLC_LONGN swap_vu = u < v;
+  __CLC_GENTYPE uu = u;
+  u = swap_vu ? v : u;
+  v = swap_vu ? uu : v;
+
+  __CLC_GENTYPE vbyu = v / u;
+  __CLC_GENTYPE q1, q2; // head and tail of the result; summed further down
+
+  // General values of v/u. Use a look-up table and series expansion.
+
+  {
+    __CLC_GENTYPE val = vbyu > 0.0625 ? vbyu : 0.063;
+    __CLC_INTN index = __CLC_CONVERT_INTN(__clc_fma(256.0, val, 0.5));
+    q1 = USE_TABLE(atan_jby256_tbl_head, (index - 16));
+    q2 = USE_TABLE(atan_jby256_tbl_tail, (index - 16));
+    __CLC_GENTYPE c = __CLC_CONVERT_GENTYPE(index) * 0x1.0p-8;
+
+    // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
+    // m is minus the unbiased exponent of u; each value gets a single ldexp
+    __CLC_INTN m =
+        -(__CLC_CONVERT_INTN(__CLC_AS_ULONGN(u) >> EXPSHIFTBITS_DP64) -
+          EXPBIAS_DP64);
+    __CLC_GENTYPE um = __clc_ldexp(u, m);
+    __CLC_GENTYPE vm = __clc_ldexp(v, m);
+
+    // 26 leading bits of u
+    __CLC_GENTYPE u1 =
+        __CLC_AS_GENTYPE(__CLC_AS_ULONGN(um) & 0xfffffffff8000000UL);
+    __CLC_GENTYPE u2 = um - u1;
+
+    __CLC_GENTYPE r = MATH_DIVIDE(__clc_fma(-c, u2, __clc_fma(-c, u1, vm)),
+                                  __clc_fma(c, vm, um));
+
+    // Polynomial approximation to atan(r)
+    __CLC_GENTYPE s = r * r;
+    q2 = q2 + __clc_fma((s * __clc_fma(-s, 0.19999918038989143496,
+                                       0.33333333333224095522)),
+                        -r, r);
+  }
+
+  __CLC_GENTYPE q3, q4; // tiny-ratio path: the result is vbyu itself
+  {
+    q3 = 0.0;
+    q4 = vbyu;
+  }
+
+  __CLC_GENTYPE q5, q6; // small-ratio path: polynomial in vbyu
+  {
+    __CLC_GENTYPE u1 =
+        __CLC_AS_GENTYPE(__CLC_AS_ULONGN(u) & 0xffffffff00000000UL);
+    __CLC_GENTYPE u2 = u - u1;
+    __CLC_GENTYPE vu1 =
+        __CLC_AS_GENTYPE(__CLC_AS_ULONGN(vbyu) & 0xffffffff00000000UL);
+    __CLC_GENTYPE vu2 = vbyu - vu1;
+
+    q5 = 0.0;
+    __CLC_GENTYPE s = vbyu * vbyu;
+    q6 = vbyu +
+         __clc_fma(
+             -vbyu * s,
+             __clc_fma(
+                 -s,
+                 __clc_fma(-s,
+                           __clc_fma(-s,
+                                     __clc_fma(-s, 0.90029810285449784439E-01,
+                                               0.11110736283514525407),
+                                     0.14285713561807169030),
+                           0.19999999999393223405),
+                 0.33333333333333170500),
+             MATH_DIVIDE(__clc_fma(-u, vu2,
+                                   __clc_fma(-u2, vu1, __clc_fma(-u1, vu1, v))),
+                         u));
+  }
+
+  q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
+  q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
+
+  q1 = vbyu > 0.0625 ? q1 : q3;
+  q2 = vbyu > 0.0625 ? q2 : q4;
+
+  // Tidy-up according to which quadrant the arguments lie in
+  __CLC_GENTYPE res1, res2, res3, res4;
+  q1 = swap_vu ? piby2_head - q1 : q1;
+  q2 = swap_vu ? piby2_tail - q2 : q2;
+  q1 = xneg ? pi_head - q1 : q1;
+  q2 = xneg ? pi_tail - q2 : q2;
+  q1 = MATH_DIVIDE(q1 + q2, pi);
+  res4 = yneg ? -q1 : q1;
+
+  res1 = yneg ? -0.75 : 0.75;
+  res2 = yneg ? -0.25 : 0.25;
+  res3 = xneg ? res1 : res2;
+
+  res3 = __clc_select(res4, res3,
+                      __CLC_CONVERT_LONGN(__clc_isinf(y2) & __clc_isinf(x2)));
+  res1 = yneg ? -1.0 : 1.0;
+
+  // abs(x)/abs(y) > 2^56 and x < 0: result is +-1
+  res3 = diffexp < -56 && xneg ? res1 : res3;
+
+  res4 = MATH_DIVIDE(MATH_DIVIDE(y, x), pi);
+  // x positive and dominant over y by a factor of 2^28
+  res3 = diffexp < -28 && xneg == 0 ? res4 : res3;
+
+  // abs(y)/abs(x) > 2^56
+  res4 = yneg ? -0.5 : 0.5; // atan(y/x) is insignificant next to pi/2: +-0.5
+  res3 = diffexp > 56 ? res4 : res3;
+
+  res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +-0.5 depending on sign of y
+  res4 = xneg ? res1 : y2;
+
+  // Zero y gives +-0 for positive x and +-1 for negative x
+  res3 = y2 == 0.0 ? res4 : res3;
+  res3 = __clc_isnan(y2) ? y2 : res3;
+  res3 = __clc_isnan(x2) ? x2 : res3;
+
+  return res3;
+}
+
+#elif __CLC_FPSIZE == 16
+// Half atan2pi(y, x): forwards to the float overload and converts the result.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2pi(__CLC_GENTYPE y,
+                                                   __CLC_GENTYPE x) {
+  return __CLC_CONVERT_GENTYPE(
+      __clc_atan2pi(__CLC_CONVERT_FLOATN(y), __CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_tables.cl b/libclc/clc/lib/generic/math/clc_tables.cl
index 7ad005f91b575..2d6d280f7ea06 100644
--- a/libclc/clc/lib/generic/math/clc_tables.cl
+++ b/libclc/clc/lib/generic/math/clc_tables.cl
@@ -180,4 +180,181 @@ DECLARE_TABLE(double, LN_TBL_HI, 65) = {
 
 CLC_TABLE_FUNCTION(double, LN_TBL_HI, ln_tbl_hi);
 
+// Arrays atan_jby256_head and atan_jby256_tail contain leading and trailing
+// parts respectively of precomputed values of atan(j/256), for j = 16, 17, ...,
+// 256. atan_jby256_head contains the first 21 bits of precision, and
+// atan_jby256_tail contains a further 53 bits precision.
+
+DECLARE_TABLE(double, ATAN_JBY256_TBL_HEAD, 241) = {
+    0x1.ff55b00000000p-5, 0x1.0f99e00000000p-4, 0x1.1f86d00000000p-4,
+    0x1.2f71900000000p-4, 0x1.3f59f00000000p-4, 0x1.4f3fd00000000p-4,
+    0x1.5f23200000000p-4, 0x1.6f03b00000000p-4, 0x1.7ee1800000000p-4,
+    0x1.8ebc500000000p-4, 0x1.9e94100000000p-4, 0x1.ae68a00000000p-4,
+    0x1.be39e00000000p-4, 0x1.ce07c00000000p-4, 0x1.ddd2100000000p-4,
+    0x1.ed98c00000000p-4, 0x1.fd5ba00000000p-4, 0x1.068d500000000p-3,
+    0x1.0e6ad00000000p-3, 0x1.1646500000000p-3, 0x1.1e1fa00000000p-3,
+    0x1.25f6e00000000p-3, 0x1.2dcbd00000000p-3, 0x1.359e800000000p-3,
+    0x1.3d6ee00000000p-3, 0x1.453ce00000000p-3, 0x1.4d08700000000p-3,
+    0x1.54d1800000000p-3, 0x1.5c98100000000p-3, 0x1.645bf00000000p-3,
+    0x1.6c1d400000000p-3, 0x1.73dbd00000000p-3, 0x1.7b97b00000000p-3,
+    0x1.8350b00000000p-3, 0x1.8b06e00000000p-3, 0x1.92ba300000000p-3,
+    0x1.9a6a800000000p-3, 0x1.a217e00000000p-3, 0x1.a9c2300000000p-3,
+    0x1.b169600000000p-3, 0x1.b90d700000000p-3, 0x1.c0ae500000000p-3,
+    0x1.c84bf00000000p-3, 0x1.cfe6500000000p-3, 0x1.d77d500000000p-3,
+    0x1.df11000000000p-3, 0x1.e6a1400000000p-3, 0x1.ee2e100000000p-3,
+    0x1.f5b7500000000p-3, 0x1.fd3d100000000p-3, 0x1.025fa00000000p-2,
+    0x1.061ee00000000p-2, 0x1.09dc500000000p-2, 0x1.0d97e00000000p-2,
+    0x1.1151a00000000p-2, 0x1.1509700000000p-2, 0x1.18bf500000000p-2,
+    0x1.1c73500000000p-2, 0x1.2025500000000p-2, 0x1.23d5600000000p-2,
+    0x1.2783700000000p-2, 0x1.2b2f700000000p-2, 0x1.2ed9800000000p-2,
+    0x1.3281800000000p-2, 0x1.3627700000000p-2, 0x1.39cb400000000p-2,
+    0x1.3d6d100000000p-2, 0x1.410cb00000000p-2, 0x1.44aa400000000p-2,
+    0x1.4845a00000000p-2, 0x1.4bdee00000000p-2, 0x1.4f75f00000000p-2,
+    0x1.530ad00000000p-2, 0x1.569d800000000p-2, 0x1.5a2e000000000p-2,
+    0x1.5dbc300000000p-2, 0x1.6148400000000p-2, 0x1.64d1f00000000p-2,
+    0x1.6859700000000p-2, 0x1.6bdea00000000p-2, 0x1.6f61900000000p-2,
+    0x1.72e2200000000p-2, 0x1.7660700000000p-2, 0x1.79dc600000000p-2,
+    0x1.7d56000000000p-2, 0x1.80cd400000000p-2, 0x1.8442200000000p-2,
+    0x1.87b4b00000000p-2, 0x1.8b24d00000000p-2, 0x1.8e92900000000p-2,
+    0x1.91fde00000000p-2, 0x1.9566d00000000p-2, 0x1.98cd500000000p-2,
+    0x1.9c31600000000p-2, 0x1.9f93000000000p-2, 0x1.a2f2300000000p-2,
+    0x1.a64ee00000000p-2, 0x1.a9a9200000000p-2, 0x1.ad00f00000000p-2,
+    0x1.b056400000000p-2, 0x1.b3a9100000000p-2, 0x1.b6f9600000000p-2,
+    0x1.ba47300000000p-2, 0x1.bd92800000000p-2, 0x1.c0db400000000p-2,
+    0x1.c421900000000p-2, 0x1.c765500000000p-2, 0x1.caa6800000000p-2,
+    0x1.cde5300000000p-2, 0x1.d121500000000p-2, 0x1.d45ae00000000p-2,
+    0x1.d791f00000000p-2, 0x1.dac6700000000p-2, 0x1.ddf8500000000p-2,
+    0x1.e127b00000000p-2, 0x1.e454800000000p-2, 0x1.e77eb00000000p-2,
+    0x1.eaa6500000000p-2, 0x1.edcb600000000p-2, 0x1.f0ede00000000p-2,
+    0x1.f40dd00000000p-2, 0x1.f72b200000000p-2, 0x1.fa45d00000000p-2,
+    0x1.fd5e000000000p-2, 0x1.0039c00000000p-1, 0x1.01c3400000000p-1,
+    0x1.034b700000000p-1, 0x1.04d2500000000p-1, 0x1.0657e00000000p-1,
+    0x1.07dc300000000p-1, 0x1.095f300000000p-1, 0x1.0ae0e00000000p-1,
+    0x1.0c61400000000p-1, 0x1.0de0500000000p-1, 0x1.0f5e200000000p-1,
+    0x1.10daa00000000p-1, 0x1.1255d00000000p-1, 0x1.13cfb00000000p-1,
+    0x1.1548500000000p-1, 0x1.16bfa00000000p-1, 0x1.1835a00000000p-1,
+    0x1.19aa500000000p-1, 0x1.1b1dc00000000p-1, 0x1.1c8fe00000000p-1,
+    0x1.1e00b00000000p-1, 0x1.1f70400000000p-1, 0x1.20de800000000p-1,
+    0x1.224b700000000p-1, 0x1.23b7100000000p-1, 0x1.2521700000000p-1,
+    0x1.268a900000000p-1, 0x1.27f2600000000p-1, 0x1.2958e00000000p-1,
+    0x1.2abe200000000p-1, 0x1.2c22100000000p-1, 0x1.2d84c00000000p-1,
+    0x1.2ee6200000000p-1, 0x1.3046400000000p-1, 0x1.31a5200000000p-1,
+    0x1.3302b00000000p-1, 0x1.345f000000000p-1, 0x1.35ba000000000p-1,
+    0x1.3713d00000000p-1, 0x1.386c500000000p-1, 0x1.39c3900000000p-1,
+    0x1.3b19800000000p-1, 0x1.3c6e400000000p-1, 0x1.3dc1c00000000p-1,
+    0x1.3f13f00000000p-1, 0x1.4064f00000000p-1, 0x1.41b4a00000000p-1,
+    0x1.4303200000000p-1, 0x1.4450600000000p-1, 0x1.459c600000000p-1,
+    0x1.46e7200000000p-1, 0x1.4830a00000000p-1, 0x1.4978f00000000p-1,
+    0x1.4ac0000000000p-1, 0x1.4c05e00000000p-1, 0x1.4d4a800000000p-1,
+    0x1.4e8de00000000p-1, 0x1.4fd0100000000p-1, 0x1.5111000000000p-1,
+    0x1.5250c00000000p-1, 0x1.538f500000000p-1, 0x1.54cca00000000p-1,
+    0x1.5608d00000000p-1, 0x1.5743c00000000p-1, 0x1.587d800000000p-1,
+    0x1.59b6000000000p-1, 0x1.5aed600000000p-1, 0x1.5c23900000000p-1,
+    0x1.5d58900000000p-1, 0x1.5e8c600000000p-1, 0x1.5fbf000000000p-1,
+    0x1.60f0800000000p-1, 0x1.6220d00000000p-1, 0x1.634ff00000000p-1,
+    0x1.647de00000000p-1, 0x1.65aab00000000p-1, 0x1.66d6600000000p-1,
+    0x1.6800e00000000p-1, 0x1.692a400000000p-1, 0x1.6a52700000000p-1,
+    0x1.6b79800000000p-1, 0x1.6c9f700000000p-1, 0x1.6dc4400000000p-1,
+    0x1.6ee7f00000000p-1, 0x1.700a700000000p-1, 0x1.712be00000000p-1,
+    0x1.724c300000000p-1, 0x1.736b600000000p-1, 0x1.7489700000000p-1,
+    0x1.75a6700000000p-1, 0x1.76c2400000000p-1, 0x1.77dd100000000p-1,
+    0x1.78f6b00000000p-1, 0x1.7a0f400000000p-1, 0x1.7b26c00000000p-1,
+    0x1.7c3d300000000p-1, 0x1.7d52800000000p-1, 0x1.7e66c00000000p-1,
+    0x1.7f79e00000000p-1, 0x1.808c000000000p-1, 0x1.819d000000000p-1,
+    0x1.82ad000000000p-1, 0x1.83bbe00000000p-1, 0x1.84c9c00000000p-1,
+    0x1.85d6900000000p-1, 0x1.86e2500000000p-1, 0x1.87ed000000000p-1,
+    0x1.88f6b00000000p-1, 0x1.89ff500000000p-1, 0x1.8b06f00000000p-1,
+    0x1.8c0d900000000p-1, 0x1.8d13200000000p-1, 0x1.8e17a00000000p-1,
+    0x1.8f1b300000000p-1, 0x1.901db00000000p-1, 0x1.911f300000000p-1,
+    0x1.921fb00000000p-1,
+};
+
+CLC_TABLE_FUNCTION(double, ATAN_JBY256_TBL_HEAD, atan_jby256_tbl_head);
+
+DECLARE_TABLE(double, ATAN_JBY256_TBL_TAIL, 241) = {
+    0x1.6e59fbd38db2cp-26, 0x1.4e3aa54dedf96p-25, 0x1.7e105ab1bda88p-25,
+    0x1.8c5254d013fd0p-27, 0x1.cf8ab3ad62670p-29, 0x1.9dca4bec80468p-26,
+    0x1.3f4b5ec98a8dap-26, 0x1.b9d49619d81fep-25, 0x1.3017887460934p-27,
+    0x1.11e3eca0b9944p-26, 0x1.4f3f73c5a332ep-26, 0x1.c71c8ae0e00a6p-26,
+    0x1.7cde0f86fbdc7p-25, 0x1.70f328c889c72p-26, 0x1.c07ae9b994efep-26,
+    0x1.0c8021d7b1698p-27, 0x1.35585edb8cb22p-25, 0x1.0842567b30e96p-24,
+    0x1.99e811031472ep-24, 0x1.041821416bceep-25, 0x1.f6086e4dc96f4p-24,
+    0x1.71a535c5f1b58p-27, 0x1.65f743fe63ca1p-24, 0x1.dbd733472d014p-24,
+    0x1.d18cc4d8b0d1dp-24, 0x1.8c12553c8fb29p-24, 0x1.53b49e2e8f991p-24,
+    0x1.7422ae148c141p-24, 0x1.e3ec269df56a8p-27, 0x1.ff6754e7e0ac9p-24,
+    0x1.131267b1b5aadp-24, 0x1.d14fa403a94bcp-24, 0x1.2f396c089a3d8p-25,
+    0x1.c731d78fa95bbp-24, 0x1.c50f385177399p-24, 0x1.f41409c6f2c20p-25,
+    0x1.d2d90c4c39ec0p-24, 0x1.80420696f2106p-25, 0x1.b40327943a2e8p-27,
+    0x1.5d35e02f3d2a2p-25, 0x1.4a498288117b0p-25, 0x1.35da119afb324p-25,
+    0x1.14e85cdb9a908p-24, 0x1.38754e5547b9ap-25, 0x1.be40ae6ce3246p-24,
+    0x1.0c993b3bea7e7p-24, 0x1.1d2dd89ac3359p-24, 0x1.1476603332c46p-25,
+    0x1.f25901bac55b7p-24, 0x1.f881b7c826e28p-24, 0x1.441996d698d20p-24,
+    0x1.407ac521ea089p-23, 0x1.2fb0c6c4b1723p-23, 0x1.ca135966a3e18p-23,
+    0x1.b1218e4d646e4p-25, 0x1.d4e72a350d288p-25, 0x1.4617e2f04c329p-23,
+    0x1.096ec41e82650p-25, 0x1.9f91f25773e6ep-24, 0x1.59c0820f1d674p-25,
+    0x1.02bf7a2df1064p-25, 0x1.fb36bfc40508fp-23, 0x1.ea08f3f8dc892p-24,
+    0x1.3ed6254656a0ep-24, 0x1.b83f5e5e69c58p-25, 0x1.d6ec2af768592p-23,
+    0x1.493889a226f94p-25, 0x1.5ad8fa65279bap-23, 0x1.b615784d45434p-25,
+    0x1.09a184368f145p-23, 0x1.61a2439b0d91cp-24, 0x1.ce1a65e39a978p-24,
+    0x1.32a39a93b6a66p-23, 0x1.1c3699af804e7p-23, 0x1.75e0f4e44ede8p-26,
+    0x1.f77ced1a7a83bp-23, 0x1.84e7f0cb1b500p-29, 0x1.ec6b838b02dfep-23,
+    0x1.3ebf4dfbeda87p-23, 0x1.9397aed9cb475p-23, 0x1.07937bc239c54p-24,
+    0x1.aa754553131b6p-23, 0x1.4a05d407c45dcp-24, 0x1.132231a206dd0p-23,
+    0x1.2d8ecfdd69c88p-24, 0x1.a852c74218606p-24, 0x1.71bf2baeebb50p-23,
+    0x1.83d7db7491820p-27, 0x1.ca50d92b6da14p-25, 0x1.6f5cde8530298p-26,
+    0x1.f343198910740p-24, 0x1.0e8d241ccd80ap-24, 0x1.1535ac619e6c8p-24,
+    0x1.7316041c36cd2p-24, 0x1.985a000637d8ep-24, 0x1.f2f29858c0a68p-25,
+    0x1.879847f96d909p-23, 0x1.ab3d319e12e42p-23, 0x1.5088162dfc4c2p-24,
+    0x1.05749a1cd9d8cp-25, 0x1.da65c6c6b8618p-26, 0x1.739bf7df1ad64p-25,
+    0x1.bc31252aa3340p-25, 0x1.e528191ad3aa8p-26, 0x1.929d93df19f18p-23,
+    0x1.ff11eb693a080p-26, 0x1.55ae3f145a3a0p-27, 0x1.cbcd8c6c0ca82p-24,
+    0x1.0cb04d425d304p-24, 0x1.9adfcab5be678p-24, 0x1.93d90c5662508p-23,
+    0x1.68489bd35ff40p-24, 0x1.586ed3da2b7e0p-28, 0x1.7604d2e850eeep-23,
+    0x1.ac1d12bfb53d8p-24, 0x1.9b3d468274740p-28, 0x1.fc5d68d10e53cp-24,
+    0x1.8f9e51884becbp-23, 0x1.a87f0869c06d1p-23, 0x1.31e7279f685fap-23,
+    0x1.6a8282f9719b0p-27, 0x1.0d2724a8a44e0p-25, 0x1.a60524b11ad4ep-23,
+    0x1.75fdf832750f0p-26, 0x1.cf06902e4cd36p-23, 0x1.e82422d4f6d10p-25,
+    0x1.24a091063e6c0p-26, 0x1.8a1a172dc6f38p-24, 0x1.29b6619f8a92dp-22,
+    0x1.9274d9c1b70c8p-24, 0x1.0c34b1fbb7930p-26, 0x1.639866c20eb50p-25,
+    0x1.6d6d0f6832e9ep-23, 0x1.af54def99f25ep-22, 0x1.16cfc52a00262p-22,
+    0x1.dcc1e83569c32p-23, 0x1.37f7a551ed425p-22, 0x1.f6360adc98887p-22,
+    0x1.2c6ec8d35a2c1p-22, 0x1.bd44df84cb036p-23, 0x1.117cf826e310ep-22,
+    0x1.ca533f332cfc9p-22, 0x1.0f208509dbc2ep-22, 0x1.cd07d93c945dep-23,
+    0x1.57bdfd67e6d72p-22, 0x1.aab89c516c658p-24, 0x1.3e823b1a1b8a0p-25,
+    0x1.307464a9d6d3cp-23, 0x1.c5993cd438843p-22, 0x1.ba2fca02ab554p-22,
+    0x1.01a5b6983a268p-23, 0x1.273d1b350efc8p-25, 0x1.64c238c37b0c6p-23,
+    0x1.aded07370a300p-25, 0x1.78091197eb47ep-23, 0x1.4b0f245e0dabcp-24,
+    0x1.080d9794e2eafp-22, 0x1.d4ec242b60c76p-23, 0x1.221d2f940caa0p-27,
+    0x1.cdbc42b2bba5cp-24, 0x1.cce37bb440840p-25, 0x1.6c1d999cf1dd0p-22,
+    0x1.bed8a07eb0870p-26, 0x1.69ed88f490e3cp-24, 0x1.cd41719b73ef0p-25,
+    0x1.cbc4ac95b41b7p-22, 0x1.238f1b890f5d7p-22, 0x1.50c4282259cc4p-24,
+    0x1.713d2de87b3e2p-22, 0x1.1d5a7d2255276p-23, 0x1.c0dfd48227ac1p-22,
+    0x1.1c964dab76753p-22, 0x1.6de56d5704496p-23, 0x1.4aeb71fd19968p-23,
+    0x1.fbf91c57b1918p-23, 0x1.d6bef7fbe5d9ap-22, 0x1.464d3dc249066p-22,
+    0x1.638e2ec4d9073p-22, 0x1.16f4a7247ea7cp-24, 0x1.1a0a740f1d440p-28,
+    0x1.6edbb0114a33cp-23, 0x1.dbee8bf1d513cp-24, 0x1.5b8bdb0248f73p-22,
+    0x1.7de3d3f5eac64p-22, 0x1.ee24187ae448ap-23, 0x1.e06c591ec5192p-22,
+    0x1.4e3861a332738p-24, 0x1.a9599dcc2bfe4p-24, 0x1.f732fbad43468p-25,
+    0x1.eb9f573b727d9p-22, 0x1.8b212a2eb9897p-22, 0x1.384884c167215p-22,
+    0x1.0e2d363020051p-22, 0x1.2820879fbd022p-22, 0x1.a1ab9893e4b30p-22,
+    0x1.2d1b817a24478p-23, 0x1.15d7b8ded4878p-25, 0x1.8968f9db3a5e4p-24,
+    0x1.71c4171fe135fp-22, 0x1.6d80f605d0d8cp-22, 0x1.c91f043691590p-24,
+    0x1.39f8a15fce2b2p-23, 0x1.55beda9d94b80p-27, 0x1.b12c15d60949ap-23,
+    0x1.24167b312bfe3p-22, 0x1.0ab8633070277p-22, 0x1.54554ebbc80eep-23,
+    0x1.0204aef5a4bb8p-25, 0x1.8af08c679cf2cp-22, 0x1.0852a330ae6c8p-22,
+    0x1.6d3eb9ec32916p-23, 0x1.685cb7fcbbafep-23, 0x1.1f751c1e0bd95p-22,
+    0x1.705b1b0f72560p-26, 0x1.b98d8d808ca92p-22, 0x1.2ea22c75cc980p-25,
+    0x1.7aba62bca0350p-22, 0x1.d73833442278cp-22, 0x1.5a5ca1fb18bf9p-22,
+    0x1.1a6092b6ecf28p-25, 0x1.44fd049aac104p-24, 0x1.c114fd8df5180p-29,
+    0x1.5972f130feae5p-22, 0x1.ca034a55fe198p-24, 0x1.6e2b149990227p-22,
+    0x1.b00000294592cp-24, 0x1.8b9bdc442620ep-22, 0x1.d94fdfabf3e4ep-23,
+    0x1.5db30b145ad9ap-23, 0x1.e3e1eb95022b0p-23, 0x1.d5b8b45442bd6p-22,
+    0x1.7a046231ecd2ep-22, 0x1.feafe3ef55232p-22, 0x1.839e7bfd78267p-22,
+    0x1.45cf49d6fa900p-25, 0x1.be3132b27f380p-27, 0x1.533980bb84f9fp-22,
+    0x1.889e2ce3ba390p-26, 0x1.f7778c3ad0cc8p-24, 0x1.46660cec4eba2p-23,
+    0x1.5110b4611a626p-23,
+};
+
+CLC_TABLE_FUNCTION(double, ATAN_JBY256_TBL_TAIL, atan_jby256_tbl_tail);
+
 #endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/atan2.cl b/libclc/generic/lib/math/atan2.cl
index 635f1cdfaca7e..20651c1ec1bf3 100644
--- a/libclc/generic/lib/math/atan2.cl
+++ b/libclc/generic/lib/math/atan2.cl
@@ -8,223 +8,9 @@
 
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
+#include <clc/math/clc_atan2.h>
 
-_CLC_OVERLOAD _CLC_DEF float atan2(float y, float x)
-{
-    const float pi = 0x1.921fb6p+1f;
-    const float piby2 = 0x1.921fb6p+0f;
-    const float piby4 = 0x1.921fb6p-1f;
-    const float threepiby4 = 0x1.2d97c8p+1f;
+#define FUNCTION atan2
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-    float ax = fabs(x);
-    float ay = fabs(y);
-    float v = min(ax, ay);
-    float u = max(ax, ay);
-
-    // Scale since u could be large, as in "regular" divide
-    float s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
-    float vbyu = s * MATH_DIVIDE(v, s*u);
-
-    float vbyu2 = vbyu * vbyu;
-
-#define USE_2_2_APPROXIMATION
-#if defined USE_2_2_APPROXIMATION
-    float p = mad(vbyu2, mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), -0x1.5554d0p-2f) * vbyu2 * vbyu;
-    float q = mad(vbyu2, mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
-#else
-    float p = mad(vbyu2, mad(vbyu2, -0x1.55cd22p-5f, -0x1.26cf76p-2f), -0x1.55554ep-2f) * vbyu2 * vbyu;
-    float q = mad(vbyu2, mad(vbyu2, mad(vbyu2, 0x1.9f1304p-5f, 0x1.2656fap-1f), 0x1.76b4b8p+0f), 1.0f);
-#endif
-
-    // Octant 0 result
-    float a = mad(p, MATH_RECIP(q), vbyu);
-
-    // Fix up 3 other octants
-    float at = piby2 - a;
-    a = ay > ax ? at : a;
-    at = pi - a;
-    a = x < 0.0F ? at : a;
-
-    // y == 0 => 0 for x >= 0, pi for x < 0
-    at = as_int(x) < 0 ? pi : 0.0f;
-    a = y == 0.0f ? at : a;
-
-    // if (!FINITE_ONLY()) {
-        // x and y are +- Inf
-        at = x > 0.0f ? piby4 : threepiby4;
-        a = ax == INFINITY & ay == INFINITY ? at : a;
-
-       // x or y is NaN
-       a = isnan(x) | isnan(y) ? as_float(QNANBITPATT_SP32) : a;
-    // }
-
-    // Fixup sign and return
-    return copysign(a, y);
-}
-
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan2, float, float);
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double atan2(double y, double x)
-{
-    const double pi = 3.1415926535897932e+00;          /* 0x400921fb54442d18 */
-    const double piby2 = 1.5707963267948966e+00;       /* 0x3ff921fb54442d18 */
-    const double piby4 = 7.8539816339744831e-01;       /* 0x3fe921fb54442d18 */
-    const double three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */
-    const double pi_head = 3.1415926218032836e+00;     /* 0x400921fb50000000 */
-    const double pi_tail = 3.1786509547056392e-08;     /* 0x3e6110b4611a6263 */
-    const double piby2_head = 1.5707963267948965e+00;  /* 0x3ff921fb54442d18 */
-    const double piby2_tail = 6.1232339957367660e-17;  /* 0x3c91a62633145c07 */
-
-    double x2 = x;
-    int xneg = as_int2(x).hi < 0;
-    int xexp = (as_int2(x).hi >> 20) & 0x7ff;
-
-    double y2 = y;
-    int yneg = as_int2(y).hi < 0;
-    int yexp = (as_int2(y).hi >> 20) & 0x7ff;
-
-    int cond2 = (xexp < 1021) & (yexp < 1021);
-    int diffexp = yexp - xexp;
-
-    // Scale up both x and y if they are both below 1/4
-    double x1 = ldexp(x, 1024);
-    int xexp1 = (as_int2(x1).hi >> 20) & 0x7ff;
-    double y1 = ldexp(y, 1024);
-    int yexp1 = (as_int2(y1).hi >> 20) & 0x7ff;
-    int diffexp1 = yexp1 - xexp1;
-
-    diffexp = cond2 ? diffexp1 : diffexp;
-    x = cond2 ? x1 : x;
-    y = cond2 ? y1 : y;
-
-    // General case: take absolute values of arguments
-    double u = fabs(x);
-    double v = fabs(y);
-
-    // Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
-    int swap_vu = u < v;
-    double uu = u;
-    u = swap_vu ? v : u;
-    v = swap_vu ? uu : v;
-
-    double vbyu = v / u;
-    double q1, q2;
-
-    // General values of v/u. Use a look-up table and series expansion.
-
-    {
-        double val = vbyu > 0.0625 ? vbyu : 0.063;
-        int index = convert_int(fma(256.0, val, 0.5));
-       double2 tv = USE_TABLE(atan_jby256_tbl, index - 16);
-       q1 = tv.s0;
-       q2 = tv.s1;
-        double c = (double)index * 0x1.0p-8;
-
-        // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
-        // u_exponent could be EMAX so we have to do it in 2 steps
-        int m = -((int)(as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
-       //double um = __amdil_ldexp_f64(u, m);
-       //double vm = __amdil_ldexp_f64(v, m);
-       double um = ldexp(u, m);
-       double vm = ldexp(v, m);
-
-        // 26 leading bits of u
-        double u1 = as_double(as_ulong(um) & 0xfffffffff8000000UL);
-        double u2 = um - u1;
-
-        double r = MATH_DIVIDE(fma(-c, u2, fma(-c, u1, vm)), fma(c, vm, um));
-
-        // Polynomial approximation to atan(r)
-        double s = r * r;
-        q2 = q2 + fma((s * fma(-s, 0.19999918038989143496, 0.33333333333224095522)), -r, r);
-    }
-
-
-    double q3, q4;
-    {
-        q3 = 0.0;
-        q4 = vbyu;
-    }
-
-    double q5, q6;
-    {
-        double u1 = as_double(as_ulong(u) & 0xffffffff00000000UL);
-        double u2 = u - u1;
-        double vu1 = as_double(as_ulong(vbyu) & 0xffffffff00000000UL);
-        double vu2 = vbyu - vu1;
-
-        q5 = 0.0;
-        double s = vbyu * vbyu;
-        q6 = vbyu + fma(-vbyu * s,
-                        fma(-s,
-                            fma(-s,
-                                fma(-s,
-                                    fma(-s, 0.90029810285449784439E-01,
-                                        0.11110736283514525407),
-                                    0.14285713561807169030),
-                                0.19999999999393223405),
-                            0.33333333333333170500),
-                        MATH_DIVIDE(fma(-u, vu2, fma(-u2, vu1, fma(-u1, vu1, v))), u));
-    }
-
-
-    q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
-    q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
-
-    q1 = vbyu > 0.0625 ? q1 : q3;
-    q2 = vbyu > 0.0625 ? q2 : q4;
-
-    // Tidy-up according to which quadrant the arguments lie in
-    double res1, res2, res3, res4;
-    q1 = swap_vu ? piby2_head - q1 : q1;
-    q2 = swap_vu ? piby2_tail - q2 : q2;
-    q1 = xneg ? pi_head - q1 : q1;
-    q2 = xneg ? pi_tail - q2 : q2;
-    q1 = q1 + q2;
-    res4 = yneg ? -q1 : q1;
-
-    res1 = yneg ? -three_piby4 : three_piby4;
-    res2 = yneg ? -piby4 : piby4;
-    res3 = xneg ? res1 : res2;
-
-    res3 = isinf(x2) & isinf(y2) ? res3 : res4;
-    res1 = yneg ? -pi : pi;
-
-    // abs(x)/abs(y) > 2^56 and x < 0
-    res3 = (diffexp < -56 && xneg) ? res1 : res3;
-
-    res4 = MATH_DIVIDE(y, x);
-    // x positive and dominant over y by a factor of 2^28
-    res3 = diffexp < -28 & xneg == 0 ? res4 : res3;
-
-    // abs(y)/abs(x) > 2^56
-    res4 = yneg ? -piby2 : piby2;       // atan(y/x) is insignificant compared to piby2
-    res3 = diffexp > 56 ? res4 : res3;
-
-    res3 = x2 == 0.0 ? res4 : res3;   // Zero x gives +- pi/2 depending on sign of y
-    res4 = xneg ? res1 : y2;
-
-    res3 = y2 == 0.0 ? res4 : res3;   // Zero y gives +-0 for positive x and +-pi for negative x
-    res3 = isnan(y2) ? y2 : res3;
-    res3 = isnan(x2) ? x2 : res3;
-
-    return res3;
-}
-
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double);
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2)
-
-#endif
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/atan2pi.cl b/libclc/generic/lib/math/atan2pi.cl
index 667e4519d8043..316db1d6c9c48 100644
--- a/libclc/generic/lib/math/atan2pi.cl
+++ b/libclc/generic/lib/math/atan2pi.cl
@@ -8,207 +8,9 @@
 
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
+#include <clc/math/clc_atan2pi.h>
 
-_CLC_OVERLOAD _CLC_DEF  float atan2pi(float y, float x) {
-    const float pi = 0x1.921fb6p+1f;
+#define FUNCTION atan2pi
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-    float ax = fabs(x);
-    float ay = fabs(y);
-    float v = min(ax, ay);
-    float u = max(ax, ay);
-
-    // Scale since u could be large, as in "regular" divide
-    float s = u > 0x1.0p+96f ? 0x1.0p-32f : 1.0f;
-    float vbyu = s * MATH_DIVIDE(v, s*u);
-
-    float vbyu2 = vbyu * vbyu;
-
-    float p = mad(vbyu2, mad(vbyu2, -0x1.7e1f78p-9f, -0x1.7d1b98p-3f), -0x1.5554d0p-2f) * vbyu2 * vbyu;
-    float q = mad(vbyu2, mad(vbyu2, 0x1.1a714cp-2f, 0x1.287c56p+0f), 1.0f);
-
-    // Octant 0 result
-    float a = MATH_DIVIDE(mad(p, MATH_RECIP(q), vbyu), pi);
-
-    // Fix up 3 other octants
-    float at = 0.5f - a;
-    a = ay > ax ? at : a;
-    at = 1.0f - a;
-    a = x < 0.0F ? at : a;
-
-    // y == 0 => 0 for x >= 0, pi for x < 0
-    at = as_int(x) < 0 ? 1.0f : 0.0f;
-    a = y == 0.0f ? at : a;
-
-    // if (!FINITE_ONLY()) {
-        // x and y are +- Inf
-        at = x > 0.0f ? 0.25f : 0.75f;
-        a = ax == INFINITY & ay == INFINITY ? at : a;
-
-       // x or y is NaN
-       a = isnan(x) | isnan(y) ? as_float(QNANBITPATT_SP32) : a;
-    // }
-
-    // Fixup sign and return
-    return copysign(a, y);
-}
-
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan2pi, float, float)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double atan2pi(double y, double x) {
-    const double pi = 3.1415926535897932e+00;          /* 0x400921fb54442d18 */
-    const double pi_head = 3.1415926218032836e+00;     /* 0x400921fb50000000 */
-    const double pi_tail = 3.1786509547056392e-08;     /* 0x3e6110b4611a6263 */
-    const double piby2_head = 1.5707963267948965e+00;  /* 0x3ff921fb54442d18 */
-    const double piby2_tail = 6.1232339957367660e-17;  /* 0x3c91a62633145c07 */
-
-    double x2 = x;
-    int xneg = as_int2(x).hi < 0;
-    int xexp = (as_int2(x).hi >> 20) & 0x7ff;
-
-    double y2 = y;
-    int yneg = as_int2(y).hi < 0;
-    int yexp = (as_int2(y).hi >> 20) & 0x7ff;
-
-    int cond2 = (xexp < 1021) & (yexp < 1021);
-    int diffexp = yexp - xexp;
-
-    // Scale up both x and y if they are both below 1/4
-    double x1 = ldexp(x, 1024);
-    int xexp1 = (as_int2(x1).hi >> 20) & 0x7ff;
-    double y1 = ldexp(y, 1024);
-    int yexp1 = (as_int2(y1).hi >> 20) & 0x7ff;
-    int diffexp1 = yexp1 - xexp1;
-
-    diffexp = cond2 ? diffexp1 : diffexp;
-    x = cond2 ? x1 : x;
-    y = cond2 ? y1 : y;
-
-    // General case: take absolute values of arguments
-    double u = fabs(x);
-    double v = fabs(y);
-
-    // Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
-    int swap_vu = u < v;
-    double uu = u;
-    u = swap_vu ? v : u;
-    v = swap_vu ? uu : v;
-
-    double vbyu = v / u;
-    double q1, q2;
-
-    // General values of v/u. Use a look-up table and series expansion.
-
-    {
-        double val = vbyu > 0.0625 ? vbyu : 0.063;
-        int index = convert_int(fma(256.0, val, 0.5));
-       double2 tv = USE_TABLE(atan_jby256_tbl, (index - 16));
-       q1 = tv.s0;
-       q2 = tv.s1;
-        double c = (double)index * 0x1.0p-8;
-
-        // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
-        // u_exponent could be EMAX so we have to do it in 2 steps
-        int m = -((int)(as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
-           double um = ldexp(u, m);
-           double vm = ldexp(v, m);
-
-        // 26 leading bits of u
-        double u1 = as_double(as_ulong(um) & 0xfffffffff8000000UL);
-        double u2 = um - u1;
-
-        double r = MATH_DIVIDE(fma(-c, u2, fma(-c, u1, vm)), fma(c, vm, um));
-
-        // Polynomial approximation to atan(r)
-        double s = r * r;
-        q2 = q2 + fma((s * fma(-s, 0.19999918038989143496, 0.33333333333224095522)), -r, r);
-    }
-
-
-    double q3, q4;
-    {
-        q3 = 0.0;
-        q4 = vbyu;
-    }
-
-    double q5, q6;
-    {
-        double u1 = as_double(as_ulong(u) & 0xffffffff00000000UL);
-        double u2 = u - u1;
-        double vu1 = as_double(as_ulong(vbyu) & 0xffffffff00000000UL);
-        double vu2 = vbyu - vu1;
-
-        q5 = 0.0;
-        double s = vbyu * vbyu;
-        q6 = vbyu + fma(-vbyu * s,
-                        fma(-s,
-                            fma(-s,
-                                fma(-s,
-                                    fma(-s, 0.90029810285449784439E-01,
-                                        0.11110736283514525407),
-                                    0.14285713561807169030),
-                                0.19999999999393223405),
-                            0.33333333333333170500),
-                        MATH_DIVIDE(fma(-u, vu2, fma(-u2, vu1, fma(-u1, vu1, v))), u));
-    }
-
-
-    q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
-    q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
-
-    q1 = vbyu > 0.0625 ? q1 : q3;
-    q2 = vbyu > 0.0625 ? q2 : q4;
-
-    // Tidy-up according to which quadrant the arguments lie in
-    double res1, res2, res3, res4;
-    q1 = swap_vu ? piby2_head - q1 : q1;
-    q2 = swap_vu ? piby2_tail - q2 : q2;
-    q1 = xneg ? pi_head - q1 : q1;
-    q2 = xneg ? pi_tail - q2 : q2;
-    q1 = MATH_DIVIDE(q1 + q2, pi);
-    res4 = yneg ? -q1 : q1;
-
-    res1 = yneg ? -0.75 : 0.75;
-    res2 = yneg ? -0.25 : 0.25;
-    res3 = xneg ? res1 : res2;
-
-    res3 = isinf(y2) & isinf(x2) ? res3 : res4;
-    res1 = yneg ? -1.0 : 1.0;
-
-    // abs(x)/abs(y) > 2^56 and x < 0
-    res3 = (diffexp < -56 && xneg) ? res1 : res3;
-
-    res4 = MATH_DIVIDE(MATH_DIVIDE(y, x), pi);
-    // x positive and dominant over y by a factor of 2^28
-    res3 = diffexp < -28 & xneg == 0 ? res4 : res3;
-
-    // abs(y)/abs(x) > 2^56
-    res4 = yneg ? -0.5 : 0.5;        // atan(y/x) is insignificant compared to piby2
-    res3 = diffexp > 56 ? res4 : res3;
-
-    res3 = x2 == 0.0 ? res4 : res3;  // Zero x gives +- pi/2 depending on sign of y
-    res4 = xneg ? res1 : y2;
-
-    res3 = y2 == 0.0 ? res4 : res3;  // Zero y gives +-0 for positive x and +-pi for negative x
-    res3 = isnan(y2) ? y2 : res3;
-    res3 = isnan(x2) ? x2 : res3;
-
-    return res3;
-}
-
-
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2pi, double, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN_FP16(atan2pi)
-
-#endif
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl
index d7d3ba4aafec9..c03b4d1a3c18a 100644
--- a/libclc/generic/lib/math/tables.cl
+++ b/libclc/generic/lib/math/tables.cl
@@ -745,258 +745,6 @@ TABLE_FUNCTION(float2, EXP_TBL_EP, exp_tbl_ep);
 
 #ifdef cl_khr_fp64
 
-
-
-// Arrays atan_jby256_lead and atan_jby256_tail contain
-// leading and trailing parts respectively of precomputed
-// values of atan(j/256), for j = 16, 17, ..., 256.
-// atan_jby256_lead contains the first 21 bits of precision,
-// and atan_jby256_tail contains a further 53 bits precision.
-
-DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = {
-    (double2)(0x1.ff55b00000000p-5, 0x1.6e59fbd38db2cp-26),
-    (double2)(0x1.0f99e00000000p-4, 0x1.4e3aa54dedf96p-25),
-    (double2)(0x1.1f86d00000000p-4, 0x1.7e105ab1bda88p-25),
-    (double2)(0x1.2f71900000000p-4, 0x1.8c5254d013fd0p-27),
-    (double2)(0x1.3f59f00000000p-4, 0x1.cf8ab3ad62670p-29),
-    (double2)(0x1.4f3fd00000000p-4, 0x1.9dca4bec80468p-26),
-    (double2)(0x1.5f23200000000p-4, 0x1.3f4b5ec98a8dap-26),
-    (double2)(0x1.6f03b00000000p-4, 0x1.b9d49619d81fep-25),
-    (double2)(0x1.7ee1800000000p-4, 0x1.3017887460934p-27),
-    (double2)(0x1.8ebc500000000p-4, 0x1.11e3eca0b9944p-26),
-    (double2)(0x1.9e94100000000p-4, 0x1.4f3f73c5a332ep-26),
-    (double2)(0x1.ae68a00000000p-4, 0x1.c71c8ae0e00a6p-26),
-    (double2)(0x1.be39e00000000p-4, 0x1.7cde0f86fbdc7p-25),
-    (double2)(0x1.ce07c00000000p-4, 0x1.70f328c889c72p-26),
-    (double2)(0x1.ddd2100000000p-4, 0x1.c07ae9b994efep-26),
-    (double2)(0x1.ed98c00000000p-4, 0x1.0c8021d7b1698p-27),
-    (double2)(0x1.fd5ba00000000p-4, 0x1.35585edb8cb22p-25),
-    (double2)(0x1.068d500000000p-3, 0x1.0842567b30e96p-24),
-    (double2)(0x1.0e6ad00000000p-3, 0x1.99e811031472ep-24),
-    (double2)(0x1.1646500000000p-3, 0x1.041821416bceep-25),
-    (double2)(0x1.1e1fa00000000p-3, 0x1.f6086e4dc96f4p-24),
-    (double2)(0x1.25f6e00000000p-3, 0x1.71a535c5f1b58p-27),
-    (double2)(0x1.2dcbd00000000p-3, 0x1.65f743fe63ca1p-24),
-    (double2)(0x1.359e800000000p-3, 0x1.dbd733472d014p-24),
-    (double2)(0x1.3d6ee00000000p-3, 0x1.d18cc4d8b0d1dp-24),
-    (double2)(0x1.453ce00000000p-3, 0x1.8c12553c8fb29p-24),
-    (double2)(0x1.4d08700000000p-3, 0x1.53b49e2e8f991p-24),
-    (double2)(0x1.54d1800000000p-3, 0x1.7422ae148c141p-24),
-    (double2)(0x1.5c98100000000p-3, 0x1.e3ec269df56a8p-27),
-    (double2)(0x1.645bf00000000p-3, 0x1.ff6754e7e0ac9p-24),
-    (double2)(0x1.6c1d400000000p-3, 0x1.131267b1b5aadp-24),
-    (double2)(0x1.73dbd00000000p-3, 0x1.d14fa403a94bcp-24),
-    (double2)(0x1.7b97b00000000p-3, 0x1.2f396c089a3d8p-25),
-    (double2)(0x1.8350b00000000p-3, 0x1.c731d78fa95bbp-24),
-    (double2)(0x1.8b06e00000000p-3, 0x1.c50f385177399p-24),
-    (double2)(0x1.92ba300000000p-3, 0x1.f41409c6f2c20p-25),
-    (double2)(0x1.9a6a800000000p-3, 0x1.d2d90c4c39ec0p-24),
-    (double2)(0x1.a217e00000000p-3, 0x1.80420696f2106p-25),
-    (double2)(0x1.a9c2300000000p-3, 0x1.b40327943a2e8p-27),
-    (double2)(0x1.b169600000000p-3, 0x1.5d35e02f3d2a2p-25),
-    (double2)(0x1.b90d700000000p-3, 0x1.4a498288117b0p-25),
-    (double2)(0x1.c0ae500000000p-3, 0x1.35da119afb324p-25),
-    (double2)(0x1.c84bf00000000p-3, 0x1.14e85cdb9a908p-24),
-    (double2)(0x1.cfe6500000000p-3, 0x1.38754e5547b9ap-25),
-    (double2)(0x1.d77d500000000p-3, 0x1.be40ae6ce3246p-24),
-    (double2)(0x1.df11000000000p-3, 0x1.0c993b3bea7e7p-24),
-    (double2)(0x1.e6a1400000000p-3, 0x1.1d2dd89ac3359p-24),
-    (double2)(0x1.ee2e100000000p-3, 0x1.1476603332c46p-25),
-    (double2)(0x1.f5b7500000000p-3, 0x1.f25901bac55b7p-24),
-    (double2)(0x1.fd3d100000000p-3, 0x1.f881b7c826e28p-24),
-    (double2)(0x1.025fa00000000p-2, 0x1.441996d698d20p-24),
-    (double2)(0x1.061ee00000000p-2, 0x1.407ac521ea089p-23),
-    (double2)(0x1.09dc500000000p-2, 0x1.2fb0c6c4b1723p-23),
-    (double2)(0x1.0d97e00000000p-2, 0x1.ca135966a3e18p-23),
-    (double2)(0x1.1151a00000000p-2, 0x1.b1218e4d646e4p-25),
-    (double2)(0x1.1509700000000p-2, 0x1.d4e72a350d288p-25),
-    (double2)(0x1.18bf500000000p-2, 0x1.4617e2f04c329p-23),
-    (double2)(0x1.1c73500000000p-2, 0x1.096ec41e82650p-25),
-    (double2)(0x1.2025500000000p-2, 0x1.9f91f25773e6ep-24),
-    (double2)(0x1.23d5600000000p-2, 0x1.59c0820f1d674p-25),
-    (double2)(0x1.2783700000000p-2, 0x1.02bf7a2df1064p-25),
-    (double2)(0x1.2b2f700000000p-2, 0x1.fb36bfc40508fp-23),
-    (double2)(0x1.2ed9800000000p-2, 0x1.ea08f3f8dc892p-24),
-    (double2)(0x1.3281800000000p-2, 0x1.3ed6254656a0ep-24),
-    (double2)(0x1.3627700000000p-2, 0x1.b83f5e5e69c58p-25),
-    (double2)(0x1.39cb400000000p-2, 0x1.d6ec2af768592p-23),
-    (double2)(0x1.3d6d100000000p-2, 0x1.493889a226f94p-25),
-    (double2)(0x1.410cb00000000p-2, 0x1.5ad8fa65279bap-23),
-    (double2)(0x1.44aa400000000p-2, 0x1.b615784d45434p-25),
-    (double2)(0x1.4845a00000000p-2, 0x1.09a184368f145p-23),
-    (double2)(0x1.4bdee00000000p-2, 0x1.61a2439b0d91cp-24),
-    (double2)(0x1.4f75f00000000p-2, 0x1.ce1a65e39a978p-24),
-    (double2)(0x1.530ad00000000p-2, 0x1.32a39a93b6a66p-23),
-    (double2)(0x1.569d800000000p-2, 0x1.1c3699af804e7p-23),
-    (double2)(0x1.5a2e000000000p-2, 0x1.75e0f4e44ede8p-26),
-    (double2)(0x1.5dbc300000000p-2, 0x1.f77ced1a7a83bp-23),
-    (double2)(0x1.6148400000000p-2, 0x1.84e7f0cb1b500p-29),
-    (double2)(0x1.64d1f00000000p-2, 0x1.ec6b838b02dfep-23),
-    (double2)(0x1.6859700000000p-2, 0x1.3ebf4dfbeda87p-23),
-    (double2)(0x1.6bdea00000000p-2, 0x1.9397aed9cb475p-23),
-    (double2)(0x1.6f61900000000p-2, 0x1.07937bc239c54p-24),
-    (double2)(0x1.72e2200000000p-2, 0x1.aa754553131b6p-23),
-    (double2)(0x1.7660700000000p-2, 0x1.4a05d407c45dcp-24),
-    (double2)(0x1.79dc600000000p-2, 0x1.132231a206dd0p-23),
-    (double2)(0x1.7d56000000000p-2, 0x1.2d8ecfdd69c88p-24),
-    (double2)(0x1.80cd400000000p-2, 0x1.a852c74218606p-24),
-    (double2)(0x1.8442200000000p-2, 0x1.71bf2baeebb50p-23),
-    (double2)(0x1.87b4b00000000p-2, 0x1.83d7db7491820p-27),
-    (double2)(0x1.8b24d00000000p-2, 0x1.ca50d92b6da14p-25),
-    (double2)(0x1.8e92900000000p-2, 0x1.6f5cde8530298p-26),
-    (double2)(0x1.91fde00000000p-2, 0x1.f343198910740p-24),
-    (double2)(0x1.9566d00000000p-2, 0x1.0e8d241ccd80ap-24),
-    (double2)(0x1.98cd500000000p-2, 0x1.1535ac619e6c8p-24),
-    (double2)(0x1.9c31600000000p-2, 0x1.7316041c36cd2p-24),
-    (double2)(0x1.9f93000000000p-2, 0x1.985a000637d8ep-24),
-    (double2)(0x1.a2f2300000000p-2, 0x1.f2f29858c0a68p-25),
-    (double2)(0x1.a64ee00000000p-2, 0x1.879847f96d909p-23),
-    (double2)(0x1.a9a9200000000p-2, 0x1.ab3d319e12e42p-23),
-    (double2)(0x1.ad00f00000000p-2, 0x1.5088162dfc4c2p-24),
-    (double2)(0x1.b056400000000p-2, 0x1.05749a1cd9d8cp-25),
-    (double2)(0x1.b3a9100000000p-2, 0x1.da65c6c6b8618p-26),
-    (double2)(0x1.b6f9600000000p-2, 0x1.739bf7df1ad64p-25),
-    (double2)(0x1.ba47300000000p-2, 0x1.bc31252aa3340p-25),
-    (double2)(0x1.bd92800000000p-2, 0x1.e528191ad3aa8p-26),
-    (double2)(0x1.c0db400000000p-2, 0x1.929d93df19f18p-23),
-    (double2)(0x1.c421900000000p-2, 0x1.ff11eb693a080p-26),
-    (double2)(0x1.c765500000000p-2, 0x1.55ae3f145a3a0p-27),
-    (double2)(0x1.caa6800000000p-2, 0x1.cbcd8c6c0ca82p-24),
-    (double2)(0x1.cde5300000000p-2, 0x1.0cb04d425d304p-24),
-    (double2)(0x1.d121500000000p-2, 0x1.9adfcab5be678p-24),
-    (double2)(0x1.d45ae00000000p-2, 0x1.93d90c5662508p-23),
-    (double2)(0x1.d791f00000000p-2, 0x1.68489bd35ff40p-24),
-    (double2)(0x1.dac6700000000p-2, 0x1.586ed3da2b7e0p-28),
-    (double2)(0x1.ddf8500000000p-2, 0x1.7604d2e850eeep-23),
-    (double2)(0x1.e127b00000000p-2, 0x1.ac1d12bfb53d8p-24),
-    (double2)(0x1.e454800000000p-2, 0x1.9b3d468274740p-28),
-    (double2)(0x1.e77eb00000000p-2, 0x1.fc5d68d10e53cp-24),
-    (double2)(0x1.eaa6500000000p-2, 0x1.8f9e51884becbp-23),
-    (double2)(0x1.edcb600000000p-2, 0x1.a87f0869c06d1p-23),
-    (double2)(0x1.f0ede00000000p-2, 0x1.31e7279f685fap-23),
-    (double2)(0x1.f40dd00000000p-2, 0x1.6a8282f9719b0p-27),
-    (double2)(0x1.f72b200000000p-2, 0x1.0d2724a8a44e0p-25),
-    (double2)(0x1.fa45d00000000p-2, 0x1.a60524b11ad4ep-23),
-    (double2)(0x1.fd5e000000000p-2, 0x1.75fdf832750f0p-26),
-    (double2)(0x1.0039c00000000p-1, 0x1.cf06902e4cd36p-23),
-    (double2)(0x1.01c3400000000p-1, 0x1.e82422d4f6d10p-25),
-    (double2)(0x1.034b700000000p-1, 0x1.24a091063e6c0p-26),
-    (double2)(0x1.04d2500000000p-1, 0x1.8a1a172dc6f38p-24),
-    (double2)(0x1.0657e00000000p-1, 0x1.29b6619f8a92dp-22),
-    (double2)(0x1.07dc300000000p-1, 0x1.9274d9c1b70c8p-24),
-    (double2)(0x1.095f300000000p-1, 0x1.0c34b1fbb7930p-26),
-    (double2)(0x1.0ae0e00000000p-1, 0x1.639866c20eb50p-25),
-    (double2)(0x1.0c61400000000p-1, 0x1.6d6d0f6832e9ep-23),
-    (double2)(0x1.0de0500000000p-1, 0x1.af54def99f25ep-22),
-    (double2)(0x1.0f5e200000000p-1, 0x1.16cfc52a00262p-22),
-    (double2)(0x1.10daa00000000p-1, 0x1.dcc1e83569c32p-23),
-    (double2)(0x1.1255d00000000p-1, 0x1.37f7a551ed425p-22),
-    (double2)(0x1.13cfb00000000p-1, 0x1.f6360adc98887p-22),
-    (double2)(0x1.1548500000000p-1, 0x1.2c6ec8d35a2c1p-22),
-    (double2)(0x1.16bfa00000000p-1, 0x1.bd44df84cb036p-23),
-    (double2)(0x1.1835a00000000p-1, 0x1.117cf826e310ep-22),
-    (double2)(0x1.19aa500000000p-1, 0x1.ca533f332cfc9p-22),
-    (double2)(0x1.1b1dc00000000p-1, 0x1.0f208509dbc2ep-22),
-    (double2)(0x1.1c8fe00000000p-1, 0x1.cd07d93c945dep-23),
-    (double2)(0x1.1e00b00000000p-1, 0x1.57bdfd67e6d72p-22),
-    (double2)(0x1.1f70400000000p-1, 0x1.aab89c516c658p-24),
-    (double2)(0x1.20de800000000p-1, 0x1.3e823b1a1b8a0p-25),
-    (double2)(0x1.224b700000000p-1, 0x1.307464a9d6d3cp-23),
-    (double2)(0x1.23b7100000000p-1, 0x1.c5993cd438843p-22),
-    (double2)(0x1.2521700000000p-1, 0x1.ba2fca02ab554p-22),
-    (double2)(0x1.268a900000000p-1, 0x1.01a5b6983a268p-23),
-    (double2)(0x1.27f2600000000p-1, 0x1.273d1b350efc8p-25),
-    (double2)(0x1.2958e00000000p-1, 0x1.64c238c37b0c6p-23),
-    (double2)(0x1.2abe200000000p-1, 0x1.aded07370a300p-25),
-    (double2)(0x1.2c22100000000p-1, 0x1.78091197eb47ep-23),
-    (double2)(0x1.2d84c00000000p-1, 0x1.4b0f245e0dabcp-24),
-    (double2)(0x1.2ee6200000000p-1, 0x1.080d9794e2eafp-22),
-    (double2)(0x1.3046400000000p-1, 0x1.d4ec242b60c76p-23),
-    (double2)(0x1.31a5200000000p-1, 0x1.221d2f940caa0p-27),
-    (double2)(0x1.3302b00000000p-1, 0x1.cdbc42b2bba5cp-24),
-    (double2)(0x1.345f000000000p-1, 0x1.cce37bb440840p-25),
-    (double2)(0x1.35ba000000000p-1, 0x1.6c1d999cf1dd0p-22),
-    (double2)(0x1.3713d00000000p-1, 0x1.bed8a07eb0870p-26),
-    (double2)(0x1.386c500000000p-1, 0x1.69ed88f490e3cp-24),
-    (double2)(0x1.39c3900000000p-1, 0x1.cd41719b73ef0p-25),
-    (double2)(0x1.3b19800000000p-1, 0x1.cbc4ac95b41b7p-22),
-    (double2)(0x1.3c6e400000000p-1, 0x1.238f1b890f5d7p-22),
-    (double2)(0x1.3dc1c00000000p-1, 0x1.50c4282259cc4p-24),
-    (double2)(0x1.3f13f00000000p-1, 0x1.713d2de87b3e2p-22),
-    (double2)(0x1.4064f00000000p-1, 0x1.1d5a7d2255276p-23),
-    (double2)(0x1.41b4a00000000p-1, 0x1.c0dfd48227ac1p-22),
-    (double2)(0x1.4303200000000p-1, 0x1.1c964dab76753p-22),
-    (double2)(0x1.4450600000000p-1, 0x1.6de56d5704496p-23),
-    (double2)(0x1.459c600000000p-1, 0x1.4aeb71fd19968p-23),
-    (double2)(0x1.46e7200000000p-1, 0x1.fbf91c57b1918p-23),
-    (double2)(0x1.4830a00000000p-1, 0x1.d6bef7fbe5d9ap-22),
-    (double2)(0x1.4978f00000000p-1, 0x1.464d3dc249066p-22),
-    (double2)(0x1.4ac0000000000p-1, 0x1.638e2ec4d9073p-22),
-    (double2)(0x1.4c05e00000000p-1, 0x1.16f4a7247ea7cp-24),
-    (double2)(0x1.4d4a800000000p-1, 0x1.1a0a740f1d440p-28),
-    (double2)(0x1.4e8de00000000p-1, 0x1.6edbb0114a33cp-23),
-    (double2)(0x1.4fd0100000000p-1, 0x1.dbee8bf1d513cp-24),
-    (double2)(0x1.5111000000000p-1, 0x1.5b8bdb0248f73p-22),
-    (double2)(0x1.5250c00000000p-1, 0x1.7de3d3f5eac64p-22),
-    (double2)(0x1.538f500000000p-1, 0x1.ee24187ae448ap-23),
-    (double2)(0x1.54cca00000000p-1, 0x1.e06c591ec5192p-22),
-    (double2)(0x1.5608d00000000p-1, 0x1.4e3861a332738p-24),
-    (double2)(0x1.5743c00000000p-1, 0x1.a9599dcc2bfe4p-24),
-    (double2)(0x1.587d800000000p-1, 0x1.f732fbad43468p-25),
-    (double2)(0x1.59b6000000000p-1, 0x1.eb9f573b727d9p-22),
-    (double2)(0x1.5aed600000000p-1, 0x1.8b212a2eb9897p-22),
-    (double2)(0x1.5c23900000000p-1, 0x1.384884c167215p-22),
-    (double2)(0x1.5d58900000000p-1, 0x1.0e2d363020051p-22),
-    (double2)(0x1.5e8c600000000p-1, 0x1.2820879fbd022p-22),
-    (double2)(0x1.5fbf000000000p-1, 0x1.a1ab9893e4b30p-22),
-    (double2)(0x1.60f0800000000p-1, 0x1.2d1b817a24478p-23),
-    (double2)(0x1.6220d00000000p-1, 0x1.15d7b8ded4878p-25),
-    (double2)(0x1.634ff00000000p-1, 0x1.8968f9db3a5e4p-24),
-    (double2)(0x1.647de00000000p-1, 0x1.71c4171fe135fp-22),
-    (double2)(0x1.65aab00000000p-1, 0x1.6d80f605d0d8cp-22),
-    (double2)(0x1.66d6600000000p-1, 0x1.c91f043691590p-24),
-    (double2)(0x1.6800e00000000p-1, 0x1.39f8a15fce2b2p-23),
-    (double2)(0x1.692a400000000p-1, 0x1.55beda9d94b80p-27),
-    (double2)(0x1.6a52700000000p-1, 0x1.b12c15d60949ap-23),
-    (double2)(0x1.6b79800000000p-1, 0x1.24167b312bfe3p-22),
-    (double2)(0x1.6c9f700000000p-1, 0x1.0ab8633070277p-22),
-    (double2)(0x1.6dc4400000000p-1, 0x1.54554ebbc80eep-23),
-    (double2)(0x1.6ee7f00000000p-1, 0x1.0204aef5a4bb8p-25),
-    (double2)(0x1.700a700000000p-1, 0x1.8af08c679cf2cp-22),
-    (double2)(0x1.712be00000000p-1, 0x1.0852a330ae6c8p-22),
-    (double2)(0x1.724c300000000p-1, 0x1.6d3eb9ec32916p-23),
-    (double2)(0x1.736b600000000p-1, 0x1.685cb7fcbbafep-23),
-    (double2)(0x1.7489700000000p-1, 0x1.1f751c1e0bd95p-22),
-    (double2)(0x1.75a6700000000p-1, 0x1.705b1b0f72560p-26),
-    (double2)(0x1.76c2400000000p-1, 0x1.b98d8d808ca92p-22),
-    (double2)(0x1.77dd100000000p-1, 0x1.2ea22c75cc980p-25),
-    (double2)(0x1.78f6b00000000p-1, 0x1.7aba62bca0350p-22),
-    (double2)(0x1.7a0f400000000p-1, 0x1.d73833442278cp-22),
-    (double2)(0x1.7b26c00000000p-1, 0x1.5a5ca1fb18bf9p-22),
-    (double2)(0x1.7c3d300000000p-1, 0x1.1a6092b6ecf28p-25),
-    (double2)(0x1.7d52800000000p-1, 0x1.44fd049aac104p-24),
-    (double2)(0x1.7e66c00000000p-1, 0x1.c114fd8df5180p-29),
-    (double2)(0x1.7f79e00000000p-1, 0x1.5972f130feae5p-22),
-    (double2)(0x1.808c000000000p-1, 0x1.ca034a55fe198p-24),
-    (double2)(0x1.819d000000000p-1, 0x1.6e2b149990227p-22),
-    (double2)(0x1.82ad000000000p-1, 0x1.b00000294592cp-24),
-    (double2)(0x1.83bbe00000000p-1, 0x1.8b9bdc442620ep-22),
-    (double2)(0x1.84c9c00000000p-1, 0x1.d94fdfabf3e4ep-23),
-    (double2)(0x1.85d6900000000p-1, 0x1.5db30b145ad9ap-23),
-    (double2)(0x1.86e2500000000p-1, 0x1.e3e1eb95022b0p-23),
-    (double2)(0x1.87ed000000000p-1, 0x1.d5b8b45442bd6p-22),
-    (double2)(0x1.88f6b00000000p-1, 0x1.7a046231ecd2ep-22),
-    (double2)(0x1.89ff500000000p-1, 0x1.feafe3ef55232p-22),
-    (double2)(0x1.8b06f00000000p-1, 0x1.839e7bfd78267p-22),
-    (double2)(0x1.8c0d900000000p-1, 0x1.45cf49d6fa900p-25),
-    (double2)(0x1.8d13200000000p-1, 0x1.be3132b27f380p-27),
-    (double2)(0x1.8e17a00000000p-1, 0x1.533980bb84f9fp-22),
-    (double2)(0x1.8f1b300000000p-1, 0x1.889e2ce3ba390p-26),
-    (double2)(0x1.901db00000000p-1, 0x1.f7778c3ad0cc8p-24),
-    (double2)(0x1.911f300000000p-1, 0x1.46660cec4eba2p-23),
-    (double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23),
-};
-
 DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = {
     (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
     (double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25),
@@ -2197,7 +1945,6 @@ DECLARE_TABLE(double2, LOG_F_INV_TBL, 258) = {
     (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
 };
 
-TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl);
 TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl);
 TABLE_FUNCTION(double2, SINH_TBL, sinh_tbl);
 TABLE_FUNCTION(double2, COSH_TBL, cosh_tbl);

>From 50d1baf2e2d85f7104ce1ed954431a86f380ac14 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Thu, 27 Mar 2025 10:30:16 +0000
Subject: [PATCH 2/2] fix formatting

---
 libclc/clc/lib/generic/math/clc_atan2.inc | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/libclc/clc/lib/generic/math/clc_atan2.inc b/libclc/clc/lib/generic/math/clc_atan2.inc
index 0917f3adf2d90..61ffeebbc5d11 100644
--- a/libclc/clc/lib/generic/math/clc_atan2.inc
+++ b/libclc/clc/lib/generic/math/clc_atan2.inc
@@ -8,7 +8,8 @@
 
 #if __CLC_FPSIZE == 32
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, __CLC_GENTYPE x) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y,
+                                                 __CLC_GENTYPE x) {
   const __CLC_GENTYPE pi = 0x1.921fb6p+1f;
   const __CLC_GENTYPE piby2 = 0x1.921fb6p+0f;
   const __CLC_GENTYPE piby4 = 0x1.921fb6p-1f;
@@ -71,7 +72,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, __CLC_GENTYPE
 
 #elif __CLC_FPSIZE == 64
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, __CLC_GENTYPE x) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y,
+                                                 __CLC_GENTYPE x) {
   const __CLC_GENTYPE pi = 3.1415926535897932e+00;    /* 0x400921fb54442d18 */
   const __CLC_GENTYPE piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
   const __CLC_GENTYPE piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
@@ -142,7 +144,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, __CLC_GENTYPE
     __CLC_GENTYPE vm = __clc_ldexp(v, m);
 
     // 26 leading bits of u
-    __CLC_GENTYPE u1 = __CLC_AS_GENTYPE(__CLC_AS_ULONGN(um) & 0xfffffffff8000000UL);
+    __CLC_GENTYPE u1 =
+        __CLC_AS_GENTYPE(__CLC_AS_ULONGN(um) & 0xfffffffff8000000UL);
     __CLC_GENTYPE u2 = um - u1;
 
     __CLC_GENTYPE r = MATH_DIVIDE(__clc_fma(-c, u2, __clc_fma(-c, u1, vm)),
@@ -163,9 +166,11 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atan2(__CLC_GENTYPE y, __CLC_GENTYPE
 
   __CLC_GENTYPE q5, q6;
   {
-    __CLC_GENTYPE u1 = __CLC_AS_GENTYPE(__CLC_AS_ULONGN(u) & 0xffffffff00000000UL);
+    __CLC_GENTYPE u1 =
+        __CLC_AS_GENTYPE(__CLC_AS_ULONGN(u) & 0xffffffff00000000UL);
     __CLC_GENTYPE u2 = u - u1;
-    __CLC_GENTYPE vu1 = __CLC_AS_GENTYPE(__CLC_AS_ULONGN(vbyu) & 0xffffffff00000000UL);
+    __CLC_GENTYPE vu1 =
+        __CLC_AS_GENTYPE(__CLC_AS_ULONGN(vbyu) & 0xffffffff00000000UL);
     __CLC_GENTYPE vu2 = vbyu - vu1;
 
     q5 = 0.0;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to