arsenm updated this revision to Diff 546164.
arsenm retitled this revision from "[wip] clang/OpenCL: Add inline 
implementations of sqrt in builtin header" to "clang/OpenCL: Add inline 
implementations of sqrt in builtin header".
arsenm edited the summary of this revision.
arsenm added a comment.

Move to base header and fix test. update_cc_test_checks 
--include-generated-funcs does a terrible job here so write manually checks


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D156743/new/

https://reviews.llvm.org/D156743

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang/Basic/Builtins.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/opencl-c-base.h
  clang/lib/Headers/opencl-c.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-elementwise-math.c
  clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
  clang/test/CodeGenCUDA/correctly-rounded-div.cu
  clang/test/CodeGenOpenCL/fpmath.cl
  clang/test/CodeGenOpenCL/sqrt-fpmath.cl
  clang/test/Sema/builtins-elementwise-math.c
  clang/test/SemaCXX/builtins-elementwise-math.cpp

Index: clang/test/SemaCXX/builtins-elementwise-math.cpp
===================================================================
--- clang/test/SemaCXX/builtins-elementwise-math.cpp
+++ clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -111,6 +111,13 @@
   static_assert(!is_const<decltype(__builtin_elementwise_sin(b))>::value);
 }
 
+void test_builtin_elementwise_sqrt() {
+  const float a = 42.0;
+  float b = 42.3;
+  static_assert(!is_const<decltype(__builtin_elementwise_sqrt(a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_sqrt(b))>::value);
+}
+
 void test_builtin_elementwise_log() {
   const float a = 42.0;
   float b = 42.3;
Index: clang/test/Sema/builtins-elementwise-math.c
===================================================================
--- clang/test/Sema/builtins-elementwise-math.c
+++ clang/test/Sema/builtins-elementwise-math.c
@@ -601,6 +601,27 @@
   // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
+void test_builtin_elementwise_sqrt(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+  struct Foo s = __builtin_elementwise_sqrt(f);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_sqrt();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_sqrt(i);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_sqrt(f, f);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_sqrt(u);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_sqrt(uv);
+  // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+}
+
 void test_builtin_elementwise_trunc(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
   struct Foo s = __builtin_elementwise_trunc(f);
Index: clang/test/CodeGenOpenCL/fpmath.cl
===================================================================
--- clang/test/CodeGenOpenCL/fpmath.cl
+++ clang/test/CodeGenOpenCL/fpmath.cl
@@ -28,6 +28,21 @@
   return __builtin_sqrtf(a);
 }
 
+float elementwise_sqrt_f32(float a) {
+  // CHECK-LABEL: @elementwise_sqrt_f32
+  // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+  // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+float4 elementwise_sqrt_v4f32(float4 a) {
+  // CHECK-LABEL: @elementwise_sqrt_v4f32
+  // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
+  // DIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+
 #if __OPENCL_C_VERSION__ >=120
 void printf(constant char* fmt, ...);
 
@@ -61,6 +76,18 @@
   return __builtin_sqrt(a);
 }
 
+double elementwise_sqrt_f64(double a) {
+  // CHECK-LABEL: @elementwise_sqrt_f64
+  // CHECK: call double @llvm.sqrt.f64(double %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
+double4 elementwise_sqrt_v4f64(double4 a) {
+  // CHECK-LABEL: @elementwise_sqrt_v4f64
+  // CHECK: call <4 x double> @llvm.sqrt.v4f64(<4 x double> %{{.+}}){{$}}
+  return __builtin_elementwise_sqrt(a);
+}
+
 #endif
 
 // NODIVOPT: ![[MD_FDIV]] = !{float 2.500000e+00}
Index: clang/test/CodeGenCUDA/correctly-rounded-div.cu
===================================================================
--- clang/test/CodeGenCUDA/correctly-rounded-div.cu
+++ clang/test/CodeGenCUDA/correctly-rounded-div.cu
@@ -46,4 +46,18 @@
   return __builtin_sqrt(a);
 }
 
+// COMMON-LABEL: @_Z28test_builtin_elementwise_f32f
+// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]]
+// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}}
+__device__ float test_builtin_elementwise_f32(float a) {
+  return __builtin_elementwise_sqrt(a);
+}
+
+// COMMON-LABEL: @_Z28test_builtin_elementwise_f64d
+// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}}
+// COMMON-NOT: !fpmath
+__device__ double test_builtin_elementwise_f64(double a) {
+  return __builtin_elementwise_sqrt(a);
+}
+
 // NCRSQRT: ![[MD]] = !{float 2.500000e+00}
Index: clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
===================================================================
--- clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
+++ clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -177,6 +177,16 @@
   return __builtin_elementwise_sin(a);
 }
 
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z23strict_elementwise_sqrtDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float4 strict_elementwise_sqrt(float4 a) {
+  return __builtin_elementwise_sqrt(a);
+}
+
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
Index: clang/test/CodeGen/builtins-elementwise-math.c
===================================================================
--- clang/test/CodeGen/builtins-elementwise-math.c
+++ clang/test/CodeGen/builtins-elementwise-math.c
@@ -588,6 +588,22 @@
   vf2 = __builtin_elementwise_sin(vf1);
 }
 
+void test_builtin_elementwise_sqrt(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_sqrt(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.sqrt.f32(float [[F1]])
+  f2 = __builtin_elementwise_sqrt(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.sqrt.f64(double [[D1]])
+  d2 = __builtin_elementwise_sqrt(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_sqrt(vf1);
+}
+
 void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                     float4 vf1, float4 vf2) {
   // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -2641,6 +2641,7 @@
   case Builtin::BI__builtin_elementwise_rint:
   case Builtin::BI__builtin_elementwise_nearbyint:
   case Builtin::BI__builtin_elementwise_sin:
+  case Builtin::BI__builtin_elementwise_sqrt:
   case Builtin::BI__builtin_elementwise_trunc:
   case Builtin::BI__builtin_elementwise_canonicalize: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -2544,7 +2544,8 @@
     case Builtin::BI__builtin_sqrtf:
     case Builtin::BI__builtin_sqrtf16:
     case Builtin::BI__builtin_sqrtl:
-    case Builtin::BI__builtin_sqrtf128: {
+    case Builtin::BI__builtin_sqrtf128:
+    case Builtin::BI__builtin_elementwise_sqrt: {
       llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
           *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
       SetSqrtFPAccuracy(Call);
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -694,6 +694,7 @@
 BUILTIN(__builtin_elementwise_rint, "v.", "nct")
 BUILTIN(__builtin_elementwise_nearbyint, "v.", "nct")
 BUILTIN(__builtin_elementwise_sin, "v.", "nct")
+BUILTIN(__builtin_elementwise_sqrt, "v.", "nct")
 BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
 BUILTIN(__builtin_elementwise_canonicalize, "v.", "nct")
 BUILTIN(__builtin_elementwise_copysign, "v.", "nct")
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -643,6 +643,8 @@
  T __builtin_elementwise_bitreverse(T x)     return the integer represented after reversing the bits of x     integer types
  T __builtin_elementwise_exp(T x)            returns the base-e exponential, e^x, of the specified value      floating point types
  T __builtin_elementwise_exp2(T x)           returns the base-2 exponential, 2^x, of the specified value      floating point types
+
+ T __builtin_elementwise_sqrt(T x)           return the square root of a floating-point number                floating point types
  T __builtin_elementwise_roundeven(T x)      round x to the nearest integer value in floating point format,   floating point types
                                              rounding halfway cases to even (that is, to the nearest value
                                              that is an even integer), regardless of the current rounding
Index: clang/test/CodeGenOpenCL/sqrt-fpmath.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/sqrt-fpmath.cl
@@ -0,0 +1,201 @@
+// Test that float variants of sqrt are emitted as available_externally inline
+// definitions that call the sqrt intrinsic with appropriate !fpmath metadata
+// depending on -cl-fp32-correctly-rounded-divide-sqrt
+
+// Test with -fdeclare-opencl-builtins
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
+
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
+
+// Test without -fdeclare-opencl-builtins
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED %s
+
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT-UNSAFE %s
+// RUN: %clang_cc1 -disable-llvm-passes -triple amdgcn-unknown-unknown -finclude-default-header -cl-fp32-correctly-rounded-divide-sqrt -cl-unsafe-math-optimizations -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,CORRECTLYROUNDED-UNSAFE %s
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CHECK-LABEL: define {{.*}} float @call_sqrt_f32(
+// CHECK: call {{.*}} float @_Z4sqrtf(float noundef %{{.+}}) #{{[0-9]+$}}
+float call_sqrt_f32(float x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define available_externally float @_Z4sqrtf(float noundef %__x)
+// DEFAULT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn float @llvm.sqrt.f32(float %{{.+}}){{$}}
+
+// CHECK-LABEL: define {{.*}} <2 x float> @call_sqrt_v2f32(
+// CHECK: call {{.*}} <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %{{.*}}) #{{[0-9]+$}}
+float2 call_sqrt_v2f32(float2 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define available_externally <2 x float> @_Z4sqrtDv2_f(<2 x float> noundef %__x)
+// DEFAULT: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.+}}){{$}}
+
+// CHECK-LABEL: define {{.*}} <3 x float> @call_sqrt_v3f32(
+// CHECK: call {{.*}} <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %{{.*}}) #{{[0-9]+$}}
+float3 call_sqrt_v3f32(float3 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define available_externally <3 x float> @_Z4sqrtDv3_f(<3 x float> noundef %__x)
+// DEFAULT: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <3 x float> @llvm.sqrt.v3f32(<3 x float> %{{.+}}){{$}}
+
+
+// CHECK-LABEL: define {{.*}} <4 x float> @call_sqrt_v4f32(
+// CHECK: call {{.*}} <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %{{.*}}) #{{[0-9]+$}}
+float4 call_sqrt_v4f32(float4 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define available_externally <4 x float> @_Z4sqrtDv4_f(<4 x float> noundef %__x)
+// DEFAULT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
+
+// CHECK-LABEL: define {{.*}} <8 x float> @call_sqrt_v8f32(
+// CHECK: call {{.*}} <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %{{.*}}) #{{[0-9]+$}}
+float8 call_sqrt_v8f32(float8 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define available_externally <8 x float> @_Z4sqrtDv8_f(<8 x float> noundef %__x)
+// DEFAULT: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.+}}){{$}}
+
+
+// CHECK-LABEL: define {{.*}} <16 x float> @call_sqrt_v16f32(
+// CHECK: call {{.*}} <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %{{.*}}) #{{[0-9]+$}}
+float16 call_sqrt_v16f32(float16 x) {
+  return sqrt(x);
+}
+
+// CHECK-LABEL: define available_externally <16 x float> @_Z4sqrtDv16_f(<16 x float> noundef %__x)
+// DEFAULT: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
+
+// DEFAULT-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}), !fpmath [[$FPMATH:![0-9]+]]{{$}}
+// CORRECTLYROUNDED-UNSAFE: call reassoc nsz arcp contract afn <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.+}}){{$}}
+
+
+// Not for f64
+// CHECK-LABEL: define {{.*}} double @call_sqrt_f64(
+// CHECK: call {{.*}} double @_Z4sqrtd(double noundef %{{.+}}) #{{[0-9]+$}}
+double call_sqrt_f64(double x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// Not for f64
+// CHECK-LABEL: define {{.*}} <2 x double> @call_sqrt_v2f64(
+// CHECK: call {{.*}} <2 x double> @_Z4sqrtDv2_d(<2 x double> noundef %{{.+}}) #{{[0-9]+$}}
+double2 call_sqrt_v2f64(double2 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <3 x double> @call_sqrt_v3f64(
+// CHECK: call {{.*}} <3 x double> @_Z4sqrtDv3_d(<3 x double> noundef %{{.+}}) #{{[0-9]+$}}
+double3 call_sqrt_v3f64(double3 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <4 x double> @call_sqrt_v4f64(
+// CHECK: call {{.*}} <4 x double> @_Z4sqrtDv4_d(<4 x double> noundef %{{.+}}) #{{[0-9]+$}}
+double4 call_sqrt_v4f64(double4 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <8 x double> @call_sqrt_v8f64(
+// CHECK: call {{.*}} <8 x double> @_Z4sqrtDv8_d(<8 x double> noundef %{{.+}}) #{{[0-9]+$}}
+double8 call_sqrt_v8f64(double8 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <16 x double> @call_sqrt_v16f64(
+// CHECK: call {{.*}} <16 x double> @_Z4sqrtDv16_d(<16 x double> noundef %{{.+}}) #{{[0-9]+$}}
+double16 call_sqrt_v16f64(double16 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// Not for f16
+// CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
+// CHECK: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}
+half call_sqrt_f16(half x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
+// CHECK: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}
+half2 call_sqrt_v2f16(half2 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
+// CHECK: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}
+half3 call_sqrt_v3f16(half3 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
+// CHECK: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}
+half4 call_sqrt_v4f16(half4 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
+// CHECK: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}
+half8 call_sqrt_v8f16(half8 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
+// CHECK: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}
+half16 call_sqrt_v16f16(half16 x) {
+  return sqrt(x);
+}
+
+// CHECK-NOT: define
+
+// DEFAULT: [[$FPMATH]] = !{float 3.000000e+00}
Index: clang/lib/Headers/opencl-c.h
===================================================================
--- clang/lib/Headers/opencl-c.h
+++ clang/lib/Headers/opencl-c.h
@@ -8496,32 +8496,6 @@
 half16 __ovld __cnfn sinpi(half16);
 #endif //cl_khr_fp16
 
-/**
- * Compute square root.
- */
-float __ovld __cnfn sqrt(float);
-float2 __ovld __cnfn sqrt(float2);
-float3 __ovld __cnfn sqrt(float3);
-float4 __ovld __cnfn sqrt(float4);
-float8 __ovld __cnfn sqrt(float8);
-float16 __ovld __cnfn sqrt(float16);
-#ifdef cl_khr_fp64
-double __ovld __cnfn sqrt(double);
-double2 __ovld __cnfn sqrt(double2);
-double3 __ovld __cnfn sqrt(double3);
-double4 __ovld __cnfn sqrt(double4);
-double8 __ovld __cnfn sqrt(double8);
-double16 __ovld __cnfn sqrt(double16);
-#endif //cl_khr_fp64
-#ifdef cl_khr_fp16
-half __ovld __cnfn sqrt(half);
-half2 __ovld __cnfn sqrt(half2);
-half3 __ovld __cnfn sqrt(half3);
-half4 __ovld __cnfn sqrt(half4);
-half8 __ovld __cnfn sqrt(half8);
-half16 __ovld __cnfn sqrt(half16);
-#endif //cl_khr_fp16
-
 /**
  * Compute tangent.
  */
Index: clang/lib/Headers/opencl-c-base.h
===================================================================
--- clang/lib/Headers/opencl-c-base.h
+++ clang/lib/Headers/opencl-c-base.h
@@ -819,6 +819,64 @@
 
 #endif // cl_intel_device_side_avc_motion_estimation
 
+/**
+ * Compute square root.
+ *
+ * Provide inline implementations using the builtin so that we get appropriate
+ * !fpmath based on -cl-fp32-correctly-rounded-divide-sqrt, attached to
+ * llvm.sqrt. The implementation should still provide an external definition.
+ */
+#define __ovld __attribute__((overloadable))
+#define __cnfn __attribute__((const))
+
+inline float __ovld __cnfn sqrt(float __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float2 __ovld __cnfn sqrt(float2 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float3 __ovld __cnfn sqrt(float3 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float4 __ovld __cnfn sqrt(float4 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float8 __ovld __cnfn sqrt(float8 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+inline float16 __ovld __cnfn sqrt(float16 __x) {
+  return __builtin_elementwise_sqrt(__x);
+}
+
+// We only really want to define the float variants here. However bad things
+// seem to happen with -fdeclare-opencl-builtins and splitting the handling of
+// different overloads.
+
+#ifdef cl_khr_fp64
+double __ovld __cnfn sqrt(double);
+double2 __ovld __cnfn sqrt(double2);
+double3 __ovld __cnfn sqrt(double3);
+double4 __ovld __cnfn sqrt(double4);
+double8 __ovld __cnfn sqrt(double8);
+double16 __ovld __cnfn sqrt(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn sqrt(half);
+half2 __ovld __cnfn sqrt(half2);
+half3 __ovld __cnfn sqrt(half3);
+half4 __ovld __cnfn sqrt(half4);
+half8 __ovld __cnfn sqrt(half8);
+half16 __ovld __cnfn sqrt(half16);
+#endif //cl_khr_fp16
+
+#undef __cnfn
+#undef __ovld
+
 // Disable any extensions we may have enabled previously.
 #pragma OPENCL EXTENSION all : disable
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to