arsenm created this revision. arsenm added a reviewer: tstellarAMD. arsenm added a subscriber: cfe-commits.
Keep the ones still used by libclc around as a workaround for now. Emit the new amdgcn intrinsic name unless targeting r600, in which case the old AMDGPU name is still used. http://reviews.llvm.org/D16444 Files: include/clang/Basic/BuiltinsAMDGPU.def lib/CodeGen/CGBuiltin.cpp test/CodeGenOpenCL/builtins-amdgcn.cl test/CodeGenOpenCL/builtins-r600.cl
Index: test/CodeGenOpenCL/builtins-r600.cl =================================================================== --- test/CodeGenOpenCL/builtins-r600.cl +++ test/CodeGenOpenCL/builtins-r600.cl @@ -1,143 +1,32 @@ -// REQUIRES: r600-registered-target +// REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable -// CHECK-LABEL: @test_div_scale_f64 -// CHECK: call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) -// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1 -// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0 -// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 -// CHECK: store i32 [[FLAGEXT]] -void test_div_scale_f64(global double* out, global int* flagout, double a, double b) -{ - bool flag; - *out = __builtin_amdgpu_div_scale(a, b, true, &flag); - *flagout = flag; -} - -// CHECK-LABEL: @test_div_scale_f32 -// CHECK: call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) -// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 -// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 -// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 -// CHECK: store i32 [[FLAGEXT]] -void test_div_scale_f32(global float* out, global int* flagout, float a, float b) -{ - bool flag; - *out = __builtin_amdgpu_div_scalef(a, b, true, &flag); - *flagout = flag; -} - -// CHECK-LABEL: @test_div_fmas_f32 -// CHECK: call float @llvm.AMDGPU.div.fmas.f32 -void test_div_fmas_f32(global float* out, float a, float b, float c, int d) -{ - *out = __builtin_amdgpu_div_fmasf(a, b, c, d); -} - -// CHECK-LABEL: @test_div_fmas_f64 -// CHECK: call double @llvm.AMDGPU.div.fmas.f64 -void test_div_fmas_f64(global double* out, double a, double b, double c, int d) -{ - *out = __builtin_amdgpu_div_fmas(a, b, c, d); -} - 
-// CHECK-LABEL: @test_div_fixup_f32 -// CHECK: call float @llvm.AMDGPU.div.fixup.f32 -void test_div_fixup_f32(global float* out, float a, float b, float c) -{ - *out = __builtin_amdgpu_div_fixupf(a, b, c); -} - -// CHECK-LABEL: @test_div_fixup_f64 -// CHECK: call double @llvm.AMDGPU.div.fixup.f64 -void test_div_fixup_f64(global double* out, double a, double b, double c) -{ - *out = __builtin_amdgpu_div_fixup(a, b, c); -} - -// CHECK-LABEL: @test_trig_preop_f32 -// CHECK: call float @llvm.AMDGPU.trig.preop.f32 -void test_trig_preop_f32(global float* out, float a, int b) -{ - *out = __builtin_amdgpu_trig_preopf(a, b); -} - -// CHECK-LABEL: @test_trig_preop_f64 -// CHECK: call double @llvm.AMDGPU.trig.preop.f64 -void test_trig_preop_f64(global double* out, double a, int b) -{ - *out = __builtin_amdgpu_trig_preop(a, b); -} - -// CHECK-LABEL: @test_rcp_f32 -// CHECK: call float @llvm.AMDGPU.rcp.f32 -void test_rcp_f32(global float* out, float a) -{ - *out = __builtin_amdgpu_rcpf(a); -} - -// CHECK-LABEL: @test_rcp_f64 -// CHECK: call double @llvm.AMDGPU.rcp.f64 -void test_rcp_f64(global double* out, double a) -{ - *out = __builtin_amdgpu_rcp(a); -} - // CHECK-LABEL: @test_rsq_f32 -// CHECK: call float @llvm.AMDGPU.rsq.f32 +// CHECK: call float @llvm.r600.rsq.f32 void test_rsq_f32(global float* out, float a) { *out = __builtin_amdgpu_rsqf(a); } // CHECK-LABEL: @test_rsq_f64 -// CHECK: call double @llvm.AMDGPU.rsq.f64 +// CHECK: call double @llvm.r600.rsq.f64 void test_rsq_f64(global double* out, double a) { *out = __builtin_amdgpu_rsq(a); } -// CHECK-LABEL: @test_rsq_clamped_f32 -// CHECK: call float @llvm.AMDGPU.rsq.clamped.f32 -void test_rsq_clamped_f32(global float* out, float a) -{ - *out = __builtin_amdgpu_rsq_clampedf(a); -} - -// CHECK-LABEL: @test_rsq_clamped_f64 -// CHECK: call double @llvm.AMDGPU.rsq.clamped.f64 -void test_rsq_clamped_f64(global double* out, double a) -{ - *out = __builtin_amdgpu_rsq_clamped(a); -} - -// CHECK-LABEL: @test_ldexp_f32 +// 
CHECK-LABEL: @test_legacy_ldexp_f32 // CHECK: call float @llvm.AMDGPU.ldexp.f32 -void test_ldexp_f32(global float* out, float a, int b) +void test_legacy_ldexp_f32(global float* out, float a, int b) { *out = __builtin_amdgpu_ldexpf(a, b); } -// CHECK-LABEL: @test_ldexp_f64 +// CHECK-LABEL: @test_legacy_ldexp_f64 // CHECK: call double @llvm.AMDGPU.ldexp.f64 -void test_ldexp_f64(global double* out, double a, int b) +void test_legacy_ldexp_f64(global double* out, double a, int b) { *out = __builtin_amdgpu_ldexp(a, b); } -
-// CHECK-LABEL: @test_class_f32 -// CHECK: call i1 @llvm.AMDGPU.class.f32 -void test_class_f32(global float* out, float a, int b) -{ - *out = __builtin_amdgpu_classf(a, b); -} - -// CHECK-LABEL: @test_class_f64 -// CHECK: call i1 @llvm.AMDGPU.class.f64 -void test_class_f64(global double* out, double a, int b) -{ - *out = __builtin_amdgpu_class(a, b); -} Index: test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/builtins-amdgcn.cl @@ -0,0 +1,173 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +// CHECK-LABEL: @test_div_scale_f64 +// CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) +// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1 +// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0 +// 
CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 +// CHECK: store i32 [[FLAGEXT]] +void test_div_scale_f32(global float* out, global int* flagout, float a, float b) +{ + bool flag; + *out = __builtin_amdgcn_div_scalef(a, b, true, &flag); + *flagout = flag; +} + +// CHECK-LABEL: @test_div_fmas_f32 +// CHECK: call float @llvm.amdgcn.div.fmas.f32 +void test_div_fmas_f32(global float* out, float a, float b, float c, int d) +{ + *out = __builtin_amdgcn_div_fmasf(a, b, c, d); +} + +// CHECK-LABEL: @test_div_fmas_f64 +// CHECK: call double @llvm.amdgcn.div.fmas.f64 +void test_div_fmas_f64(global double* out, double a, double b, double c, int d) +{ + *out = __builtin_amdgcn_div_fmas(a, b, c, d); +} + +// CHECK-LABEL: @test_div_fixup_f32 +// CHECK: call float @llvm.amdgcn.div.fixup.f32 +void test_div_fixup_f32(global float* out, float a, float b, float c) +{ + *out = __builtin_amdgcn_div_fixupf(a, b, c); +} + +// CHECK-LABEL: @test_div_fixup_f64 +// CHECK: call double @llvm.amdgcn.div.fixup.f64 +void test_div_fixup_f64(global double* out, double a, double b, double c) +{ + *out = __builtin_amdgcn_div_fixup(a, b, c); +} + +// CHECK-LABEL: @test_trig_preop_f32 +// CHECK: call float @llvm.amdgcn.trig.preop.f32 +void test_trig_preop_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgcn_trig_preopf(a, b); +} + +// CHECK-LABEL: @test_trig_preop_f64 +// CHECK: call double @llvm.amdgcn.trig.preop.f64 +void test_trig_preop_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgcn_trig_preop(a, b); +} + +// CHECK-LABEL: @test_rcp_f32 +// CHECK: call float @llvm.amdgcn.rcp.f32 +void test_rcp_f32(global float* out, float a) +{ + *out = __builtin_amdgcn_rcpf(a); +} + +// CHECK-LABEL: @test_rcp_f64 +// CHECK: call double @llvm.amdgcn.rcp.f64 +void test_rcp_f64(global double* out, double a) +{ + *out = __builtin_amdgcn_rcp(a); +} + +// CHECK-LABEL: @test_rsq_f32 +// CHECK: call float @llvm.amdgcn.rsq.f32 +void test_rsq_f32(global float* out, float a) +{ + *out = 
__builtin_amdgcn_rsqf(a); +} + +// CHECK-LABEL: @test_rsq_f64 +// CHECK: call double @llvm.amdgcn.rsq.f64 +void test_rsq_f64(global double* out, double a) +{ + *out = __builtin_amdgcn_rsq(a); +} + +// CHECK-LABEL: @test_rsq_clamped_f32 +// CHECK: call float @llvm.amdgcn.rsq.clamped.f32 +void test_rsq_clamped_f32(global float* out, float a) +{ + *out = __builtin_amdgcn_rsq_clampedf(a); +} + +// CHECK-LABEL: @test_rsq_clamped_f64 +// CHECK: call double @llvm.amdgcn.rsq.clamped.f64 +void test_rsq_clamped_f64(global double* out, double a) +{ + *out = __builtin_amdgcn_rsq_clamped(a); +} + +// CHECK-LABEL: @test_ldexp_f32 +// CHECK: call float @llvm.amdgcn.ldexp.f32 +void test_ldexp_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgcn_ldexpf(a, b); +} + +// CHECK-LABEL: @test_ldexp_f64 +// CHECK: call double @llvm.amdgcn.ldexp.f64 +void test_ldexp_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgcn_ldexp(a, b); +} + +// CHECK-LABEL: @test_class_f32 +// CHECK: call i1 @llvm.amdgcn.class.f32 +void test_class_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgcn_classf(a, b); +} + +// CHECK-LABEL: @test_class_f64 +// CHECK: call i1 @llvm.amdgcn.class.f64 +void test_class_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgcn_class(a, b); +} + + +// Legacy intrinsics with AMDGPU prefix + +// CHECK-LABEL: @test_legacy_rsq_f32 +// CHECK: call float @llvm.amdgcn.rsq.f32 +void test_legacy_rsq_f32(global float* out, float a) +{ + *out = __builtin_amdgpu_rsqf(a); +} + +// CHECK-LABEL: @test_legacy_rsq_f64 +// CHECK: call double @llvm.amdgcn.rsq.f64 +void test_legacy_rsq_f64(global double* out, double a) +{ + *out = __builtin_amdgpu_rsq(a); +} + +// CHECK-LABEL: @test_legacy_ldexp_f32 +// CHECK: call float @llvm.amdgcn.ldexp.f32 +void test_legacy_ldexp_f32(global float* out, float a, int b) +{ + *out = __builtin_amdgpu_ldexpf(a, b); +} + +// CHECK-LABEL: @test_legacy_ldexp_f64 +// CHECK: call double 
@llvm.amdgcn.ldexp.f64 +void test_legacy_ldexp_f64(global double* out, double a, int b) +{ + *out = __builtin_amdgpu_ldexp(a, b); +} Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -6887,8 +6887,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgpu_div_scale: - case AMDGPU::BI__builtin_amdgpu_div_scalef: { + case AMDGPU::BI__builtin_amdgcn_div_scale: + case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out // argument. @@ -6898,7 +6898,7 @@ llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, + llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -6913,40 +6913,54 @@ Builder.CreateStore(FlagExt, FlagOutPtr); return Result; } - case AMDGPU::BI__builtin_amdgpu_div_fmas: - case AMDGPU::BI__builtin_amdgpu_div_fmasf: { + case AMDGPU::BI__builtin_amdgcn_div_fmas: + case AMDGPU::BI__builtin_amdgcn_div_fmasf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, + llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); } - case AMDGPU::BI__builtin_amdgpu_div_fixup: - case AMDGPU::BI__builtin_amdgpu_div_fixupf: - return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); - case AMDGPU::BI__builtin_amdgpu_trig_preop: - case 
AMDGPU::BI__builtin_amdgpu_trig_preopf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); - case AMDGPU::BI__builtin_amdgpu_rcp: - case AMDGPU::BI__builtin_amdgpu_rcpf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); + case AMDGPU::BI__builtin_amdgcn_div_fixup: + case AMDGPU::BI__builtin_amdgcn_div_fixupf: + return emitTernaryFPBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); + case AMDGPU::BI__builtin_amdgcn_trig_preop: + case AMDGPU::BI__builtin_amdgcn_trig_preopf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); + case AMDGPU::BI__builtin_amdgcn_rcp: + case AMDGPU::BI__builtin_amdgcn_rcpf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rcp); + case AMDGPU::BI__builtin_amdgcn_rsq: + case AMDGPU::BI__builtin_amdgcn_rsqf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq); + case AMDGPU::BI__builtin_amdgcn_rsq_clamped: + case AMDGPU::BI__builtin_amdgcn_rsq_clampedf: + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamped); + case AMDGPU::BI__builtin_amdgcn_ldexp: + case AMDGPU::BI__builtin_amdgcn_ldexpf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); + case AMDGPU::BI__builtin_amdgcn_class: + case AMDGPU::BI__builtin_amdgcn_classf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); + + // Legacy amdgpu prefix case AMDGPU::BI__builtin_amdgpu_rsq: - case AMDGPU::BI__builtin_amdgpu_rsqf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); - case AMDGPU::BI__builtin_amdgpu_rsq_clamped: - case AMDGPU::BI__builtin_amdgpu_rsq_clampedf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); + case AMDGPU::BI__builtin_amdgpu_rsqf: { + if (getTarget().getTriple().getArch() == Triple::amdgcn) + return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgcn_rsq); + return emitUnaryFPBuiltin(*this, E, Intrinsic::r600_rsq); + } case AMDGPU::BI__builtin_amdgpu_ldexp: - case AMDGPU::BI__builtin_amdgpu_ldexpf: + case AMDGPU::BI__builtin_amdgpu_ldexpf: { + if 
(getTarget().getTriple().getArch() == Triple::amdgcn) + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); - case AMDGPU::BI__builtin_amdgpu_class: - case AMDGPU::BI__builtin_amdgpu_classf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class); - default: + } + default: return nullptr; } } Index: include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- include/clang/Basic/BuiltinsAMDGPU.def +++ include/clang/Basic/BuiltinsAMDGPU.def @@ -7,30 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This file defines the R600-specific builtin function database. Users of this -// file must define the BUILTIN macro to make use of this information. +// This file defines the AMDGPU-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. // //===----------------------------------------------------------------------===// // The format of this database matches clang/Basic/Builtins.def. 
-BUILTIN(__builtin_amdgpu_div_scale, "dddbb*", "n") -BUILTIN(__builtin_amdgpu_div_scalef, "fffbb*", "n") -BUILTIN(__builtin_amdgpu_div_fmas, "ddddb", "nc") -BUILTIN(__builtin_amdgpu_div_fmasf, "ffffb", "nc") -BUILTIN(__builtin_amdgpu_div_fixup, "dddd", "nc") -BUILTIN(__builtin_amdgpu_div_fixupf, "ffff", "nc") -BUILTIN(__builtin_amdgpu_trig_preop, "ddi", "nc") -BUILTIN(__builtin_amdgpu_trig_preopf, "ffi", "nc") -BUILTIN(__builtin_amdgpu_rcp, "dd", "nc") -BUILTIN(__builtin_amdgpu_rcpf, "ff", "nc") +BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") +BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n") +BUILTIN(__builtin_amdgcn_div_fmas, "ddddb", "nc") +BUILTIN(__builtin_amdgcn_div_fmasf, "ffffb", "nc") +BUILTIN(__builtin_amdgcn_div_fixup, "dddd", "nc") +BUILTIN(__builtin_amdgcn_div_fixupf, "ffff", "nc") +BUILTIN(__builtin_amdgcn_trig_preop, "ddi", "nc") +BUILTIN(__builtin_amdgcn_trig_preopf, "ffi", "nc") +BUILTIN(__builtin_amdgcn_rcp, "dd", "nc") +BUILTIN(__builtin_amdgcn_rcpf, "ff", "nc") +BUILTIN(__builtin_amdgcn_rsq, "dd", "nc") +BUILTIN(__builtin_amdgcn_rsqf, "ff", "nc") +BUILTIN(__builtin_amdgcn_rsq_clamped, "dd", "nc") +BUILTIN(__builtin_amdgcn_rsq_clampedf, "ff", "nc") +BUILTIN(__builtin_amdgcn_ldexp, "ddi", "nc") +BUILTIN(__builtin_amdgcn_ldexpf, "ffi", "nc") +BUILTIN(__builtin_amdgcn_class, "bdi", "nc") +BUILTIN(__builtin_amdgcn_classf, "bfi", "nc") + +// Legacy names with amdgpu prefix BUILTIN(__builtin_amdgpu_rsq, "dd", "nc") BUILTIN(__builtin_amdgpu_rsqf, "ff", "nc") -BUILTIN(__builtin_amdgpu_rsq_clamped, "dd", "nc") -BUILTIN(__builtin_amdgpu_rsq_clampedf, "ff", "nc") BUILTIN(__builtin_amdgpu_ldexp, "ddi", "nc") BUILTIN(__builtin_amdgpu_ldexpf, "ffi", "nc") -BUILTIN(__builtin_amdgpu_class, "bdi", "nc") -BUILTIN(__builtin_amdgpu_classf, "bfi", "nc") #undef BUILTIN
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits