https://github.com/JinjinLi868 updated https://github.com/llvm/llvm-project/pull/89051
>From f61686e42906886a0686158b3050767e60b576fa Mon Sep 17 00:00:00 2001 From: Jinjin Li <lijinjin....@bytedance.com> Date: Wed, 17 Apr 2024 16:44:50 +0800 Subject: [PATCH] [clang] Fix half && bfloat16 convert node expr codegen Data type conversions between fp16 and bf16 generate fptrunc and fpext nodes, but these are actually bitcast nodes. Emit an fpext to float followed by an fptrunc to the destination type instead. --- clang/lib/CodeGen/CGExprScalar.cpp | 18 +- .../test/CodeGen/X86/bfloat16-convert-half.c | 194 ++++++++++++++++++ 2 files changed, 209 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/X86/bfloat16-convert-half.c diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 1f18e0d5ba409a..1fbeb37de5de60 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -1431,9 +1431,13 @@ Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType, return Builder.CreateFPToUI(Src, DstTy, "conv"); } - if (DstElementTy->getTypeID() < SrcElementTy->getTypeID()) + if ((DstElementTy->is16bitFPTy() && SrcElementTy->is16bitFPTy())) { + Value *FloatVal = Builder.CreateFPExt(Src, Builder.getFloatTy(), "conv"); + return Builder.CreateFPTrunc(FloatVal, DstTy, "conv"); + } else if (DstElementTy->getTypeID() < SrcElementTy->getTypeID()) return Builder.CreateFPTrunc(Src, DstTy, "conv"); - return Builder.CreateFPExt(Src, DstTy, "conv"); + else + return Builder.CreateFPExt(Src, DstTy, "conv"); } /// Emit a conversion from the specified type to the specified destination type, @@ -1906,7 +1910,15 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { } else { assert(SrcEltTy->isFloatingPointTy() && DstEltTy->isFloatingPointTy() && "Unknown real conversion"); - if (DstEltTy->getTypeID() < SrcEltTy->getTypeID()) + if ((DstEltTy->is16bitFPTy() && SrcEltTy->is16bitFPTy())) { + auto *ScrVecTy = cast<llvm::VectorType>(SrcTy); + Value *FloatVal = Builder.CreateFPExt( + Src, + llvm::VectorType::get(Builder.getFloatTy(), 
ScrVecTy->getElementCount()), + "conv"); + Res = Builder.CreateFPTrunc(FloatVal, DstTy, "conv"); + } else if (DstEltTy->getTypeID() < SrcEltTy->getTypeID()) Res = Builder.CreateFPTrunc(Src, DstTy, "conv"); else Res = Builder.CreateFPExt(Src, DstTy, "conv"); diff --git a/clang/test/CodeGen/X86/bfloat16-convert-half.c b/clang/test/CodeGen/X86/bfloat16-convert-half.c new file mode 100644 index 00000000000000..a1b948c873e064 --- /dev/null +++ b/clang/test/CodeGen/X86/bfloat16-convert-half.c @@ -0,0 +1,194 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -S -emit-llvm %s -o - | FileCheck %s +// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16( +// CHECK-SAME: bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to float +// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to half +// CHECK-NEXT: ret half [[CONV1]] +// +_Float16 test_convert_from_bf16_to_fp16(__bf16 a) { + return (_Float16)a; +} + +// CHECK-LABEL: define dso_local bfloat @test_convert_from_fp16_to_bf16( +// CHECK-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2 +// CHECK-NEXT: store half [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float +// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[CONV]] to bfloat +// CHECK-NEXT: ret bfloat [[CONV1]] +// +__bf16 test_convert_from_fp16_to_bf16(_Float16 a) { + return (__bf16)a; +} + +typedef _Float16 half2 __attribute__((ext_vector_type(2))); +typedef _Float16 half4 
__attribute__((ext_vector_type(4))); + +typedef __bf16 bfloat2 __attribute__((ext_vector_type(2))); +typedef __bf16 bfloat4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define dso_local i32 @test_cast_from_fp162_to_bf162( +// CHECK-SAME: i32 noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x bfloat>, align 4 +// CHECK-NEXT: [[IN:%.*]] = alloca <2 x half>, align 4 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <2 x half>, align 4 +// CHECK-NEXT: store i32 [[IN_COERCE]], ptr [[IN]], align 4 +// CHECK-NEXT: [[IN1:%.*]] = load <2 x half>, ptr [[IN]], align 4 +// CHECK-NEXT: store <2 x half> [[IN1]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[TMP0]] to <2 x bfloat> +// CHECK-NEXT: store <2 x bfloat> [[TMP1]], ptr [[RETVAL]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP2]] +// +bfloat2 test_cast_from_fp162_to_bf162(half2 in) { + return (bfloat2)in; +} + + +// CHECK-LABEL: define dso_local double @test_cast_from_fp164_to_bf164( +// CHECK-SAME: double noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x bfloat>, align 8 +// CHECK-NEXT: [[IN:%.*]] = alloca <4 x half>, align 8 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <4 x half>, align 8 +// CHECK-NEXT: store double [[IN_COERCE]], ptr [[IN]], align 8 +// CHECK-NEXT: [[IN1:%.*]] = load <4 x half>, ptr [[IN]], align 8 +// CHECK-NEXT: store <4 x half> [[IN1]], ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <4 x bfloat> +// CHECK-NEXT: store <4 x bfloat> [[TMP1]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret double [[TMP2]] +// +bfloat4 test_cast_from_fp164_to_bf164(half4 in) 
{ + return (bfloat4)in; +} + +// CHECK-LABEL: define dso_local i32 @test_cast_from_bf162_to_fp162( +// CHECK-SAME: i32 noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4 +// CHECK-NEXT: [[IN:%.*]] = alloca <2 x bfloat>, align 4 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <2 x bfloat>, align 4 +// CHECK-NEXT: store i32 [[IN_COERCE]], ptr [[IN]], align 4 +// CHECK-NEXT: [[IN1:%.*]] = load <2 x bfloat>, ptr [[IN]], align 4 +// CHECK-NEXT: store <2 x bfloat> [[IN1]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x bfloat> [[TMP0]] to <2 x half> +// CHECK-NEXT: store <2 x half> [[TMP1]], ptr [[RETVAL]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP2]] +// +half2 test_cast_from_bf162_to_fp162(bfloat2 in) { + return (half2)in; +} + + +// CHECK-LABEL: define dso_local double @test_cast_from_bf164_to_fp164( +// CHECK-SAME: double noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8 +// CHECK-NEXT: [[IN:%.*]] = alloca <4 x bfloat>, align 8 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <4 x bfloat>, align 8 +// CHECK-NEXT: store double [[IN_COERCE]], ptr [[IN]], align 8 +// CHECK-NEXT: [[IN1:%.*]] = load <4 x bfloat>, ptr [[IN]], align 8 +// CHECK-NEXT: store <4 x bfloat> [[IN1]], ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x bfloat>, ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x bfloat> [[TMP0]] to <4 x half> +// CHECK-NEXT: store <4 x half> [[TMP1]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret double [[TMP2]] +// +half4 test_cast_from_bf164_to_fp164(bfloat4 in) { + return (half4)in; +} + + +// CHECK-LABEL: define dso_local i32 @test_convertvector_from_fp162_to_bf162( +// CHECK-SAME: 
i32 noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x bfloat>, align 4 +// CHECK-NEXT: [[IN:%.*]] = alloca <2 x half>, align 4 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <2 x half>, align 4 +// CHECK-NEXT: store i32 [[IN_COERCE]], ptr [[IN]], align 4 +// CHECK-NEXT: [[IN1:%.*]] = load <2 x half>, ptr [[IN]], align 4 +// CHECK-NEXT: store <2 x half> [[IN1]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fpext <2 x half> [[TMP0]] to <2 x float> +// CHECK-NEXT: [[CONV2:%.*]] = fptrunc <2 x float> [[CONV]] to <2 x bfloat> +// CHECK-NEXT: store <2 x bfloat> [[CONV2]], ptr [[RETVAL]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// +bfloat2 test_convertvector_from_fp162_to_bf162(half2 in) { + return __builtin_convertvector(in, bfloat2); +} + +// CHECK-LABEL: define dso_local i32 @test_convertvector_from_bf162_to_fp162( +// CHECK-SAME: i32 noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <2 x half>, align 4 +// CHECK-NEXT: [[IN:%.*]] = alloca <2 x bfloat>, align 4 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <2 x bfloat>, align 4 +// CHECK-NEXT: store i32 [[IN_COERCE]], ptr [[IN]], align 4 +// CHECK-NEXT: [[IN1:%.*]] = load <2 x bfloat>, ptr [[IN]], align 4 +// CHECK-NEXT: store <2 x bfloat> [[IN1]], ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr [[IN_ADDR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fpext <2 x bfloat> [[TMP0]] to <2 x float> +// CHECK-NEXT: [[CONV2:%.*]] = fptrunc <2 x float> [[CONV]] to <2 x half> +// CHECK-NEXT: store <2 x half> [[CONV2]], ptr [[RETVAL]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] +// +half2 test_convertvector_from_bf162_to_fp162(bfloat2 in) { + return __builtin_convertvector(in, half2); +} + +// 
CHECK-LABEL: define dso_local double @test_convertvector_from_fp164_to_bf164( +// CHECK-SAME: double noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x bfloat>, align 8 +// CHECK-NEXT: [[IN:%.*]] = alloca <4 x half>, align 8 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <4 x half>, align 8 +// CHECK-NEXT: store double [[IN_COERCE]], ptr [[IN]], align 8 +// CHECK-NEXT: [[IN1:%.*]] = load <4 x half>, ptr [[IN]], align 8 +// CHECK-NEXT: store <4 x half> [[IN1]], ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x half>, ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = fpext <4 x half> [[TMP0]] to <4 x float> +// CHECK-NEXT: [[CONV2:%.*]] = fptrunc <4 x float> [[CONV]] to <4 x bfloat> +// CHECK-NEXT: store <4 x bfloat> [[CONV2]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret double [[TMP1]] +// +bfloat4 test_convertvector_from_fp164_to_bf164(half4 in) { + return __builtin_convertvector(in, bfloat4); +} + +// CHECK-LABEL: define dso_local double @test_convertvector_from_bf164_to_fp164( +// CHECK-SAME: double noundef [[IN_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8 +// CHECK-NEXT: [[IN:%.*]] = alloca <4 x bfloat>, align 8 +// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca <4 x bfloat>, align 8 +// CHECK-NEXT: store double [[IN_COERCE]], ptr [[IN]], align 8 +// CHECK-NEXT: [[IN1:%.*]] = load <4 x bfloat>, ptr [[IN]], align 8 +// CHECK-NEXT: store <4 x bfloat> [[IN1]], ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x bfloat>, ptr [[IN_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = fpext <4 x bfloat> [[TMP0]] to <4 x float> +// CHECK-NEXT: [[CONV2:%.*]] = fptrunc <4 x float> [[CONV]] to <4 x half> +// CHECK-NEXT: store <4 x half> [[CONV2]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret double [[TMP1]] 
+// +half4 test_convertvector_from_bf164_to_fp164(bfloat4 in) { + return __builtin_convertvector(in, half4); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits