This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe4888a37d367: [X86][BF16] Enable __bf16 for x86 targets.
(authored by FreddyYe, committed by pengfei).
Changed prior to commit:
https://reviews.llvm.org/D130964?vs=449187&id=451331#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D130964/new/
https://reviews.llvm.org/D130964
Files:
clang/docs/LanguageExtensions.rst
clang/lib/Basic/Targets/X86.cpp
clang/lib/Basic/Targets/X86.h
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGen/X86/bfloat-abi.c
clang/test/CodeGen/X86/bfloat-half-abi.c
clang/test/CodeGen/X86/bfloat-mangle.cpp
clang/test/Sema/vector-decl-crash.c
llvm/include/llvm/IR/Type.h
Index: llvm/include/llvm/IR/Type.h
===================================================================
--- llvm/include/llvm/IR/Type.h
+++ llvm/include/llvm/IR/Type.h
@@ -144,6 +144,11 @@
/// Return true if this is 'bfloat', a 16-bit bfloat type.
bool isBFloatTy() const { return getTypeID() == BFloatTyID; }
+ /// Return true if this is a 16-bit float type, i.e. 'half' or 'bfloat'.
+ bool is16bitFPTy() const {
+ return getTypeID() == BFloatTyID || getTypeID() == HalfTyID;
+ }
+
/// Return true if this is 'float', a 32-bit IEEE fp type.
bool isFloatTy() const { return getTypeID() == FloatTyID; }
Index: clang/test/Sema/vector-decl-crash.c
===================================================================
--- clang/test/Sema/vector-decl-crash.c
+++ clang/test/Sema/vector-decl-crash.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify -triple x86_64-unknown-unknown
+// RUN: %clang_cc1 %s -fsyntax-only -verify -triple riscv64-unknown-unknown
// GH50171
// This would previously crash when __bf16 was not a supported type.
Index: clang/test/CodeGen/X86/bfloat-mangle.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/bfloat-mangle.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: define {{.*}}void @_Z3foou6__bf16(bfloat noundef %b)
+void foo(__bf16 b) {}
Index: clang/test/CodeGen/X86/bfloat-half-abi.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/bfloat-half-abi.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK
+
+struct bfloat1 {
+ __bf16 a;
+};
+
+struct bfloat1 h1(__bf16 a) {
+ // CHECK: define{{.*}}bfloat @
+ struct bfloat1 x;
+ x.a = a;
+ return x;
+}
+
+struct bfloat2 {
+ __bf16 a;
+ __bf16 b;
+};
+
+struct bfloat2 h2(__bf16 a, __bf16 b) {
+ // CHECK: define{{.*}}<2 x bfloat> @
+ struct bfloat2 x;
+ x.a = a;
+ x.b = b;
+ return x;
+}
+
+struct bfloat3 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+};
+
+struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) {
+ // CHECK: define{{.*}}<4 x bfloat> @
+ struct bfloat3 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ return x;
+}
+
+struct bfloat4 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+ __bf16 d;
+};
+
+struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) {
+ // CHECK: define{{.*}}<4 x bfloat> @
+ struct bfloat4 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ x.d = d;
+ return x;
+}
+
+struct floatbfloat {
+ float a;
+ __bf16 b;
+};
+
+struct floatbfloat fh(float a, __bf16 b) {
+ // CHECK: define{{.*}}<4 x half> @
+ struct floatbfloat x;
+ x.a = a;
+ x.b = b;
+ return x;
+}
+
+struct floatbfloat2 {
+ float a;
+ __bf16 b;
+ __bf16 c;
+};
+
+struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) {
+ // CHECK: define{{.*}}<4 x half> @
+ struct floatbfloat2 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ return x;
+}
+
+struct bfloatfloat {
+ __bf16 a;
+ float b;
+};
+
+struct bfloatfloat hf(__bf16 a, float b) {
+ // CHECK: define{{.*}}<4 x half> @
+ struct bfloatfloat x;
+ x.a = a;
+ x.b = b;
+ return x;
+}
+
+struct bfloat2float {
+ __bf16 a;
+ __bf16 b;
+ float c;
+};
+
+struct bfloat2float h2f(__bf16 a, __bf16 b, float c) {
+ // CHECK: define{{.*}}<4 x bfloat> @
+ struct bfloat2float x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ return x;
+}
+
+struct floatbfloat3 {
+ float a;
+ __bf16 b;
+ __bf16 c;
+ __bf16 d;
+};
+
+struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) {
+ // CHECK: define{{.*}}{ <4 x half>, bfloat } @
+ struct floatbfloat3 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ x.d = d;
+ return x;
+}
+
+struct bfloat5 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+ __bf16 d;
+ __bf16 e;
+};
+
+struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) {
+ // CHECK: define{{.*}}{ <4 x bfloat>, bfloat } @
+ struct bfloat5 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ x.d = d;
+ x.e = e;
+ return x;
+}
Index: clang/test/CodeGen/X86/bfloat-abi.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/bfloat-abi.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK
+
+struct bfloat1 {
+ __bf16 a;
+};
+
+struct bfloat1 h1(__bf16 a) {
+ // CHECK: define{{.*}}bfloat @
+ struct bfloat1 x;
+ x.a = a;
+ return x;
+}
+
+struct bfloat2 {
+ __bf16 a;
+ __bf16 b;
+};
+
+struct bfloat2 h2(__bf16 a, __bf16 b) {
+ // CHECK: define{{.*}}<2 x bfloat> @
+ struct bfloat2 x;
+ x.a = a;
+ x.b = b;
+ return x;
+}
+
+struct bfloat3 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+};
+
+struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) {
+ // CHECK: define{{.*}}<4 x bfloat> @
+ struct bfloat3 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ return x;
+}
+
+struct bfloat4 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+ __bf16 d;
+};
+
+struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) {
+ // CHECK: define{{.*}}<4 x bfloat> @
+ struct bfloat4 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ x.d = d;
+ return x;
+}
+
+struct floatbfloat {
+ float a;
+ __bf16 b;
+};
+
+struct floatbfloat fh(float a, __bf16 b) {
+ // CHECK: define{{.*}}<4 x half> @
+ struct floatbfloat x;
+ x.a = a;
+ x.b = b;
+ return x;
+}
+
+struct floatbfloat2 {
+ float a;
+ __bf16 b;
+ __bf16 c;
+};
+
+struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) {
+ // CHECK: define{{.*}}<4 x half> @
+ struct floatbfloat2 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ return x;
+}
+
+struct bfloatfloat {
+ __bf16 a;
+ float b;
+};
+
+struct bfloatfloat hf(__bf16 a, float b) {
+ // CHECK: define{{.*}}<4 x half> @
+ struct bfloatfloat x;
+ x.a = a;
+ x.b = b;
+ return x;
+}
+
+struct bfloat2float {
+ __bf16 a;
+ __bf16 b;
+ float c;
+};
+
+struct bfloat2float h2f(__bf16 a, __bf16 b, float c) {
+ // CHECK: define{{.*}}<4 x bfloat> @
+ struct bfloat2float x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ return x;
+}
+
+struct floatbfloat3 {
+ float a;
+ __bf16 b;
+ __bf16 c;
+ __bf16 d;
+};
+
+struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) {
+ // CHECK: define{{.*}}{ <4 x half>, bfloat } @
+ struct floatbfloat3 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ x.d = d;
+ return x;
+}
+
+struct bfloat5 {
+ __bf16 a;
+ __bf16 b;
+ __bf16 c;
+ __bf16 d;
+ __bf16 e;
+};
+
+struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) {
+ // CHECK: define{{.*}}{ <4 x bfloat>, bfloat } @
+ struct bfloat5 x;
+ x.a = a;
+ x.b = b;
+ x.c = c;
+ x.d = d;
+ x.e = e;
+ return x;
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -2861,7 +2861,7 @@
} else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
Current = Integer;
} else if (k == BuiltinType::Float || k == BuiltinType::Double ||
- k == BuiltinType::Float16) {
+ k == BuiltinType::Float16 || k == BuiltinType::BFloat16) {
Current = SSE;
} else if (k == BuiltinType::LongDouble) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
@@ -2992,7 +2992,8 @@
Current = Integer;
else if (Size <= 128)
Lo = Hi = Integer;
- } else if (ET->isFloat16Type() || ET == getContext().FloatTy) {
+ } else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
+ ET->isBFloat16Type()) {
Current = SSE;
} else if (ET == getContext().DoubleTy) {
Lo = Hi = SSE;
@@ -3464,9 +3465,9 @@
if (SourceSize > T0Size)
T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
if (T1 == nullptr) {
- // Check if IRType is a half + float. float type will be in IROffset+4 due
+ // Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due
// to its alignment.
- if (T0->isHalfTy() && SourceSize > 4)
+ if (T0->is16bitFPTy() && SourceSize > 4)
T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
// If we can't get a second FP type, return a simple half or float.
// avx512fp16-abi.c:pr51813_2 shows it works to return float for
@@ -3478,7 +3479,7 @@
if (T0->isFloatTy() && T1->isFloatTy())
return llvm::FixedVectorType::get(T0, 2);
- if (T0->isHalfTy() && T1->isHalfTy()) {
+ if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
llvm::Type *T2 = nullptr;
if (SourceSize > 4)
T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
@@ -3487,7 +3488,7 @@
return llvm::FixedVectorType::get(T0, 4);
}
- if (T0->isHalfTy() || T1->isHalfTy())
+ if (T0->is16bitFPTy() || T1->is16bitFPTy())
return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
return llvm::Type::getDoubleTy(getVMContext());
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -156,6 +156,8 @@
public:
X86TargetInfo(const llvm::Triple &Triple, const TargetOptions &)
: TargetInfo(Triple) {
+ BFloat16Width = BFloat16Align = 16;
+ BFloat16Format = &llvm::APFloat::BFloat();
LongDoubleFormat = &llvm::APFloat::x87DoubleExtended();
AddrSpaceMap = &X86AddrSpaceMap;
HasStrictFP = true;
@@ -396,6 +398,8 @@
uint64_t getPointerAlignV(unsigned AddrSpace) const override {
return getPointerWidthV(AddrSpace);
}
+
+ const char *getBFloat16Mangling() const override { return "u6__bf16"; };
};
// X86-32 generic target
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -358,6 +358,8 @@
HasFloat16 = SSELevel >= SSE2;
+ HasBFloat16 = SSELevel >= SSE2;
+
MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature)
.Case("+3dnowa", AMD3DNowAthlon)
.Case("+3dnow", AMD3DNow)
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -756,6 +756,10 @@
``__bf16`` is purely a storage format; it is currently only supported on the following targets:
* 32-bit ARM
* 64-bit ARM (AArch64)
+* X86 (see below)
+
+On X86 targets, ``__bf16`` is supported as long as SSE2 is available, which
+includes all 64-bit and all recent 32-bit processors.
``__fp16`` is a storage and interchange format only. This means that values of
``__fp16`` are immediately promoted to (at least) ``float`` when used in arithmetic
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits