stuij updated this revision to Diff 265524.
stuij added a comment.
addressed review comments, most of all changed license header on the generated
bfloat file
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D79708/new/
https://reviews.llvm.org/D79708
Files:
clang/include/clang/Basic/arm_bf16.td
clang/include/clang/Basic/arm_neon_incl.td
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/Basic/Targets/ARM.cpp
clang/lib/Headers/CMakeLists.txt
clang/test/Preprocessor/aarch64-target-features.c
clang/test/Preprocessor/arm-target-features.c
clang/utils/TableGen/NeonEmitter.cpp
clang/utils/TableGen/TableGen.cpp
clang/utils/TableGen/TableGenBackends.h
Index: clang/utils/TableGen/TableGenBackends.h
===================================================================
--- clang/utils/TableGen/TableGenBackends.h
+++ clang/utils/TableGen/TableGenBackends.h
@@ -85,6 +85,7 @@
void EmitNeon(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitFP16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitBF16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitNeonSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitNeonTest(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitNeon2(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
Index: clang/utils/TableGen/TableGen.cpp
===================================================================
--- clang/utils/TableGen/TableGen.cpp
+++ clang/utils/TableGen/TableGen.cpp
@@ -63,6 +63,7 @@
GenClangOpenCLBuiltins,
GenArmNeon,
GenArmFP16,
+ GenArmBF16,
GenArmNeonSema,
GenArmNeonTest,
GenArmMveHeader,
@@ -186,6 +187,7 @@
"Generate OpenCL builtin declaration handlers"),
clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"),
clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"),
+ clEnumValN(GenArmBF16, "gen-arm-bf16", "Generate arm_bf16.h for clang"),
clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
"Generate ARM NEON sema support for clang"),
clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
@@ -360,6 +362,9 @@
case GenArmFP16:
EmitFP16(Records, OS);
break;
+ case GenArmBF16:
+ EmitBF16(Records, OS);
+ break;
case GenArmNeonSema:
EmitNeonSema(Records, OS);
break;
Index: clang/utils/TableGen/NeonEmitter.cpp
===================================================================
--- clang/utils/TableGen/NeonEmitter.cpp
+++ clang/utils/TableGen/NeonEmitter.cpp
@@ -99,7 +99,8 @@
Poly128,
Float16,
Float32,
- Float64
+ Float64,
+ BFloat16
};
} // end namespace NeonTypeFlags
@@ -147,6 +148,7 @@
SInt,
UInt,
Poly,
+ BFloat16,
};
TypeKind Kind;
bool Immediate, Constant, Pointer;
@@ -199,6 +201,7 @@
bool isInt() const { return isInteger() && ElementBitwidth == 32; }
bool isLong() const { return isInteger() && ElementBitwidth == 64; }
bool isVoid() const { return Kind == Void; }
+ bool isBFloat16() const { return Kind == BFloat16; }
unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
unsigned getSizeInBits() const { return Bitwidth; }
unsigned getElementSizeInBits() const { return ElementBitwidth; }
@@ -585,8 +588,11 @@
// runFP16 - Emit arm_fp16.h.inc
void runFP16(raw_ostream &o);
- // runHeader - Emit all the __builtin prototypes used in arm_neon.h
- // and arm_fp16.h
+ // runBF16 - Emit arm_bf16.h.inc
+ void runBF16(raw_ostream &o);
+
+ // runHeader - Emit all the __builtin prototypes used in arm_neon.h,
+ // arm_fp16.h and arm_bf16.h
void runHeader(raw_ostream &o);
// runTests - Emit tests for all the Neon intrinsics.
@@ -611,6 +617,8 @@
S += "poly";
else if (isFloating())
S += "float";
+ else if (isBFloat16())
+ S += "bfloat";
else
S += "int";
@@ -650,7 +658,10 @@
case 128: S += "LLLi"; break;
default: llvm_unreachable("Unhandled case!");
}
- else
+ else if (isBFloat16()) {
+ assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
+ S += "y";
+ } else
switch (ElementBitwidth) {
case 16: S += "h"; break;
case 32: S += "f"; break;
@@ -704,6 +715,11 @@
Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
}
+ if (isBFloat16()) {
+ assert(Addend == 1 && "BFloat16 is only 16 bit");
+ Base = (unsigned)NeonTypeFlags::BFloat16;
+ }
+
if (Bitwidth == 128)
Base |= (unsigned)NeonTypeFlags::QuadFlag;
if (isInteger() && !isSigned())
@@ -727,6 +743,9 @@
} else if (Name.startswith("poly")) {
T.Kind = Poly;
Name = Name.drop_front(4);
+ } else if (Name.startswith("bfloat")) {
+ T.Kind = BFloat16;
+ Name = Name.drop_front(6);
} else {
assert(Name.startswith("int"));
Name = Name.drop_front(3);
@@ -825,6 +844,10 @@
if (isPoly())
NumVectors = 0;
break;
+ case 'b':
+ Kind = BFloat16;
+ ElementBitwidth = 16;
+ break;
default:
llvm_unreachable("Unhandled type code!");
}
@@ -851,6 +874,10 @@
case 'U':
Kind = UInt;
break;
+ case 'B':
+ Kind = BFloat16;
+ ElementBitwidth = 16;
+ break;
case 'F':
Kind = Float;
break;
@@ -932,6 +959,9 @@
if (CK == ClassB)
return "";
+ if (T.isBFloat16())
+ return "bf16";
+
if (T.isPoly())
typeCode = 'p';
else if (T.isInteger())
@@ -969,7 +999,7 @@
Type RetT = getReturnType();
if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
- !RetT.isFloating())
+ !RetT.isFloating() && !RetT.isBFloat16())
RetT.makeInteger(RetT.getElementSizeInBits(), false);
// Since the return value must be one type, return a vector type of the
@@ -2164,6 +2194,74 @@
genIntrinsicRangeCheckCode(OS, Defs);
}
+static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
+ std::string TypedefTypes(types);
+ std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
+
+ // Emit vector typedefs.
+ bool InIfdef = false;
+ for (auto &TS : TDTypeVec) {
+ bool IsA64 = false;
+ Type T(TS, ".");
+ if (T.isDouble())
+ IsA64 = true;
+
+ if (InIfdef && !IsA64) {
+ OS << "#endif\n";
+ InIfdef = false;
+ }
+ if (!InIfdef && IsA64) {
+ OS << "#ifdef __aarch64__\n";
+ InIfdef = true;
+ }
+
+ if (T.isPoly())
+ OS << "typedef __attribute__((neon_polyvector_type(";
+ else
+ OS << "typedef __attribute__((neon_vector_type(";
+
+ Type T2 = T;
+ T2.makeScalar();
+ OS << T.getNumElements() << "))) ";
+ OS << T2.str();
+ OS << " " << T.str() << ";\n";
+ }
+ if (InIfdef)
+ OS << "#endif\n";
+ OS << "\n";
+
+ // Emit struct typedefs.
+ InIfdef = false;
+ for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
+ for (auto &TS : TDTypeVec) {
+ bool IsA64 = false;
+ Type T(TS, ".");
+ if (T.isDouble())
+ IsA64 = true;
+
+ if (InIfdef && !IsA64) {
+ OS << "#endif\n";
+ InIfdef = false;
+ }
+ if (!InIfdef && IsA64) {
+ OS << "#ifdef __aarch64__\n";
+ InIfdef = true;
+ }
+
+ const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
+ Type VT(TS, Mods);
+ OS << "typedef struct " << VT.str() << " {\n";
+ OS << " " << T.str() << " val";
+ OS << "[" << NumMembers << "]";
+ OS << ";\n} ";
+ OS << VT.str() << ";\n";
+ OS << "\n";
+ }
+ }
+ if (InIfdef)
+ OS << "#endif\n";
+}
+
/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
@@ -2218,6 +2316,11 @@
OS << "#include <stdint.h>\n\n";
+ OS << "#ifdef __ARM_FEATURE_BF16\n";
+ OS << "#include <arm_bf16.h>\n";
+ OS << "typedef __bf16 bfloat16_t;\n";
+ OS << "#endif\n\n";
+
// Emit NEON-specific scalar typedefs.
OS << "typedef float float32_t;\n";
OS << "typedef __fp16 float16_t;\n";
@@ -2238,74 +2341,11 @@
OS << "typedef int64_t poly64_t;\n";
OS << "#endif\n";
- // Emit Neon vector typedefs.
- std::string TypedefTypes(
- "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
- std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
+ emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS);
- // Emit vector typedefs.
- bool InIfdef = false;
- for (auto &TS : TDTypeVec) {
- bool IsA64 = false;
- Type T(TS, ".");
- if (T.isDouble())
- IsA64 = true;
-
- if (InIfdef && !IsA64) {
- OS << "#endif\n";
- InIfdef = false;
- }
- if (!InIfdef && IsA64) {
- OS << "#ifdef __aarch64__\n";
- InIfdef = true;
- }
-
- if (T.isPoly())
- OS << "typedef __attribute__((neon_polyvector_type(";
- else
- OS << "typedef __attribute__((neon_vector_type(";
-
- Type T2 = T;
- T2.makeScalar();
- OS << T.getNumElements() << "))) ";
- OS << T2.str();
- OS << " " << T.str() << ";\n";
- }
- if (InIfdef)
- OS << "#endif\n";
- OS << "\n";
-
- // Emit struct typedefs.
- InIfdef = false;
- for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
- for (auto &TS : TDTypeVec) {
- bool IsA64 = false;
- Type T(TS, ".");
- if (T.isDouble())
- IsA64 = true;
-
- if (InIfdef && !IsA64) {
- OS << "#endif\n";
- InIfdef = false;
- }
- if (!InIfdef && IsA64) {
- OS << "#ifdef __aarch64__\n";
- InIfdef = true;
- }
-
- const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
- Type VT(TS, Mods);
- OS << "typedef struct " << VT.str() << " {\n";
- OS << " " << T.str() << " val";
- OS << "[" << NumMembers << "]";
- OS << ";\n} ";
- OS << VT.str() << ";\n";
- OS << "\n";
- }
- }
- if (InIfdef)
- OS << "#endif\n";
- OS << "\n";
+ OS << "#ifdef __ARM_FEATURE_BF16\n";
+ emitNeonTypeDefs("bQb", OS);
+ OS << "#endif\n\n";
OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
"__nodebug__))\n\n";
@@ -2472,6 +2512,84 @@
OS << "#endif /* __ARM_FP16_H */\n";
}
+void NeonEmitter::runBF16(raw_ostream &OS) {
+ OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
+ "-----------------------------------===\n"
+ " *\n"
+ " *\n"
+ " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+ "Exceptions.\n"
+ " * See https://llvm.org/LICENSE.txt for license information.\n"
+ " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+ " *\n"
+ " *===-----------------------------------------------------------------"
+ "------===\n"
+ " */\n\n";
+
+ OS << "#ifndef __ARM_BF16_H\n";
+ OS << "#define __ARM_BF16_H\n\n";
+
+ OS << "typedef __bf16 bfloat16_t;\n";
+
+ OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
+ "__nodebug__))\n\n";
+
+ SmallVector<Intrinsic *, 128> Defs;
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ for (auto *I : Defs)
+ I->indexBody();
+
+ llvm::stable_sort(Defs, llvm::deref<std::less<>>());
+
+ // Only emit a def when its requirements have been met.
+ // FIXME: This loop could be made faster, but it's fast enough for now.
+ bool MadeProgress = true;
+ std::string InGuard;
+ while (!Defs.empty() && MadeProgress) {
+ MadeProgress = false;
+
+ for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
+ I != Defs.end(); /*No step*/) {
+ bool DependenciesSatisfied = true;
+ for (auto *II : (*I)->getDependencies()) {
+ if (llvm::is_contained(Defs, II))
+ DependenciesSatisfied = false;
+ }
+ if (!DependenciesSatisfied) {
+ // Try the next one.
+ ++I;
+ continue;
+ }
+
+ // Emit #endif/#if pair if needed.
+ if ((*I)->getGuard() != InGuard) {
+ if (!InGuard.empty())
+ OS << "#endif\n";
+ InGuard = (*I)->getGuard();
+ if (!InGuard.empty())
+ OS << "#if " << InGuard << "\n";
+ }
+
+ // Actually generate the intrinsic code.
+ OS << (*I)->generate();
+
+ MadeProgress = true;
+ I = Defs.erase(I);
+ }
+ }
+ assert(Defs.empty() && "Some requirements were not satisfied!");
+ if (!InGuard.empty())
+ OS << "#endif\n";
+
+ OS << "\n";
+ OS << "#undef __ai\n\n";
+
+ OS << "#endif\n";
+}
+
void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).run(OS);
}
@@ -2480,6 +2598,10 @@
NeonEmitter(Records).runFP16(OS);
}
+void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) {
+ NeonEmitter(Records).runBF16(OS);
+}
+
void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
NeonEmitter(Records).runHeader(OS);
}
Index: clang/test/Preprocessor/arm-target-features.c
===================================================================
--- clang/test/Preprocessor/arm-target-features.c
+++ clang/test/Preprocessor/arm-target-features.c
@@ -7,6 +7,9 @@
// CHECK-V8A: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
// CHECK-V8A-NOT: #define __ARM_FP 0x
// CHECK-V8A-NOT: #define __ARM_FEATURE_DOTPROD
+// CHECK-V8A-NOT: #define __ARM_BF16_FORMAT_ALTERNATIVE
+// CHECK-V8A-NOT: #define __ARM_FEATURE_BF16
+// CHECK-V8A-NOT: #define __ARM_FEATURE_BF16_VECTOR_ARITHMETIC
// RUN: %clang -target armv8a-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s
// RUN: %clang -target armv8a-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s
@@ -848,3 +851,9 @@
// RUN: %clang -target arm-none-none-eabi -march=armv7-m -mfpu=softvfp -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SOFTVFP %s
// CHECK-SOFTVFP-NOT: #define __ARM_FP 0x
+
+// ================== Check BFloat Extensions.
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.6-a+bf16 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-BFLOAT %s
+// CHECK-BFLOAT: #define __ARM_BF16_FORMAT_ALTERNATIVE 1
+// CHECK-BFLOAT: #define __ARM_FEATURE_BF16 1
+// CHECK-BFLOAT: #define __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1
Index: clang/test/Preprocessor/aarch64-target-features.c
===================================================================
--- clang/test/Preprocessor/aarch64-target-features.c
+++ clang/test/Preprocessor/aarch64-target-features.c
@@ -41,6 +41,12 @@
// CHECK-NOT: __ARM_FEATURE_DOTPROD
// CHECK-NOT: __ARM_FEATURE_PAC_DEFAULT
// CHECK-NOT: __ARM_FEATURE_BTI_DEFAULT
+// CHECK-NOT: __ARM_BF16_FORMAT_ALTERNATIVE 1
+// CHECK-NOT: __ARM_FEATURE_BF16 1
+// CHECK-NOT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1
+
+// RUN: %clang -target aarch64_be-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-BIGENDIAN
+// CHECK-BIGENDIAN: __ARM_BIG_ENDIAN 1
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+crypto -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRYPTO %s
// RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+crypto -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRYPTO %s
@@ -368,3 +374,10 @@
// RUN: %clang -target arm64-none-linux-gnu -march=armv8-a -mbranch-protection=pac-ret+bti -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-BTI %s
// CHECK-BTI-OFF-NOT: __ARM_FEATURE_BTI_DEFAULT
// CHECK-BTI: #define __ARM_FEATURE_BTI_DEFAULT 1
+
+// ================== Check BFloat Extensions.
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.6-a+bf16 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-BFLOAT %s
+// CHECK-BFLOAT: __ARM_BF16_FORMAT_ALTERNATIVE 1
+// CHECK-BFLOAT: __ARM_FEATURE_BF16 1
+// CHECK-BFLOAT: __ARM_FEATURE_BF16_VECTOR_ARITHMETIC 1
+
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -189,6 +189,8 @@
clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
# Generate arm_sve.h
clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h)
+# Generate arm_bf16.h
+clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h)
# Generate arm_mve.h
clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
# Generate arm_cde.h
Index: clang/lib/Basic/Targets/ARM.cpp
===================================================================
--- clang/lib/Basic/Targets/ARM.cpp
+++ clang/lib/Basic/Targets/ARM.cpp
@@ -834,6 +834,12 @@
if (HasMatMul)
Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1");
+ if (HasBFloat16) {
+ Builder.defineMacro("__ARM_FEATURE_BF16", "1");
+ Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1");
+ Builder.defineMacro("__ARM_BF16_FORMAT_ALTERNATIVE", "1");
+ }
+
switch (ArchKind) {
default:
break;
Index: clang/lib/Basic/Targets/AArch64.cpp
===================================================================
--- clang/lib/Basic/Targets/AArch64.cpp
+++ clang/lib/Basic/Targets/AArch64.cpp
@@ -286,6 +286,12 @@
if (HasMatMul)
Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1");
+ if (HasBFloat16) {
+ Builder.defineMacro("__ARM_FEATURE_BF16", "1");
+ Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1");
+ Builder.defineMacro("__ARM_BF16_FORMAT_ALTERNATIVE", "1");
+ }
+
if ((FPU & NeonMode) && HasFP16FML)
Builder.defineMacro("__ARM_FEATURE_FP16FML", "1");
Index: clang/include/clang/Basic/arm_neon_incl.td
===================================================================
--- clang/include/clang/Basic/arm_neon_incl.td
+++ clang/include/clang/Basic/arm_neon_incl.td
@@ -215,6 +215,7 @@
// f: float
// h: half-float
// d: double
+// b: bfloat
//
// Typespec modifiers
// ------------------
@@ -236,6 +237,7 @@
// S: change to signed integer category.
// U: change to unsigned integer category.
// F: change to floating category.
+// B: change to BFloat
// P: change to polynomial category.
// p: change polynomial to equivalent integer category. Otherwise nop.
//
Index: clang/include/clang/Basic/arm_bf16.td
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/arm_bf16.td
@@ -0,0 +1,14 @@
+//===--- arm_fp16.td - ARM BF16 compiler interface ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TableGen definitions from which the ARM BF16 header
+// file will be generated.
+//
+//===----------------------------------------------------------------------===//
+
+include "arm_neon_incl.td"
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits