llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: None (Ami-zhang) <details> <summary>Changes</summary> The LoongArch psABI recently added __bf16 type support, so this type can now be enabled in Clang. Currently, bf16 operations are supported automatically by promoting to float. This patch adds bf16 support by ensuring that extending-load and truncating-store operations are properly expanded. It also implements bf16 truncate/extend on hard-float targets. The extend operation is implemented by a shift, just as in the standard legalization. Supporting truncation requires custom lowering of the truncate libcall on hard-float ABIs (the normal libcall code path is used on soft-float ABIs). --- Patch is 92.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142548.diff 8 Files Affected: - (modified) clang/docs/LanguageExtensions.rst (+1) - (modified) clang/lib/Basic/Targets/LoongArch.h (+5) - (added) clang/test/CodeGen/LoongArch/bfloat-abi.c (+611) - (added) clang/test/CodeGen/LoongArch/bfloat-mangle.cpp (+19) - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+50-4) - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2) - (added) llvm/test/CodeGen/LoongArch/bf16-promote.ll (+172) - (added) llvm/test/CodeGen/LoongArch/bf16.ll (+1048) ``````````diff diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 01b45cf685959..34cea9cc0cf66 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1009,6 +1009,7 @@ to ``float``; see below for more information on this emulation. * 64-bit ARM (AArch64) * RISC-V * X86 (when SSE2 is available) + * LoongArch (For X86, SSE2 is available on 64-bit and all recent 32-bit processors.) 
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 8a8c978ab89db..7e9affc98ac0f 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -49,6 +49,9 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { HasFeatureLD_SEQ_SA = false; HasFeatureDiv32 = false; HasFeatureSCQ = false; + BFloat16Width = 16; + BFloat16Align = 16; + BFloat16Format = &llvm::APFloat::BFloat(); LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); @@ -99,6 +102,8 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool hasBitIntType() const override { return true; } + bool hasBFloat16Type() const override { return true; } + bool useFP16ConversionIntrinsics() const override { return false; } bool handleTargetFeatures(std::vector<std::string> &Features, diff --git a/clang/test/CodeGen/LoongArch/bfloat-abi.c b/clang/test/CodeGen/LoongArch/bfloat-abi.c new file mode 100644 index 0000000000000..9f0e25c17cc74 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/bfloat-abi.c @@ -0,0 +1,611 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-LA64 +// RUN: %clang_cc1 -triple loongarch32 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK-LA32 + +struct bfloat1 { + __bf16 a; +}; + +// CHECK-LA64-LABEL: define dso_local bfloat @h1 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, 
i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[TMP1]], align 2 +// CHECK-LA64-NEXT: ret bfloat [[TMP2]] +// +// CHECK-LA32-LABEL: define dso_local bfloat @h1 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT1:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT1]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[TMP1]], align 2 +// CHECK-LA32-NEXT: ret bfloat [[TMP2]] +// +struct bfloat1 h1(__bf16 a) { + struct bfloat1 x; + x.a = a; + return x; +} + +struct bfloat2 { + __bf16 a; + __bf16 b; +}; + +// CHECK-LA64-LABEL: define dso_local { bfloat, bfloat } @h2 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// 
CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[TMP2]], align 2 +// CHECK-LA64-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 2 +// CHECK-LA64-NEXT: [[TMP6:%.*]] = insertvalue { bfloat, bfloat } poison, bfloat [[TMP3]], 0 +// CHECK-LA64-NEXT: [[TMP7:%.*]] = insertvalue { bfloat, bfloat } [[TMP6]], bfloat [[TMP5]], 1 +// CHECK-LA64-NEXT: ret { bfloat, bfloat } [[TMP7]] +// +// CHECK-LA32-LABEL: define dso_local { bfloat, bfloat } @h2 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT2:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: 
[[TMP3:%.*]] = load bfloat, ptr [[TMP2]], align 2 +// CHECK-LA32-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { bfloat, bfloat }, ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 2 +// CHECK-LA32-NEXT: [[TMP6:%.*]] = insertvalue { bfloat, bfloat } poison, bfloat [[TMP3]], 0 +// CHECK-LA32-NEXT: [[TMP7:%.*]] = insertvalue { bfloat, bfloat } [[TMP6]], bfloat [[TMP5]], 1 +// CHECK-LA32-NEXT: ret { bfloat, bfloat } [[TMP7]] +// +struct bfloat2 h2(__bf16 a, __bf16 b) { + struct bfloat2 x; + x.a = a; + x.b = b; + return x; +} + +struct bfloat3 { + __bf16 a; + __bf16 b; + __bf16 c; +}; + +// CHECK-LA64-LABEL: define dso_local i64 @h3 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[RETVAL_COERCE:%.*]] = alloca i64, align 8 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr 
[[RETVAL]], i32 0, i32 2 +// CHECK-LA64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i64 6, i1 false) +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load i64, ptr [[RETVAL_COERCE]], align 8 +// CHECK-LA64-NEXT: ret i64 [[TMP3]] +// +// CHECK-LA32-LABEL: define dso_local [2 x i32] @h3 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT3:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[RETVAL_COERCE:%.*]] = alloca [2 x i32], align 4 +// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA32-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT3]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-LA32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL_COERCE]], ptr align 2 [[RETVAL]], i32 6, i1 false) +// CHECK-LA32-NEXT: [[TMP3:%.*]] = load [2 x i32], ptr [[RETVAL_COERCE]], 
align 4 +// CHECK-LA32-NEXT: ret [2 x i32] [[TMP3]] +// +struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) { + struct bfloat3 x; + x.a = a; + x.b = b; + x.c = c; + return x; +} + +struct bfloat4 { + __bf16 a; + __bf16 b; + __bf16 c; + __bf16 d; +}; + +// CHECK-LA64-LABEL: define dso_local i64 @h4 +// CHECK-LA64-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-LA64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-LA64-NEXT: [[D4:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-LA64-NEXT: store 
bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-LA64-NEXT: [[TMP4:%.*]] = load i64, ptr [[RETVAL]], align 2 +// CHECK-LA64-NEXT: ret i64 [[TMP4]] +// +// CHECK-LA32-LABEL: define dso_local [2 x i32] @h4 +// CHECK-LA32-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_BFLOAT4:%.*]], align 2 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA32-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store bfloat [[TMP0]], ptr [[A1]], align 2 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 2 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA32-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-LA32-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA32-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-LA32-NEXT: [[D4:%.*]] = getelementptr inbounds nuw [[STRUCT_BFLOAT4]], ptr [[RETVAL]], i32 0, i32 3 +// CHECK-LA32-NEXT: store bfloat [[TMP3]], ptr [[D4]], align 2 +// CHECK-LA32-NEXT: [[TMP4:%.*]] = load [2 x i32], ptr 
[[RETVAL]], align 2 +// CHECK-LA32-NEXT: ret [2 x i32] [[TMP4]] +// +struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) { + struct bfloat4 x; + x.a = a; + x.b = b; + x.c = c; + x.d = d; + return x; +} + +struct floatbfloat { + float a; + __bf16 b; +}; + +// CHECK-LA64-LABEL: define dso_local { float, bfloat } @fh +// CHECK-LA64-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT:%.*]], align 4 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { float, bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4 +// CHECK-LA64-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { float, bfloat }, ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 4 +// CHECK-LA64-NEXT: [[TMP6:%.*]] = insertvalue { float, bfloat } poison, float [[TMP3]], 0 +// CHECK-LA64-NEXT: [[TMP7:%.*]] = insertvalue { float, bfloat } [[TMP6]], bfloat [[TMP5]], 1 +// CHECK-LA64-NEXT: ret { float, bfloat } [[TMP7]] +// +// CHECK-LA32-LABEL: define dso_local { float, bfloat } @fh +// CHECK-LA32-SAME: (float noundef [[A:%.*]], bfloat noundef 
[[B:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA32-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT:%.*]], align 4 +// CHECK-LA32-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK-LA32-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA32-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-LA32-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-LA32-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-LA32-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA32-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOATBFLOAT]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-LA32-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { float, bfloat }, ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA32-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4 +// CHECK-LA32-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { float, bfloat }, ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA32-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[TMP4]], align 4 +// CHECK-LA32-NEXT: [[TMP6:%.*]] = insertvalue { float, bfloat } poison, float [[TMP3]], 0 +// CHECK-LA32-NEXT: [[TMP7:%.*]] = insertvalue { float, bfloat } [[TMP6]], bfloat [[TMP5]], 1 +// CHECK-LA32-NEXT: ret { float, bfloat } [[TMP7]] +// +struct floatbfloat fh(float a, __bf16 b) { + struct floatbfloat x; + x.a = a; + x.b = b; + return x; +} + +struct floatbfloat2 { + float a; + __bf16 b; + __bf16 c; +}; + +// CHECK-LA64-LABEL: define dso_local i64 @fh2 +// CHECK-LA64-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-LA64-NEXT: entry: +// CHECK-LA64-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOATBFLOAT2:%.*]], align 4 +// CHECK-LA64-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// 
CHECK-LA64-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-LA64-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK-LA64-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK-LA64-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-LA64-NEXT: store float [[TMP0]], ptr [[A1]], align 4 +// CHECK-LA64-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-LA64-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 1 +// CHECK-LA64-NEXT: store bfloat [[TMP1]], ptr [[B2]], align 4 +// CHECK-LA64-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-LA64-NEXT: [[C3:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOATBFLOAT2]], ptr [[RETVAL]], i32 0, i32 2 +// CHECK-LA64-NEXT: store bfloat [[TMP2]], ptr [[C3]], align 2 +// CHECK-LA64-NEXT: [[TMP3:%.*]] = load i64, ptr [[RETVAL]], align 4 +// CHECK-LA64-NEXT: ret i64 [[TMP3]] +// +// CHECK-LA32-LABEL: define dso_local [2 x i32] @fh2 +// CHECK-LA32-SAME: (float noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-LA32-NEXT: entry: +// CHECK-LA3... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/142548 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits