Author: Francis Visoiu Mistrih
Date: 2023-08-31T17:13:19-07:00
New Revision: c987f9d7fdc7b22c9bf68d7b3f0df10b68c679be
URL: https://github.com/llvm/llvm-project/commit/c987f9d7fdc7b22c9bf68d7b3f0df10b68c679be DIFF: https://github.com/llvm/llvm-project/commit/c987f9d7fdc7b22c9bf68d7b3f0df10b68c679be.diff LOG: [Matrix] Try to emit fmuladd for both vector and matrix types For vector * scalar + vector, we emit `fmuladd` directly from clang. This enables it also for matrix * scalar + matrix. rdar://113967122 Differential Revision: https://reviews.llvm.org/D158883 Added: Modified: clang/lib/CodeGen/CGExprScalar.cpp clang/test/CodeGen/ffp-model.c Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 6d5a61b24133e2..a71b7057bb523a 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -3874,6 +3874,14 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { } } + // For vector and matrix adds, try to fold into a fmuladd. + if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); + // Try to form an fmuladd. + if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) + return FMulAdd; + } + if (op.Ty->isConstantMatrixType()) { llvm::MatrixBuilder MB(Builder); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); @@ -3887,10 +3895,6 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (op.LHS->getType()->isFPOrFPVectorTy()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); - // Try to form an fmuladd. - if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) - return FMulAdd; - return Builder.CreateFAdd(op.LHS, op.RHS, "add"); } @@ -4024,6 +4028,14 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { } } + // For vector and matrix subs, try to fold into a fmuladd. + if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); + // Try to form an fmuladd. 
+ if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) + return FMulAdd; + } + if (op.Ty->isConstantMatrixType()) { llvm::MatrixBuilder MB(Builder); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); @@ -4037,9 +4049,6 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { if (op.LHS->getType()->isFPOrFPVectorTy()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); - // Try to form an fmuladd. - if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) - return FMulAdd; return Builder.CreateFSub(op.LHS, op.RHS, "sub"); } diff --git a/clang/test/CodeGen/ffp-model.c b/clang/test/CodeGen/ffp-model.c index 57fa0ef2782051..b3d297a2f85f46 100644 --- a/clang/test/CodeGen/ffp-model.c +++ b/clang/test/CodeGen/ffp-model.c @@ -1,18 +1,18 @@ // REQUIRES: x86-registered-target -// RUN: %clang -S -emit-llvm -ffp-model=fast -emit-llvm %s -o - \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=fast %s -o - \ // RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-FAST -// RUN: %clang -S -emit-llvm -ffp-model=precise %s -o - \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise %s -o - \ // RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE -// RUN: %clang -S -emit-llvm -ffp-model=strict %s -o - \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict %s -o - \ // RUN: -target x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT -// RUN: %clang -S -emit-llvm -ffp-model=strict -ffast-math \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=strict -ffast-math \ // RUN: -target x86_64 %s -o - | FileCheck %s \ // RUN: --check-prefixes CHECK,CHECK-STRICT-FAST -// RUN: %clang -S -emit-llvm -ffp-model=precise -ffast-math \ +// RUN: %clang -S -emit-llvm -fenable-matrix -ffp-model=precise -ffast-math \ // RUN: %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-FAST1 float mymuladd(float x, float y, float z) { @@ -46,3 +46,105 @@ float mymuladd(float x, float y, float z) { // CHECK-FAST1: load float, 
ptr {{.*}} // CHECK-FAST1: fadd fast float {{.*}}, {{.*}} } + +typedef float __attribute__((ext_vector_type(2))) v2f; + +v2f my_vec_muladd(v2f x, float y, v2f z) { + // CHECK: define{{.*}} @my_vec_muladd + return x * y + z; + + // CHECK-FAST: fmul fast <2 x float> + // CHECK-FAST: load <2 x float>, ptr + // CHECK-FAST: fadd fast <2 x float> + + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: load float, ptr + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}}) + + // CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: load float, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + // CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: load float, ptr + // CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}} + + // CHECK-FAST1: load <2 x float>, ptr + // CHECK-FAST1: load float, ptr + // CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-FAST1: load <2 x float>, ptr {{.*}} + // CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}} +} + +typedef float __attribute__((matrix_type(2, 1))) m21f; + +m21f my_m21_muladd(m21f x, float y, m21f z) { + // CHECK: define{{.*}} <2 x float> @my_m21_muladd + return x * y + z; + + // CHECK-FAST: fmul fast <2 x float> + // CHECK-FAST: load <2 x float>, ptr + // CHECK-FAST: fadd fast <2 x float> + + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: load float, ptr + // CHECK-PRECISE: load <2 x float>, ptr + // CHECK-PRECISE: call <2 x float> @llvm.fmuladd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, <2 x float> {{.*}}) + + // 
CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: load float, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + // CHECK-STRICT: load <2 x float>, ptr + // CHECK-STRICT: call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}}, {{.*}}) + + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: load float, ptr + // CHECK-STRICT-FAST: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-STRICT-FAST: load <2 x float>, ptr + // CHECK-STRICT-FAST: fadd fast <2 x float> {{.*}}, {{.*}} + + // CHECK-FAST1: load <2 x float>, ptr + // CHECK-FAST1: load float, ptr + // CHECK-FAST1: fmul fast <2 x float> {{.*}}, {{.*}} + // CHECK-FAST1: load <2 x float>, ptr {{.*}} + // CHECK-FAST1: fadd fast <2 x float> {{.*}}, {{.*}} +} + +typedef float __attribute__((matrix_type(2, 2))) m22f; + +m22f my_m22_muladd(m22f x, float y, m22f z) { + // CHECK: define{{.*}} <4 x float> @my_m22_muladd + return x * y + z; + + // CHECK-FAST: fmul fast <4 x float> + // CHECK-FAST: load <4 x float>, ptr + // CHECK-FAST: fadd fast <4 x float> + + // CHECK-PRECISE: load <4 x float>, ptr + // CHECK-PRECISE: load float, ptr + // CHECK-PRECISE: load <4 x float>, ptr + // CHECK-PRECISE: call <4 x float> @llvm.fmuladd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}) + + // CHECK-STRICT: load <4 x float>, ptr + // CHECK-STRICT: load float, ptr + // CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}}) + // CHECK-STRICT: load <4 x float>, ptr + // CHECK-STRICT: call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}}, {{.*}}) + + // CHECK-STRICT-FAST: load <4 x float>, ptr + // CHECK-STRICT-FAST: load float, ptr + // CHECK-STRICT-FAST: fmul fast <4 x float> {{.*}}, {{.*}} + // CHECK-STRICT-FAST: load <4 x float>, ptr + // CHECK-STRICT-FAST: fadd fast <4 x 
float> {{.*}}, {{.*}} + + // CHECK-FAST1: load <4 x float>, ptr + // CHECK-FAST1: load float, ptr + // CHECK-FAST1: fmul fast <4 x float> {{.*}}, {{.*}} + // CHECK-FAST1: load <4 x float>, ptr {{.*}} + // CHECK-FAST1: fadd fast <4 x float> {{.*}}, {{.*}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits