Author: Jasmine Tang Date: 2025-12-12T10:50:30Z New Revision: 80ec43d455a5e47ba005112cd2b2c447bb40c42c
URL: https://github.com/llvm/llvm-project/commit/80ec43d455a5e47ba005112cd2b2c447bb40c42c DIFF: https://github.com/llvm/llvm-project/commit/80ec43d455a5e47ba005112cd2b2c447bb40c42c.diff LOG: [CIR] Implement builtin reduce fadd/fmul/fmax/fmin (#171633) New test files are created to mirror the layout of the corresponding classic (OG) CodeGen tests. Added: clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c Modified: clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index f9e1f75a51143..72e6bea244802 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1552,26 +1552,52 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_vpshrdw128: case X86::BI__builtin_ia32_vpshrdw256: case X86::BI__builtin_ia32_vpshrdw512: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_reduce_fadd_pd512: case X86::BI__builtin_ia32_reduce_fadd_ps512: case X86::BI__builtin_ia32_reduce_fadd_ph512: case X86::BI__builtin_ia32_reduce_fadd_ph256: - case X86::BI__builtin_ia32_reduce_fadd_ph128: + case X86::BI__builtin_ia32_reduce_fadd_ph128: { + assert(!cir::MissingFeatures::fastMathFlags()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "vector.reduce.fadd", ops[0].getType(), + mlir::ValueRange{ops[0], ops[1]}); + } case X86::BI__builtin_ia32_reduce_fmul_pd512: case X86::BI__builtin_ia32_reduce_fmul_ps512: case X86::BI__builtin_ia32_reduce_fmul_ph512: case X86::BI__builtin_ia32_reduce_fmul_ph256: - case 
X86::BI__builtin_ia32_reduce_fmul_ph128: + case X86::BI__builtin_ia32_reduce_fmul_ph128: { + assert(!cir::MissingFeatures::fastMathFlags()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "vector.reduce.fmul", ops[0].getType(), + mlir::ValueRange{ops[0], ops[1]}); + } case X86::BI__builtin_ia32_reduce_fmax_pd512: case X86::BI__builtin_ia32_reduce_fmax_ps512: case X86::BI__builtin_ia32_reduce_fmax_ph512: case X86::BI__builtin_ia32_reduce_fmax_ph256: - case X86::BI__builtin_ia32_reduce_fmax_ph128: + case X86::BI__builtin_ia32_reduce_fmax_ph128: { + assert(!cir::MissingFeatures::fastMathFlags()); + cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "vector.reduce.fmax", vecTy.getElementType(), + mlir::ValueRange{ops[0]}); + } case X86::BI__builtin_ia32_reduce_fmin_pd512: case X86::BI__builtin_ia32_reduce_fmin_ps512: case X86::BI__builtin_ia32_reduce_fmin_ph512: case X86::BI__builtin_ia32_reduce_fmin_ph256: - case X86::BI__builtin_ia32_reduce_fmin_ph128: + case X86::BI__builtin_ia32_reduce_fmin_ph128: { + assert(!cir::MissingFeatures::fastMathFlags()); + cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + "vector.reduce.fmin", vecTy.getElementType(), + mlir::ValueRange{ops[0]}); + } case X86::BI__builtin_ia32_rdrand16_step: case X86::BI__builtin_ia32_rdrand32_step: case X86::BI__builtin_ia32_rdrand64_step: diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c new file mode 100644 index 0000000000000..bc4249ffd25fc --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR +// RUN: %clang_cc1 -x c -ffreestanding %s -O2 
-triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM +// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG + +#include <immintrin.h> + +double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){ + + // CIR-LABEL: _mm512_reduce_add_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_add_pd + // CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_add_pd + // LLVM: call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_add_pd + // OGCG-NOT: reassoc + // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}}) + // OGCG-NOT: reassoc + return _mm512_reduce_add_pd(__W) + ExtraAddOp; +} + +double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){ + // CIR-LABEL: _mm512_reduce_mul_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_mul_pd + // CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_mul_pd + // LLVM: call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_mul_pd + // OGCG-NOT: reassoc + // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}}) + // OGCG-NOT: reassoc + return _mm512_reduce_mul_pd(__W) * ExtraMulOp; +} + + +float test_mm512_reduce_add_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_add_ps + // CIR: 
cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_add_ps + // CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_add_ps + // LLVM: call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_add_ps + // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}}) + return _mm512_reduce_add_ps(__W); +} + +float test_mm512_reduce_mul_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_mul_ps + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_mul_ps + // CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_mul_ps + // LLVM: call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_mul_ps + // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}}) + return _mm512_reduce_mul_ps(__W); +} diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c new file mode 100644 index 0000000000000..104e76fa6ad03 --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 
-triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG + +#include <immintrin.h> + +double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){ + // CIR-LABEL: _mm512_reduce_max_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_max_pd + // CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_max_pd + // LLVM: call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_max_pd + // OGCG-NOT: nnan + // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}}) + // OGCG-NOT: nnan + return _mm512_reduce_max_pd(__W) + ExtraAddOp; +} + +double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){ + // CIR-LABEL: _mm512_reduce_min_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_min_pd + // CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_min_pd + // LLVM: call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_min_pd + // OGCG-NOT: nnan + // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}}) + // OGCG-NOT: nnan + return _mm512_reduce_min_pd(__W) * ExtraMulOp; +} + +float test_mm512_reduce_max_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_max_ps + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_max_ps + // CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_max_ps + // LLVM: call float 
@llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_max_ps + // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}}) + return _mm512_reduce_max_ps(__W); +} + +float test_mm512_reduce_min_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_min_ps + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_min_ps + // CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_min_ps + // LLVM: call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_min_ps + // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}}) + return _mm512_reduce_min_ps(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c index 161fc45b2a32d..464fa29fffc20 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c @@ -63,4 +63,64 @@ __m512h test_mm512_undefined_ph(void) { // OGCG-LABEL: test_mm512_undefined_ph // OGCG: ret <32 x half> zeroinitializer return _mm512_undefined_ph(); -} \ No newline at end of file +} + +_Float16 test_mm512_reduce_add_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_add_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_add_ph + // CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_add_ph + // LLVM: call half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_add_ph + // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}}) + return 
_mm512_reduce_add_ph(__W); +} + +_Float16 test_mm512_reduce_mul_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_mul_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_mul_ph + // CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_mul_ph + // LLVM: call half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_mul_ph + // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}}) + return _mm512_reduce_mul_ph(__W); +} + +_Float16 test_mm512_reduce_max_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_max_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_max_ph + // CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_max_ph + // LLVM: call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_max_ph + // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}}) + return _mm512_reduce_max_ph(__W); +} + +_Float16 test_mm512_reduce_min_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_min_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_min_ph + // CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_min_ph + // LLVM: call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_min_ph + // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}}) + return _mm512_reduce_min_ph(__W); +} diff --git 
a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c new file mode 100644 index 0000000000000..994fdfec23c2c --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +#include <immintrin.h> + +_Float16 test_mm256_reduce_add_ph(__m256h __W) { + // CIR-LABEL: _mm256_reduce_add_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_add_ph + // CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_add_ph + // LLVM: call half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_add_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}}) + return _mm256_reduce_add_ph(__W); +} + +_Float16 test_mm256_reduce_mul_ph(__m256h __W) { + // CIR-LABEL: 
_mm256_reduce_mul_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_mul_ph + // CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_mul_ph + // LLVM: call half @llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_mul_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}}) + return _mm256_reduce_mul_ph(__W); +} + +_Float16 test_mm256_reduce_max_ph(__m256h __W) { + // CIR-LABEL: _mm256_reduce_max_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_max_ph + // CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_max_ph + // LLVM: call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_max_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}}) + return _mm256_reduce_max_ph(__W); +} + +_Float16 test_mm256_reduce_min_ph(__m256h __W) { + // CIR-LABEL: _mm256_reduce_min_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_min_ph + // CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_min_ph + // LLVM: call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_min_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}}) + return _mm256_reduce_min_ph(__W); +} + +_Float16 test_mm_reduce_add_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_add_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_add_ph + // CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_add_ph + // LLVM: call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_add_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}}) + return _mm_reduce_add_ph(__W); +} + +_Float16 test_mm_reduce_mul_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_mul_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_mul_ph + // CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_mul_ph + // LLVM: call half @llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_mul_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}}) + return _mm_reduce_mul_ph(__W); +} + +_Float16 test_mm_reduce_max_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_max_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_max_ph + // CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_max_ph + // LLVM: call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_max_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}}) + return _mm_reduce_max_ph(__W); +} + +_Float16 test_mm_reduce_min_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_min_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_min_ph + // CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : 
(!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_min_ph + // LLVM: call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_min_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}}) + return _mm_reduce_min_ph(__W); +} + _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
