fmin (#171633)

via cfe-commits Fri, 12 Dec 2025 02:50:44 -0800

Author: Jasmine Tang
Date: 2025-12-12T10:50:30Z
New Revision: 80ec43d455a5e47ba005112cd2b2c447bb40c42c


URL: 
https://github.com/llvm/llvm-project/commit/80ec43d455a5e47ba005112cd2b2c447bb40c42c
DIFF: 
https://github.com/llvm/llvm-project/commit/80ec43d455a5e47ba005112cd2b2c447bb40c42c.diff

LOG: [CIR] Implement builtin reduce fadd/fmul/fmax/fmin (#171633)

New test files are created to mirror the layout of the corresponding original (OG) CodeGen tests.

Added: 
    clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
    clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
    clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
    clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f9e1f75a51143..72e6bea244802 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1552,26 +1552,52 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_vpshrdw128:
   case X86::BI__builtin_ia32_vpshrdw256:
   case X86::BI__builtin_ia32_vpshrdw512:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_reduce_fadd_pd512:
   case X86::BI__builtin_ia32_reduce_fadd_ps512:
   case X86::BI__builtin_ia32_reduce_fadd_ph512:
   case X86::BI__builtin_ia32_reduce_fadd_ph256:
-  case X86::BI__builtin_ia32_reduce_fadd_ph128:
+  case X86::BI__builtin_ia32_reduce_fadd_ph128: {
+    assert(!cir::MissingFeatures::fastMathFlags());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "vector.reduce.fadd", ops[0].getType(),
+                               mlir::ValueRange{ops[0], ops[1]});
+  }
   case X86::BI__builtin_ia32_reduce_fmul_pd512:
   case X86::BI__builtin_ia32_reduce_fmul_ps512:
   case X86::BI__builtin_ia32_reduce_fmul_ph512:
   case X86::BI__builtin_ia32_reduce_fmul_ph256:
-  case X86::BI__builtin_ia32_reduce_fmul_ph128:
+  case X86::BI__builtin_ia32_reduce_fmul_ph128: {
+    assert(!cir::MissingFeatures::fastMathFlags());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "vector.reduce.fmul", ops[0].getType(),
+                               mlir::ValueRange{ops[0], ops[1]});
+  }
   case X86::BI__builtin_ia32_reduce_fmax_pd512:
   case X86::BI__builtin_ia32_reduce_fmax_ps512:
   case X86::BI__builtin_ia32_reduce_fmax_ph512:
   case X86::BI__builtin_ia32_reduce_fmax_ph256:
-  case X86::BI__builtin_ia32_reduce_fmax_ph128:
+  case X86::BI__builtin_ia32_reduce_fmax_ph128: {
+    assert(!cir::MissingFeatures::fastMathFlags());
+    cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "vector.reduce.fmax", vecTy.getElementType(),
+                               mlir::ValueRange{ops[0]});
+  }
   case X86::BI__builtin_ia32_reduce_fmin_pd512:
   case X86::BI__builtin_ia32_reduce_fmin_ps512:
   case X86::BI__builtin_ia32_reduce_fmin_ph512:
   case X86::BI__builtin_ia32_reduce_fmin_ph256:
-  case X86::BI__builtin_ia32_reduce_fmin_ph128:
+  case X86::BI__builtin_ia32_reduce_fmin_ph128: {
+    assert(!cir::MissingFeatures::fastMathFlags());
+    cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
+    return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+                               "vector.reduce.fmin", vecTy.getElementType(),
+                               mlir::ValueRange{ops[0]});
+  }
   case X86::BI__builtin_ia32_rdrand16_step:
   case X86::BI__builtin_ia32_rdrand32_step:
   case X86::BI__builtin_ia32_rdrand64_step:

diff  --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c 
b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
new file mode 100644
index 0000000000000..bc4249ffd25fc
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck 
%s --check-prefixes=CIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck 
%s --check-prefixes=LLVM
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s 
--check-prefixes=OGCG
+
+#include <immintrin.h>
+
+double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
+
+  // CIR-LABEL: _mm512_reduce_add_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_add_pd
+  // CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_add_pd
+  // LLVM: call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 
x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_add_pd
+  // OGCG-NOT: reassoc
+  // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double 
-0.000000e+00, <8 x double> %{{.*}})
+  // OGCG-NOT: reassoc
+  return _mm512_reduce_add_pd(__W) + ExtraAddOp;
+}
+
+double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
+  // CIR-LABEL: _mm512_reduce_mul_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_mul_pd
+  // CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_mul_pd
+  // LLVM: call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 
x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_mul_pd
+  // OGCG-NOT: reassoc
+  // OGCG:    call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 
1.000000e+00, <8 x double> %{{.*}})
+  // OGCG-NOT: reassoc
+  return _mm512_reduce_mul_pd(__W) * ExtraMulOp;
+}
+
+
+float test_mm512_reduce_add_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_add_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_add_ps
+  // CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_add_ps
+  // LLVM: call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 
x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_add_ps
+  // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float 
-0.000000e+00, <16 x float> %{{.*}})
+  return _mm512_reduce_add_ps(__W);
+}
+
+float test_mm512_reduce_mul_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_mul_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_mul_ps
+  // CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_mul_ps
+  // LLVM: call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 
x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_mul_ps
+  // OGCG:    call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 
1.000000e+00, <16 x float> %{{.*}})
+  return _mm512_reduce_mul_ps(__W);
+}

diff  --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c 
b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
new file mode 100644
index 0000000000000..104e76fa6ad03
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck 
%s --check-prefixes=CIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck 
%s --check-prefixes=LLVM
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s 
--check-prefixes=OGCG
+
+#include <immintrin.h>
+
+double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
+  // CIR-LABEL: _mm512_reduce_max_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_max_pd
+  // CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_max_pd
+  // LLVM: call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_max_pd
+  // OGCG-NOT: nnan
+  // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> 
%{{.*}})
+  // OGCG-NOT: nnan
+  return _mm512_reduce_max_pd(__W) + ExtraAddOp;
+}
+
+double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
+  // CIR-LABEL: _mm512_reduce_min_pd
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<8 x !cir.double>) -> !cir.double
+
+  // CIR-LABEL: test_mm512_reduce_min_pd
+  // CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.double>) -> !cir.double
+
+  // LLVM-LABEL: test_mm512_reduce_min_pd
+  // LLVM: call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_min_pd
+  // OGCG-NOT: nnan
+  // OGCG:    call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x 
double> %{{.*}})
+  // OGCG-NOT: nnan
+  return _mm512_reduce_min_pd(__W) * ExtraMulOp;
+}
+
+float test_mm512_reduce_max_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_max_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_max_ps
+  // CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_max_ps
+  // LLVM: call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_max_ps
+  // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> 
%{{.*}})
+  return _mm512_reduce_max_ps(__W);
+}
+
+float test_mm512_reduce_min_ps(__m512 __W){
+  // CIR-LABEL: _mm512_reduce_min_ps
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<16 x !cir.float>) -> !cir.float
+
+  // CIR-LABEL: test_mm512_reduce_min_ps
+  // CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.float>) -> !cir.float
+
+  // LLVM-LABEL: test_mm512_reduce_min_ps
+  // LLVM: call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_min_ps
+  // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> 
%{{.*}})
+  return _mm512_reduce_min_ps(__W);
+}

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
index 161fc45b2a32d..464fa29fffc20 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
@@ -63,4 +63,64 @@ __m512h test_mm512_undefined_ph(void) {
   // OGCG-LABEL: test_mm512_undefined_ph
   // OGCG: ret <32 x half> zeroinitializer
   return _mm512_undefined_ph();
-}
\ No newline at end of file
+}
+
+_Float16 test_mm512_reduce_add_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_add_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_add_ph
+  // CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_add_ph
+  // LLVM: call half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_add_ph
+  // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 
0xH8000, <32 x half> %{{.*}})
+  return _mm512_reduce_add_ph(__W);
+}
+
+_Float16 test_mm512_reduce_mul_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_mul_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_mul_ph
+  // CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_mul_ph
+  // LLVM: call half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_mul_ph
+  // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 
0xH3C00, <32 x half> %{{.*}})
+  return _mm512_reduce_mul_ph(__W);
+}
+
+_Float16 test_mm512_reduce_max_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_max_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : 
(!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_max_ph
+  // CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_max_ph
+  // LLVM: call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_max_ph
+  // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> 
%{{.*}})
+  return _mm512_reduce_max_ph(__W);
+}
+
+_Float16 test_mm512_reduce_min_ph(__m512h __W) {
+  // CIR-LABEL: _mm512_reduce_min_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm512_reduce_min_ph
+  // CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm512_reduce_min_ph
+  // LLVM: call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm512_reduce_min_ph
+  // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmin.v32f16(<32 x half> 
%{{.*}})
+  return _mm512_reduce_min_ph(__W);
+}

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
new file mode 100644
index 0000000000000..994fdfec23c2c
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -fclangir -emit-llvm -o %t.ll  -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature 
+avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+#include <immintrin.h>
+
+_Float16 test_mm256_reduce_add_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_add_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_add_ph
+  // CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_add_ph
+  // LLVM: call half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_add_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v16f16(half 0xH8000, 
<16 x half> %{{.*}})
+  return _mm256_reduce_add_ph(__W);
+}
+
+_Float16 test_mm256_reduce_mul_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_mul_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_mul_ph
+  // CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_mul_ph
+  // LLVM: call half @llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_mul_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v16f16(half 0xH3C00, 
<16 x half> %{{.*}})
+  return _mm256_reduce_mul_ph(__W);
+}
+
+_Float16 test_mm256_reduce_max_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_max_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : 
(!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_max_ph
+  // CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_max_ph
+  // LLVM: call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_max_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
+  return _mm256_reduce_max_ph(__W);
+}
+
+_Float16 test_mm256_reduce_min_ph(__m256h __W) {
+  // CIR-LABEL: _mm256_reduce_min_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm256_reduce_min_ph
+  // CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm256_reduce_min_ph
+  // LLVM: call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm256_reduce_min_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
+  return _mm256_reduce_min_ph(__W);
+}
+
+_Float16 test_mm_reduce_add_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_add_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_add_ph
+  // CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_add_ph
+  // LLVM: call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_add_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 
x half> %{{.*}})
+  return _mm_reduce_add_ph(__W);
+}
+
+_Float16 test_mm_reduce_mul_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_mul_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : 
(!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_mul_ph
+  // CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_mul_ph
+  // LLVM: call half @llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> 
%{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_mul_ph
+  // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 
x half> %{{.*}})
+  return _mm_reduce_mul_ph(__W);
+}
+
+_Float16 test_mm_reduce_max_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_max_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : 
(!cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_max_ph
+  // CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_max_ph
+  // LLVM: call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_max_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
+  return _mm_reduce_max_ph(__W);
+}
+
+_Float16 test_mm_reduce_min_ph(__m128h __W) {
+  // CIR-LABEL: _mm_reduce_min_ph
+  // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : 
(!cir.vector<8 x !cir.f16>) -> !cir.f16
+
+  // CIR-LABEL: test_mm_reduce_min_ph
+  // CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<8 x 
!cir.f16>) -> !cir.f16
+
+  // LLVM-LABEL: test_mm_reduce_min_ph
+  // LLVM: call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})
+
+  // OGCG-LABEL: test_mm_reduce_min_ph
+  // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})
+  return _mm_reduce_min_ph(__W);
+}
+


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 80ec43d - [CIR] Implement builtin reduce fadd/fmul/fmax/fmin (#171633)

Reply via email to