https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/171633
>From 83f30f03132f2279567b889f9cb4022e4b8b9856 Mon Sep 17 00:00:00 2001 From: Jasmine Tang <[email protected]> Date: Wed, 10 Dec 2025 05:11:12 -0800 Subject: [PATCH 1/5] [CIR] Implement reduce fadd --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 29 +++++++++++++- .../CIR/CodeGen/X86/avx512-reduceIntrin.c | 40 +++++++++++++++++++ .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 17 +++++++- .../X86/avx512vlfp16-builtins.c | 38 ++++++++++++++++++ 4 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 62836ce0f7537..eaf55fdae6b47 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1492,11 +1492,38 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_vpshrdw128: case X86::BI__builtin_ia32_vpshrdw256: case X86::BI__builtin_ia32_vpshrdw512: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return {}; case X86::BI__builtin_ia32_reduce_fadd_pd512: case X86::BI__builtin_ia32_reduce_fadd_ps512: case X86::BI__builtin_ia32_reduce_fadd_ph512: case X86::BI__builtin_ia32_reduce_fadd_ph256: - case X86::BI__builtin_ia32_reduce_fadd_ph128: + case X86::BI__builtin_ia32_reduce_fadd_ph128: { + StringRef intrinsicName = ""; + switch (builtinID) { + case X86::BI__builtin_ia32_reduce_fadd_pd512: + intrinsicName = "vector.reduce.fadd.v8f64"; + break; + case X86::BI__builtin_ia32_reduce_fadd_ps512: + intrinsicName = "vector.reduce.fadd.v16f32"; + break; + case X86::BI__builtin_ia32_reduce_fadd_ph512: + intrinsicName = "vector.reduce.fadd.v32f16"; + break; + case X86::BI__builtin_ia32_reduce_fadd_ph256: + intrinsicName = "vector.reduce.fadd.v16f16"; + break; + case X86::BI__builtin_ia32_reduce_fadd_ph128: + intrinsicName = "vector.reduce.fadd.v8f16"; + break; + } + assert(!cir::MissingFeatures::fastMathFlags()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + intrinsicName, ops[0].getType(), + mlir::ValueRange{ops[0], ops[1]}); + } case X86::BI__builtin_ia32_reduce_fmul_pd512: case X86::BI__builtin_ia32_reduce_fmul_ps512: case X86::BI__builtin_ia32_reduce_fmul_ph512: diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c new file mode 100644 index 0000000000000..a9ba2ba688b22 --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR +// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM +// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG + +#include <immintrin.h> + +double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){ + + // CIR-LABEL: _mm512_reduce_add_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f64" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_add_pd + // CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_add_pd + // LLVM: call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_add_pd + // OGCG-NOT: reassoc + // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}}) + // OGCG-NOT: reassoc + return _mm512_reduce_add_pd(__W) + ExtraAddOp; +} + + +float test_mm512_reduce_add_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_add_ps + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f32" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_add_ps + // CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_add_ps + // LLVM: call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_add_ps + // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}}) + return _mm512_reduce_add_ps(__W); +} + diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c index 161fc45b2a32d..b67d51dd3cb11 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c @@ -63,4 +63,19 @@ __m512h test_mm512_undefined_ph(void) { // OGCG-LABEL: test_mm512_undefined_ph // OGCG: ret <32 x half> zeroinitializer return _mm512_undefined_ph(); -} \ No newline at end of file +} + +_Float16 test_mm512_reduce_add_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_add_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v32f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_add_ph + // CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_add_ph + // LLVM: call half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_add_ph + // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}}) + return _mm512_reduce_add_ph(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c new file mode 100644 index 0000000000000..6c042beef88fd --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +#include <immintrin.h> + +_Float16 test_mm256_reduce_add_ph(__m256h __W) { + // CIR-LABEL: _mm256_reduce_add_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_add_ph + // CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_add_ph + // LLVM: call half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_add_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}}) + return _mm256_reduce_add_ph(__W); +} + +_Float16 test_mm_reduce_add_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_add_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_add_ph + // CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_add_ph + // LLVM: call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_add_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}}) + return _mm_reduce_add_ph(__W); +} >From f216010a54aeed70b1ac605d9bdd812b91a41586 Mon Sep 17 00:00:00 2001 From: Jasmine Tang <[email protected]> Date: Wed, 10 Dec 2025 05:40:24 -0800 Subject: [PATCH 2/5] [CIR] Implement reduce fmul --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 25 ++++++++++++++- .../CIR/CodeGen/X86/avx512-reduceIntrin.c | 31 +++++++++++++++++++ .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 15 +++++++++ .../X86/avx512vlfp16-builtins.c | 31 +++++++++++++++++++ 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index eaf55fdae6b47..c0055293c5a13 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1528,7 +1528,30 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_reduce_fmul_ps512: case X86::BI__builtin_ia32_reduce_fmul_ph512: case X86::BI__builtin_ia32_reduce_fmul_ph256: - case X86::BI__builtin_ia32_reduce_fmul_ph128: + case X86::BI__builtin_ia32_reduce_fmul_ph128: { + StringRef intrinsicName = ""; + switch (builtinID) { + case X86::BI__builtin_ia32_reduce_fmul_pd512: + intrinsicName = "vector.reduce.fmul.v8f64"; + break; + case X86::BI__builtin_ia32_reduce_fmul_ps512: + intrinsicName = "vector.reduce.fmul.v16f32"; + break; + case X86::BI__builtin_ia32_reduce_fmul_ph512: + intrinsicName = "vector.reduce.fmul.v32f16"; + break; + case X86::BI__builtin_ia32_reduce_fmul_ph256: + intrinsicName = "vector.reduce.fmul.v16f16"; + break; + case X86::BI__builtin_ia32_reduce_fmul_ph128: + intrinsicName = "vector.reduce.fmul.v8f16"; + break; + } + assert(!cir::MissingFeatures::fastMathFlags()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + intrinsicName, ops[0].getType(), + mlir::ValueRange{ops[0], ops[1]}); + } case X86::BI__builtin_ia32_reduce_fmax_pd512: case X86::BI__builtin_ia32_reduce_fmax_ps512: case X86::BI__builtin_ia32_reduce_fmax_ph512: diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c index a9ba2ba688b22..9b956b80adf8c 100644 --- a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c @@ -22,6 +22,23 @@ double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){ return _mm512_reduce_add_pd(__W) + ExtraAddOp; } +double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){ + // CIR-LABEL: _mm512_reduce_mul_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f64" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_mul_pd + // CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_mul_pd + // LLVM: call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_mul_pd + // OGCG-NOT: reassoc + // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}}) + // OGCG-NOT: reassoc + return _mm512_reduce_mul_pd(__W) * ExtraMulOp; +} + float test_mm512_reduce_add_ps(__m512 __W){ // CIR-LABEL: _mm512_reduce_add_ps @@ -38,3 +55,17 @@ float test_mm512_reduce_add_ps(__m512 __W){ return _mm512_reduce_add_ps(__W); } +float test_mm512_reduce_mul_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_mul_ps + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f32" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_mul_ps + // CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_mul_ps + // LLVM: call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_mul_ps + // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}}) + return _mm512_reduce_mul_ps(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c index b67d51dd3cb11..0c2234fd9dd15 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c @@ -79,3 +79,18 @@ _Float16 test_mm512_reduce_add_ph(__m512h __W) { // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}}) return _mm512_reduce_add_ph(__W); } + +_Float16 test_mm512_reduce_mul_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_mul_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v32f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_mul_ph + // CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_mul_ph + // LLVM: call half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_mul_ph + // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}}) + return _mm512_reduce_mul_ph(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c index 6c042beef88fd..f7156427a5ad8 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c @@ -22,6 +22,21 @@ _Float16 test_mm256_reduce_add_ph(__m256h __W) { return _mm256_reduce_add_ph(__W); } +_Float16 test_mm256_reduce_mul_ph(__m256h __W) { + // CIR-LABEL: _mm256_reduce_mul_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_mul_ph + // CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_mul_ph + // LLVM: call half @llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_mul_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}}) + return _mm256_reduce_mul_ph(__W); +} + _Float16 test_mm_reduce_add_ph(__m128h __W) { // CIR-LABEL: _mm_reduce_add_ph // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 @@ -36,3 +51,19 @@ _Float16 test_mm_reduce_add_ph(__m128h __W) { // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}}) return _mm_reduce_add_ph(__W); } + +_Float16 test_mm_reduce_mul_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_mul_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_mul_ph + // CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_mul_ph + // LLVM: call half @llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_mul_ph + // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}}) + return _mm_reduce_mul_ph(__W); +} + >From 8bed48d7f6f20999c6b249c5885defc1b01a5fd1 Mon Sep 17 00:00:00 2001 From: Jasmine Tang <[email protected]> Date: Wed, 10 Dec 2025 06:35:14 -0800 Subject: [PATCH 3/5] [CIR] Implement reduce fmax --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 26 ++++++++++++- .../CodeGen/X86/avx512-reduceMinMaxIntrin.c | 37 +++++++++++++++++++ .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 15 ++++++++ .../X86/avx512vlfp16-builtins.c | 30 +++++++++++++++ 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index c0055293c5a13..5d4b81751bc0f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1556,7 +1556,31 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_reduce_fmax_ps512: case X86::BI__builtin_ia32_reduce_fmax_ph512: case X86::BI__builtin_ia32_reduce_fmax_ph256: - case X86::BI__builtin_ia32_reduce_fmax_ph128: + case X86::BI__builtin_ia32_reduce_fmax_ph128: { + StringRef intrinsicName = ""; + switch (builtinID) { + case X86::BI__builtin_ia32_reduce_fmax_pd512: + intrinsicName = "vector.reduce.fmax.v8f64"; + break; + case X86::BI__builtin_ia32_reduce_fmax_ps512: + intrinsicName = "vector.reduce.fmax.v16f32"; + break; + case X86::BI__builtin_ia32_reduce_fmax_ph512: + intrinsicName = "vector.reduce.fmax.v32f16"; + break; + case X86::BI__builtin_ia32_reduce_fmax_ph256: + intrinsicName = "vector.reduce.fmax.v16f16"; + break; + case X86::BI__builtin_ia32_reduce_fmax_ph128: + intrinsicName = "vector.reduce.fmax.v8f16"; + break; + } + assert(!cir::MissingFeatures::fastMathFlags()); + cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + intrinsicName, vecTy.getElementType(), + mlir::ValueRange{ops[0]}); + } case X86::BI__builtin_ia32_reduce_fmin_pd512: case X86::BI__builtin_ia32_reduce_fmin_ps512: case X86::BI__builtin_ia32_reduce_fmin_ph512: diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c new file mode 100644 index 0000000000000..548182ef7e74c --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM +// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG + +#include <immintrin.h> + +double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){ + // CIR-LABEL: _mm512_reduce_max_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f64" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_max_pd + // CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_max_pd + // LLVM: call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_max_pd + // OGCG-NOT: nnan + // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}}) + // OGCG-NOT: nnan + return _mm512_reduce_max_pd(__W) + ExtraAddOp; +} + +float test_mm512_reduce_max_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_max_ps + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f32" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_max_ps + // CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_max_ps + // LLVM: call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_max_ps + // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}}) + return _mm512_reduce_max_ps(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c index 0c2234fd9dd15..d1a2be041a2a4 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c @@ -94,3 +94,18 @@ _Float16 test_mm512_reduce_mul_ph(__m512h __W) { // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}}) return _mm512_reduce_mul_ph(__W); } + +_Float16 test_mm512_reduce_max_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_max_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v32f16" %[[V:.*]] (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_max_ph + // CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_max_ph + // LLVM: call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_max_ph + // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}}) + return _mm512_reduce_max_ph(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c index f7156427a5ad8..12ff135037cf3 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c @@ -37,6 +37,21 @@ _Float16 test_mm256_reduce_mul_ph(__m256h __W) { return _mm256_reduce_mul_ph(__W); } +_Float16 test_mm256_reduce_max_ph(__m256h __W) { + // CIR-LABEL: _mm256_reduce_max_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f16" %[[V:.*]] (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_max_ph + // CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_max_ph + // LLVM: call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_max_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}}) + return _mm256_reduce_max_ph(__W); +} + _Float16 test_mm_reduce_add_ph(__m128h __W) { // CIR-LABEL: _mm_reduce_add_ph // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 @@ -67,3 +82,18 @@ _Float16 test_mm_reduce_mul_ph(__m128h __W) { return _mm_reduce_mul_ph(__W); } +_Float16 test_mm_reduce_max_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_max_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f16" %[[V:.*]] (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_max_ph + // CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_max_ph + // LLVM: call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_max_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}}) + return _mm_reduce_max_ph(__W); +} + >From d54e553430e0e07bd77175f2e4e06955f3caca2d Mon Sep 17 00:00:00 2001 From: Jasmine Tang <[email protected]> Date: Wed, 10 Dec 2025 07:08:27 -0800 Subject: [PATCH 4/5] [CIR] Implement reduce fmin --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 27 +++++++++++++++- .../CodeGen/X86/avx512-reduceMinMaxIntrin.c | 32 +++++++++++++++++++ .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 15 +++++++++ .../X86/avx512vlfp16-builtins.c | 30 +++++++++++++++++ 4 files changed, 103 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 5d4b81751bc0f..9ffbbc03edb11 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1585,7 +1585,32 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_reduce_fmin_ps512: case X86::BI__builtin_ia32_reduce_fmin_ph512: case X86::BI__builtin_ia32_reduce_fmin_ph256: - case X86::BI__builtin_ia32_reduce_fmin_ph128: + case X86::BI__builtin_ia32_reduce_fmin_ph128: { + StringRef intrinsicName = ""; + switch (builtinID) { + case X86::BI__builtin_ia32_reduce_fmin_pd512: + intrinsicName = "vector.reduce.fmin.v8f64"; + break; + case X86::BI__builtin_ia32_reduce_fmin_ps512: + intrinsicName = "vector.reduce.fmin.v16f32"; + break; + case X86::BI__builtin_ia32_reduce_fmin_ph512: + intrinsicName = "vector.reduce.fmin.v32f16"; + break; + case X86::BI__builtin_ia32_reduce_fmin_ph256: + intrinsicName = "vector.reduce.fmin.v16f16"; + break; + case X86::BI__builtin_ia32_reduce_fmin_ph128: + intrinsicName = "vector.reduce.fmin.v8f16"; + break; + } + + assert(!cir::MissingFeatures::fastMathFlags()); + cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType()); + return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), + intrinsicName, vecTy.getElementType(), + mlir::ValueRange{ops[0]}); + } case X86::BI__builtin_ia32_rdrand16_step: case X86::BI__builtin_ia32_rdrand32_step: case X86::BI__builtin_ia32_rdrand64_step: diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c index 548182ef7e74c..be2865f9a4934 100644 --- a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c @@ -21,6 +21,23 @@ double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){ return _mm512_reduce_max_pd(__W) + ExtraAddOp; } +double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){ + // CIR-LABEL: _mm512_reduce_min_pd + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f64" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double + + // CIR-LABEL: test_mm512_reduce_min_pd + // CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double + + // LLVM-LABEL: test_mm512_reduce_min_pd + // LLVM: call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_min_pd + // OGCG-NOT: nnan + // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}}) + // OGCG-NOT: nnan + return _mm512_reduce_min_pd(__W) * ExtraMulOp; +} + float test_mm512_reduce_max_ps(__m512 __W){ // CIR-LABEL: _mm512_reduce_max_ps // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f32" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float @@ -35,3 +52,18 @@ float test_mm512_reduce_max_ps(__m512 __W){ // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}}) return _mm512_reduce_max_ps(__W); } + +float test_mm512_reduce_min_ps(__m512 __W){ + // CIR-LABEL: _mm512_reduce_min_ps + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f32" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float + + // CIR-LABEL: test_mm512_reduce_min_ps + // CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float + + // LLVM-LABEL: test_mm512_reduce_min_ps + // LLVM: call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_min_ps + // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}}) + return _mm512_reduce_min_ps(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c index d1a2be041a2a4..94a3834f09a01 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c @@ -109,3 +109,18 @@ _Float16 test_mm512_reduce_max_ph(__m512h __W) { // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}}) return _mm512_reduce_max_ph(__W); } + +_Float16 test_mm512_reduce_min_ph(__m512h __W) { + // CIR-LABEL: _mm512_reduce_min_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v32f16" %[[V:.*]] (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm512_reduce_min_ph + // CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm512_reduce_min_ph + // LLVM: call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}}) + + // OGCG-LABEL: test_mm512_reduce_min_ph + // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}}) + return _mm512_reduce_min_ph(__W); +} diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c index 12ff135037cf3..f1dfe56861eec 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c @@ -52,6 +52,21 @@ _Float16 test_mm256_reduce_max_ph(__m256h __W) { return _mm256_reduce_max_ph(__W); } +_Float16 test_mm256_reduce_min_ph(__m256h __W) { + // CIR-LABEL: _mm256_reduce_min_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f16" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm256_reduce_min_ph + // CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm256_reduce_min_ph + // LLVM: call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}}) + + // OGCG-LABEL: test_mm256_reduce_min_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}}) + return _mm256_reduce_min_ph(__W); +} + _Float16 test_mm_reduce_add_ph(__m128h __W) { // CIR-LABEL: _mm_reduce_add_ph // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 @@ -97,3 +112,18 @@ _Float16 test_mm_reduce_max_ph(__m128h __W) { return _mm_reduce_max_ph(__W); } +_Float16 test_mm_reduce_min_ph(__m128h __W) { + // CIR-LABEL: _mm_reduce_min_ph + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f16" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // CIR-LABEL: test_mm_reduce_min_ph + // CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + + // LLVM-LABEL: test_mm_reduce_min_ph + // LLVM: call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}}) + + // OGCG-LABEL: test_mm_reduce_min_ph + // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}}) + return _mm_reduce_min_ph(__W); +} + >From 7dda4392cbb9484cd23e48036499703ac34db310 Mon Sep 17 00:00:00 2001 From: Jasmine Tang <[email protected]> Date: Wed, 10 Dec 2025 22:47:21 -0800 Subject: [PATCH 5/5] Remove suffix from string intrinsics --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 77 +------------------ .../CIR/CodeGen/X86/avx512-reduceIntrin.c | 8 +- .../CodeGen/X86/avx512-reduceMinMaxIntrin.c | 8 +- .../CodeGenBuiltins/X86/avx512fp16-builtins.c | 8 +- .../X86/avx512vlfp16-builtins.c | 16 ++-- 5 files changed, 24 insertions(+), 93 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 9ffbbc03edb11..51e2a9de000cb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1501,24 +1501,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_reduce_fadd_ph512: case X86::BI__builtin_ia32_reduce_fadd_ph256: case X86::BI__builtin_ia32_reduce_fadd_ph128: { - StringRef intrinsicName = ""; - switch (builtinID) { - case X86::BI__builtin_ia32_reduce_fadd_pd512: - intrinsicName = "vector.reduce.fadd.v8f64"; - break; - case X86::BI__builtin_ia32_reduce_fadd_ps512: - intrinsicName = "vector.reduce.fadd.v16f32"; - break; - case X86::BI__builtin_ia32_reduce_fadd_ph512: - intrinsicName = "vector.reduce.fadd.v32f16"; - break; - case X86::BI__builtin_ia32_reduce_fadd_ph256: - intrinsicName = "vector.reduce.fadd.v16f16"; - break; - case X86::BI__builtin_ia32_reduce_fadd_ph128: - intrinsicName = "vector.reduce.fadd.v8f16"; - break; - } + StringRef intrinsicName = "vector.reduce.fadd"; assert(!cir::MissingFeatures::fastMathFlags()); return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), intrinsicName, ops[0].getType(), @@ -1529,24 +1512,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_reduce_fmul_ph512: case X86::BI__builtin_ia32_reduce_fmul_ph256: case X86::BI__builtin_ia32_reduce_fmul_ph128: { - StringRef intrinsicName = ""; - switch (builtinID) { - case X86::BI__builtin_ia32_reduce_fmul_pd512: - intrinsicName = "vector.reduce.fmul.v8f64"; - break; - case X86::BI__builtin_ia32_reduce_fmul_ps512: - intrinsicName = "vector.reduce.fmul.v16f32"; - break; - case X86::BI__builtin_ia32_reduce_fmul_ph512: - intrinsicName = "vector.reduce.fmul.v32f16"; - break; - case X86::BI__builtin_ia32_reduce_fmul_ph256: - intrinsicName = "vector.reduce.fmul.v16f16"; - break; - case X86::BI__builtin_ia32_reduce_fmul_ph128: - intrinsicName = "vector.reduce.fmul.v8f16"; - break; - } + StringRef intrinsicName = "vector.reduce.fmul"; assert(!cir::MissingFeatures::fastMathFlags()); return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), intrinsicName, ops[0].getType(), @@ -1557,24 +1523,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_reduce_fmax_ph512: case X86::BI__builtin_ia32_reduce_fmax_ph256: case X86::BI__builtin_ia32_reduce_fmax_ph128: { - StringRef intrinsicName = ""; - switch (builtinID) { - case X86::BI__builtin_ia32_reduce_fmax_pd512: - intrinsicName = "vector.reduce.fmax.v8f64"; - break; - case X86::BI__builtin_ia32_reduce_fmax_ps512: - intrinsicName = "vector.reduce.fmax.v16f32"; - break; - case X86::BI__builtin_ia32_reduce_fmax_ph512: - intrinsicName = "vector.reduce.fmax.v32f16"; - break; - case X86::BI__builtin_ia32_reduce_fmax_ph256: - intrinsicName = "vector.reduce.fmax.v16f16"; - break; - case X86::BI__builtin_ia32_reduce_fmax_ph128: - intrinsicName = "vector.reduce.fmax.v8f16"; - break; - } + StringRef intrinsicName = "vector.reduce.fmax"; assert(!cir::MissingFeatures::fastMathFlags()); cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType()); return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), @@ -1586,25 +1535,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_reduce_fmin_ph512: case X86::BI__builtin_ia32_reduce_fmin_ph256: case X86::BI__builtin_ia32_reduce_fmin_ph128: { - StringRef intrinsicName = ""; - switch (builtinID) { - case X86::BI__builtin_ia32_reduce_fmin_pd512: - intrinsicName = "vector.reduce.fmin.v8f64"; - break; - case X86::BI__builtin_ia32_reduce_fmin_ps512: - intrinsicName = "vector.reduce.fmin.v16f32"; - break; - case X86::BI__builtin_ia32_reduce_fmin_ph512: - intrinsicName = "vector.reduce.fmin.v32f16"; - break; - case X86::BI__builtin_ia32_reduce_fmin_ph256: - intrinsicName = "vector.reduce.fmin.v16f16"; - break; - case X86::BI__builtin_ia32_reduce_fmin_ph128: - intrinsicName = "vector.reduce.fmin.v8f16"; - break; - } - + StringRef intrinsicName = "vector.reduce.fmin"; assert(!cir::MissingFeatures::fastMathFlags()); cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType()); return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c index 9b956b80adf8c..bc4249ffd25fc 100644 --- a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c @@ -7,7 +7,7 @@ double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){ // CIR-LABEL: _mm512_reduce_add_pd - // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f64" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double // CIR-LABEL: test_mm512_reduce_add_pd // CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double @@ -24,7 +24,7 @@ double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){ double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){ // CIR-LABEL: _mm512_reduce_mul_pd - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f64" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double // CIR-LABEL: test_mm512_reduce_mul_pd // CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double @@ -42,7 +42,7 @@ double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){ float test_mm512_reduce_add_ps(__m512 __W){ // CIR-LABEL: _mm512_reduce_add_ps - // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f32" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float // CIR-LABEL: test_mm512_reduce_add_ps // CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float @@ -57,7 +57,7 @@ float test_mm512_reduce_add_ps(__m512 __W){ float test_mm512_reduce_mul_ps(__m512 __W){ // CIR-LABEL: _mm512_reduce_mul_ps - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f32" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float // CIR-LABEL: test_mm512_reduce_mul_ps // CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c index be2865f9a4934..104e76fa6ad03 100644 --- a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c +++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c @@ -6,7 +6,7 @@ double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){ // CIR-LABEL: _mm512_reduce_max_pd - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f64" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double // CIR-LABEL: test_mm512_reduce_max_pd // CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double @@ -23,7 +23,7 @@ double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){ double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){ // CIR-LABEL: _mm512_reduce_min_pd - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f64" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double // CIR-LABEL: test_mm512_reduce_min_pd // CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double @@ -40,7 +40,7 @@ double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){ float test_mm512_reduce_max_ps(__m512 __W){ // CIR-LABEL: _mm512_reduce_max_ps - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f32" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float // CIR-LABEL: test_mm512_reduce_max_ps // CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float @@ -55,7 +55,7 @@ float test_mm512_reduce_max_ps(__m512 __W){ float test_mm512_reduce_min_ps(__m512 __W){ // CIR-LABEL: _mm512_reduce_min_ps - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f32" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float // CIR-LABEL: test_mm512_reduce_min_ps // CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c index 94a3834f09a01..464fa29fffc20 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c @@ -67,7 +67,7 @@ __m512h test_mm512_undefined_ph(void) { _Float16 test_mm512_reduce_add_ph(__m512h __W) { // CIR-LABEL: _mm512_reduce_add_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v32f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm512_reduce_add_ph // CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 @@ -82,7 +82,7 @@ _Float16 test_mm512_reduce_add_ph(__m512h __W) { _Float16 test_mm512_reduce_mul_ph(__m512h __W) { // CIR-LABEL: _mm512_reduce_mul_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v32f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm512_reduce_mul_ph // CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 @@ -97,7 +97,7 @@ _Float16 test_mm512_reduce_mul_ph(__m512h __W) { _Float16 test_mm512_reduce_max_ph(__m512h __W) { // CIR-LABEL: _mm512_reduce_max_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v32f16" %[[V:.*]] (!cir.vector<32 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] (!cir.vector<32 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm512_reduce_max_ph // CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 @@ -112,7 +112,7 @@ _Float16 test_mm512_reduce_max_ph(__m512h __W) { _Float16 test_mm512_reduce_min_ph(__m512h __W) { // CIR-LABEL: _mm512_reduce_min_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v32f16" %[[V:.*]] (!cir.vector<32 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] (!cir.vector<32 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm512_reduce_min_ph // CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16 diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c index f1dfe56861eec..994fdfec23c2c 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c @@ -9,7 +9,7 @@ _Float16 test_mm256_reduce_add_ph(__m256h __W) { // CIR-LABEL: _mm256_reduce_add_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm256_reduce_add_ph // CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 @@ -24,7 +24,7 @@ _Float16 test_mm256_reduce_add_ph(__m256h __W) { _Float16 test_mm256_reduce_mul_ph(__m256h __W) { // CIR-LABEL: _mm256_reduce_mul_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm256_reduce_mul_ph // CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 @@ -39,7 +39,7 @@ _Float16 test_mm256_reduce_mul_ph(__m256h __W) { _Float16 test_mm256_reduce_max_ph(__m256h __W) { // CIR-LABEL: _mm256_reduce_max_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f16" %[[V:.*]] (!cir.vector<16 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] (!cir.vector<16 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm256_reduce_max_ph // CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 @@ -54,7 +54,7 @@ _Float16 test_mm256_reduce_max_ph(__m256h __W) { _Float16 test_mm256_reduce_min_ph(__m256h __W) { // CIR-LABEL: _mm256_reduce_min_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f16" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm256_reduce_min_ph // CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16 @@ -69,7 +69,7 @@ _Float16 test_mm256_reduce_min_ph(__m256h __W) { _Float16 test_mm_reduce_add_ph(__m128h __W) { // CIR-LABEL: _mm_reduce_add_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm_reduce_add_ph // CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 @@ -84,7 +84,7 @@ _Float16 test_mm_reduce_add_ph(__m128h __W) { _Float16 test_mm_reduce_mul_ph(__m128h __W) { // CIR-LABEL: _mm_reduce_mul_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm_reduce_mul_ph // CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 @@ -99,7 +99,7 @@ _Float16 test_mm_reduce_mul_ph(__m128h __W) { _Float16 test_mm_reduce_max_ph(__m128h __W) { // CIR-LABEL: _mm_reduce_max_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f16" %[[V:.*]] (!cir.vector<8 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] (!cir.vector<8 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm_reduce_max_ph // CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 @@ -114,7 +114,7 @@ _Float16 test_mm_reduce_max_ph(__m128h __W) { _Float16 test_mm_reduce_min_ph(__m128h __W) { // CIR-LABEL: _mm_reduce_min_ph - // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f16" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16 + // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16 // CIR-LABEL: test_mm_reduce_min_ph // CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
