https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/131781
Support auto-vectorization of fminimum_num and fmaximum_num. For AArch64 with SVE, scalable vectors are not supported yet; for RISC-V Vector, scalable vectors already work well. From e367aaa410dcbb6f3d1c5803eac49dde6dae25c5 Mon Sep 17 00:00:00 2001 From: YunQiang Su <yunqi...@isrc.iscas.ac.cn> Date: Tue, 18 Mar 2025 18:46:29 +0800 Subject: [PATCH] Vectorize: Support fminimumnum and fmaximumnum Support auto-vectorization of fminimum_num and fmaximum_num. For AArch64 with SVE, scalable vectors are not supported yet; for RISC-V Vector, scalable vectors already work well. --- clang/test/CodeGen/fminimum-num-autovec.c | 407 ++++++++++++++++++++ llvm/include/llvm/CodeGen/BasicTTIImpl.h | 6 + llvm/lib/Analysis/VectorUtils.cpp | 2 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 +- 4 files changed, 426 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/fminimum-num-autovec.c diff --git a/clang/test/CodeGen/fminimum-num-autovec.c b/clang/test/CodeGen/fminimum-num-autovec.c new file mode 100644 index 0000000000000..94114b6227d27 --- /dev/null +++ b/clang/test/CodeGen/fminimum-num-autovec.c @@ -0,0 +1,407 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang --target=aarch64-unknown-linux-gnu -march=armv8+fp16 %s -O3 -emit-llvm -S -o - | FileCheck %s --check-prefix=ARMV8 +// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64gv_zvfh %s -O3 -emit-llvm -S -o - | FileCheck %s --check-prefix=RV64_ZVFH +// FIXME: SVE cannot emit VSCALE. + + +float af32[4096]; +float bf32[4096]; +float cf32[4096]; +// ARMV8-LABEL: define dso_local void @f32min( +// ARMV8-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// ARMV8-NEXT: [[ENTRY:.*]]: +// ARMV8-NEXT: br label %[[VECTOR_BODY:.*]] +// ARMV8: [[VECTOR_BODY]]: +// ARMV8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// ARMV8-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @af32, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA6:![0-9]+]] +// ARMV8-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @bf32, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD12]]) +// ARMV8-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD11]], <4 x float> [[WIDE_LOAD13]]) +// ARMV8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @cf32, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16 +// ARMV8-NEXT: store <4 x float> [[TMP4]], ptr [[TMP6]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +// ARMV8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// ARMV8-NEXT: br i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +// ARMV8: [[FOR_COND_CLEANUP]]: +// 
ARMV8-NEXT: ret void +// +// RV64_ZVFH-LABEL: define dso_local void @f32min( +// RV64_ZVFH-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// RV64_ZVFH-NEXT: [[ENTRY:.*]]: +// RV64_ZVFH-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +// RV64_ZVFH-NEXT: br label %[[VECTOR_BODY:.*]] +// RV64_ZVFH: [[VECTOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// RV64_ZVFH-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @af32, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4, !tbaa [[TBAA9:![0-9]+]] +// RV64_ZVFH-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @bf32, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 4 x float>, ptr [[TMP3]], align 4, !tbaa [[TBAA9]] +// RV64_ZVFH-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD10]]) +// RV64_ZVFH-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @cf32, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: store <vscale x 4 x float> [[TMP4]], ptr [[TMP5]], align 4, !tbaa [[TBAA9]] +// RV64_ZVFH-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] +// RV64_ZVFH-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// RV64_ZVFH-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +// RV64_ZVFH: [[FOR_COND_CLEANUP]]: +// RV64_ZVFH-NEXT: ret void +// +void f32min() { + for (int i=0; i<4096; i++) {cf32[i] = __builtin_fminimum_numf(af32[i], bf32[i]);} +} +// ARMV8-LABEL: define dso_local void @f32max( +// ARMV8-SAME: ) local_unnamed_addr #[[ATTR0]] { +// ARMV8-NEXT: [[ENTRY:.*]]: +// ARMV8-NEXT: br label %[[VECTOR_BODY:.*]] +// ARMV8: [[VECTOR_BODY]]: +// ARMV8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// ARMV8-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @af32, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @bf32, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD12]]) +// ARMV8-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD11]], <4 x float> [[WIDE_LOAD13]]) +// ARMV8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @cf32, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16 +// ARMV8-NEXT: store <4 x float> [[TMP4]], ptr [[TMP6]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4, !tbaa [[TBAA6]] +// ARMV8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +// ARMV8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// ARMV8-NEXT: br 
i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +// ARMV8: [[FOR_COND_CLEANUP]]: +// ARMV8-NEXT: ret void +// +// RV64_ZVFH-LABEL: define dso_local void @f32max( +// RV64_ZVFH-SAME: ) local_unnamed_addr #[[ATTR0]] { +// RV64_ZVFH-NEXT: [[ENTRY:.*]]: +// RV64_ZVFH-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +// RV64_ZVFH-NEXT: br label %[[VECTOR_BODY:.*]] +// RV64_ZVFH: [[VECTOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// RV64_ZVFH-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @af32, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4, !tbaa [[TBAA9]] +// RV64_ZVFH-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @bf32, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 4 x float>, ptr [[TMP3]], align 4, !tbaa [[TBAA9]] +// RV64_ZVFH-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.maximumnum.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD10]]) +// RV64_ZVFH-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr @cf32, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: store <vscale x 4 x float> [[TMP4]], ptr [[TMP5]], align 4, !tbaa [[TBAA9]] +// RV64_ZVFH-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] +// RV64_ZVFH-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// RV64_ZVFH-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +// RV64_ZVFH: [[FOR_COND_CLEANUP]]: +// RV64_ZVFH-NEXT: ret void +// +void f32max() { + for (int i=0; i<4096; i++) {cf32[i] = __builtin_fmaximum_numf(af32[i], bf32[i]);} +} + +double af64[4096]; +double bf64[4096]; +double cf64[4096]; +// ARMV8-LABEL: define dso_local void @f64min( +// ARMV8-SAME: ) local_unnamed_addr #[[ATTR0]] { +// ARMV8-NEXT: [[ENTRY:.*]]: +// ARMV8-NEXT: br label %[[VECTOR_BODY:.*]] +// ARMV8: [[VECTOR_BODY]]: +// ARMV8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// ARMV8-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @af64, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA15:![0-9]+]] +// ARMV8-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @bf64, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[WIDE_LOAD13:%.*]] = load <2 x double>, ptr [[TMP3]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD12]]) +// ARMV8-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD11]], <2 x double> [[WIDE_LOAD13]]) +// ARMV8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @cf64, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16 +// ARMV8-NEXT: store <2 x double> [[TMP4]], ptr [[TMP6]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: 
store <2 x double> [[TMP5]], ptr [[TMP7]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +// ARMV8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// ARMV8-NEXT: br i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +// ARMV8: [[FOR_COND_CLEANUP]]: +// ARMV8-NEXT: ret void +// +// RV64_ZVFH-LABEL: define dso_local void @f64min( +// RV64_ZVFH-SAME: ) local_unnamed_addr #[[ATTR0]] { +// RV64_ZVFH-NEXT: [[ENTRY:.*]]: +// RV64_ZVFH-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1 +// RV64_ZVFH-NEXT: br label %[[VECTOR_BODY:.*]] +// RV64_ZVFH: [[VECTOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// RV64_ZVFH-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @af64, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA18:![0-9]+]] +// RV64_ZVFH-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @bf64, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 2 x double>, ptr [[TMP3]], align 8, !tbaa [[TBAA18]] +// RV64_ZVFH-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD10]]) +// RV64_ZVFH-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @cf64, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: store <vscale x 2 x double> [[TMP4]], ptr [[TMP5]], align 8, !tbaa [[TBAA18]] +// RV64_ZVFH-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] +// RV64_ZVFH-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// RV64_ZVFH-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +// RV64_ZVFH: [[FOR_COND_CLEANUP]]: +// RV64_ZVFH-NEXT: ret void +// +void f64min() { + for (int i=0; i<4096; i++) {cf64[i] = __builtin_fminimum_num(af64[i], bf64[i]);} +} +// ARMV8-LABEL: define dso_local void @f64max( +// ARMV8-SAME: ) local_unnamed_addr #[[ATTR0]] { +// ARMV8-NEXT: [[ENTRY:.*]]: +// ARMV8-NEXT: br label %[[VECTOR_BODY:.*]] +// ARMV8: [[VECTOR_BODY]]: +// ARMV8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// ARMV8-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @af64, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @bf64, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[WIDE_LOAD13:%.*]] = load <2 x double>, ptr [[TMP3]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[TMP4:%.*]] = tail call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD12]]) +// ARMV8-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD11]], <2 x double> [[WIDE_LOAD13]]) +// ARMV8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @cf64, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16 +// ARMV8-NEXT: store <2 x double> [[TMP4]], ptr [[TMP6]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: store <2 x double> [[TMP5]], ptr [[TMP7]], align 8, !tbaa [[TBAA15]] +// ARMV8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +// ARMV8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// ARMV8-NEXT: br i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +// ARMV8: [[FOR_COND_CLEANUP]]: +// ARMV8-NEXT: ret void +// +// RV64_ZVFH-LABEL: define dso_local void @f64max( +// RV64_ZVFH-SAME: ) local_unnamed_addr #[[ATTR0]] { +// RV64_ZVFH-NEXT: [[ENTRY:.*]]: +// RV64_ZVFH-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1 +// RV64_ZVFH-NEXT: br label %[[VECTOR_BODY:.*]] +// RV64_ZVFH: [[VECTOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// RV64_ZVFH-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @af64, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP2]], align 8, !tbaa [[TBAA18]] +// RV64_ZVFH-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @bf64, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 2 x double>, ptr [[TMP3]], align 8, !tbaa [[TBAA18]] +// RV64_ZVFH-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> [[WIDE_LOAD]], <vscale x 2 x double> [[WIDE_LOAD10]]) +// RV64_ZVFH-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr @cf64, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: store <vscale x 2 x double> [[TMP4]], ptr [[TMP5]], align 8, !tbaa [[TBAA18]] +// RV64_ZVFH-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] +// RV64_ZVFH-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// RV64_ZVFH-NEXT: br i1 [[TMP6]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +// RV64_ZVFH: [[FOR_COND_CLEANUP]]: +// RV64_ZVFH-NEXT: ret void +// +void f64max() { + for (int i=0; i<4096; i++) {cf64[i] = __builtin_fmaximum_num(af64[i], bf64[i]);} +} + +__fp16 af16[4096]; +__fp16 bf16[4096]; +__fp16 cf16[4096]; +// ARMV8-LABEL: define dso_local void @f16min( +// ARMV8-SAME: ) local_unnamed_addr #[[ATTR0]] { +// ARMV8-NEXT: [[ENTRY:.*]]: +// ARMV8-NEXT: br label %[[VECTOR_BODY:.*]] +// ARMV8: [[VECTOR_BODY]]: +// ARMV8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// ARMV8-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @af16, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP0]], align 2, !tbaa [[TBAA19:![0-9]+]] +// ARMV8-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x half>, ptr [[TMP1]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @bf16, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x half>, ptr [[TMP2]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x half>, ptr [[TMP3]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[TMP4:%.*]] = tail call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD12]]) +// ARMV8-NEXT: [[TMP5:%.*]] = tail call <8 x half> 
@llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD11]], <8 x half> [[WIDE_LOAD13]]) +// ARMV8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @cf16, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16 +// ARMV8-NEXT: store <8 x half> [[TMP4]], ptr [[TMP6]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: store <8 x half> [[TMP5]], ptr [[TMP7]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +// ARMV8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// ARMV8-NEXT: br i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +// ARMV8: [[FOR_COND_CLEANUP]]: +// ARMV8-NEXT: ret void +// +// RV64_ZVFH-LABEL: define dso_local void @f16min( +// RV64_ZVFH-SAME: ) local_unnamed_addr #[[ATTR0]] { +// RV64_ZVFH-NEXT: [[ENTRY:.*]]: +// RV64_ZVFH-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp samesign ugt i64 [[TMP0]], 512 +// RV64_ZVFH-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_PH:.*]] +// RV64_ZVFH: [[VECTOR_PH]]: +// RV64_ZVFH-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[DOTNEG:%.*]] = mul nuw nsw i64 [[TMP1]], 8184 +// RV64_ZVFH-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], 4096 +// RV64_ZVFH-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3 +// RV64_ZVFH-NEXT: br label %[[VECTOR_BODY:.*]] +// RV64_ZVFH: [[VECTOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// RV64_ZVFH-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @af16, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP4]], align 2, !tbaa [[TBAA22:![0-9]+]] +// RV64_ZVFH-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @bf16, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x half>, ptr [[TMP5]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[WIDE_LOAD10]]) +// RV64_ZVFH-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @cf16, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: store <vscale x 8 x half> [[TMP6]], ptr [[TMP7]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +// RV64_ZVFH-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +// RV64_ZVFH-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +// RV64_ZVFH: [[MIDDLE_BLOCK]]: +// RV64_ZVFH-NEXT: [[CMP_N_NOT:%.*]] = icmp eq i64 [[N_VEC]], 0 +// RV64_ZVFH-NEXT: br i1 [[CMP_N_NOT]], label %[[FOR_BODY_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]] +// RV64_ZVFH: [[FOR_BODY_PREHEADER]]: +// RV64_ZVFH-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] +// RV64_ZVFH-NEXT: br label %[[FOR_BODY:.*]] +// RV64_ZVFH: [[FOR_COND_CLEANUP]]: +// RV64_ZVFH-NEXT: ret void +// RV64_ZVFH: [[FOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER]] ] +// RV64_ZVFH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @af16, i64 0, i64 [[INDVARS_IV]] +// RV64_ZVFH-NEXT: [[TMP9:%.*]] = load half, ptr [[ARRAYIDX]], 
align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @bf16, i64 0, i64 [[INDVARS_IV]] +// RV64_ZVFH-NEXT: [[TMP10:%.*]] = load half, ptr [[ARRAYIDX2]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[TMP11:%.*]] = tail call half @llvm.minimumnum.f16(half [[TMP9]], half [[TMP10]]) +// RV64_ZVFH-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @cf16, i64 0, i64 [[INDVARS_IV]] +// RV64_ZVFH-NEXT: store half [[TMP11]], ptr [[ARRAYIDX4]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +// RV64_ZVFH-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096 +// RV64_ZVFH-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +// +void f16min() { + for (int i=0; i<4096; i++) {cf16[i] = __builtin_fminimum_numf16(af16[i], bf16[i]);} +} +// ARMV8-LABEL: define dso_local void @f16max( +// ARMV8-SAME: ) local_unnamed_addr #[[ATTR0]] { +// ARMV8-NEXT: [[ENTRY:.*]]: +// ARMV8-NEXT: br label %[[VECTOR_BODY:.*]] +// ARMV8: [[VECTOR_BODY]]: +// ARMV8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// ARMV8-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @af16, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP0]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x half>, ptr [[TMP1]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @bf16, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 +// ARMV8-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x half>, ptr [[TMP2]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x half>, ptr [[TMP3]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[TMP4:%.*]] = tail call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD12]]) +// ARMV8-NEXT: [[TMP5:%.*]] = tail call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD11]], <8 x half> [[WIDE_LOAD13]]) +// ARMV8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @cf16, i64 0, i64 [[INDEX]] +// ARMV8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16 +// ARMV8-NEXT: store <8 x half> [[TMP4]], ptr [[TMP6]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: store <8 x half> [[TMP5]], ptr [[TMP7]], align 2, !tbaa [[TBAA19]] +// ARMV8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +// ARMV8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 +// ARMV8-NEXT: br i1 [[TMP8]], label %[[FOR_COND_CLEANUP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +// ARMV8: [[FOR_COND_CLEANUP]]: +// ARMV8-NEXT: ret void +// +// RV64_ZVFH-LABEL: define dso_local void @f16max( +// RV64_ZVFH-SAME: ) local_unnamed_addr #[[ATTR0]] { +// RV64_ZVFH-NEXT: [[ENTRY:.*]]: +// RV64_ZVFH-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp samesign ugt i64 [[TMP0]], 512 +// RV64_ZVFH-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_PH:.*]] +// RV64_ZVFH: [[VECTOR_PH]]: +// RV64_ZVFH-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[DOTNEG:%.*]] = mul nuw nsw i64 [[TMP1]], 8184 +// RV64_ZVFH-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], 4096 +// RV64_ZVFH-NEXT: 
[[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() +// RV64_ZVFH-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3 +// RV64_ZVFH-NEXT: br label %[[VECTOR_BODY:.*]] +// RV64_ZVFH: [[VECTOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +// RV64_ZVFH-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @af16, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP4]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @bf16, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 8 x half>, ptr [[TMP5]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[WIDE_LOAD10]]) +// RV64_ZVFH-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @cf16, i64 0, i64 [[INDEX]] +// RV64_ZVFH-NEXT: store <vscale x 8 x half> [[TMP6]], ptr [[TMP7]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +// RV64_ZVFH-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +// RV64_ZVFH-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +// RV64_ZVFH: [[MIDDLE_BLOCK]]: +// RV64_ZVFH-NEXT: [[CMP_N_NOT:%.*]] = icmp eq i64 [[N_VEC]], 0 +// RV64_ZVFH-NEXT: br i1 [[CMP_N_NOT]], label %[[FOR_BODY_PREHEADER]], label %[[FOR_COND_CLEANUP:.*]] +// RV64_ZVFH: [[FOR_BODY_PREHEADER]]: +// RV64_ZVFH-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] +// RV64_ZVFH-NEXT: br label %[[FOR_BODY:.*]] +// RV64_ZVFH: [[FOR_COND_CLEANUP]]: +// RV64_ZVFH-NEXT: ret void +// RV64_ZVFH: [[FOR_BODY]]: +// RV64_ZVFH-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER]] ] +// RV64_ZVFH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @af16, i64 0, i64 [[INDVARS_IV]] +// RV64_ZVFH-NEXT: [[TMP9:%.*]] = load half, ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @bf16, i64 0, i64 [[INDVARS_IV]] +// RV64_ZVFH-NEXT: [[TMP10:%.*]] = load half, ptr [[ARRAYIDX2]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[TMP11:%.*]] = tail call half @llvm.maximumnum.f16(half [[TMP9]], half [[TMP10]]) +// RV64_ZVFH-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr @cf16, i64 0, i64 [[INDVARS_IV]] +// RV64_ZVFH-NEXT: store half [[TMP11]], ptr [[ARRAYIDX4]], align 2, !tbaa [[TBAA22]] +// RV64_ZVFH-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +// RV64_ZVFH-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4096 +// RV64_ZVFH-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +// +void f16max() { + for (int i=0; i<4096; i++) {cf16[i] = __builtin_fmaximum_numf16(af16[i], bf16[i]);} +} + +//. 
+// ARMV8: [[TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// ARMV8: [[META7]] = !{!"float", [[META8:![0-9]+]], i64 0} +// ARMV8: [[META8]] = !{!"omnipotent char", [[META9:![0-9]+]], i64 0} +// ARMV8: [[META9]] = !{!"Simple C/C++ TBAA"} +// ARMV8: [[LOOP10]] = distinct !{[[LOOP10]], [[META11:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]]} +// ARMV8: [[META11]] = !{!"llvm.loop.mustprogress"} +// ARMV8: [[META12]] = !{!"llvm.loop.isvectorized", i32 1} +// ARMV8: [[META13]] = !{!"llvm.loop.unroll.runtime.disable"} +// ARMV8: [[LOOP14]] = distinct !{[[LOOP14]], [[META11]], [[META12]], [[META13]]} +// ARMV8: [[TBAA15]] = !{[[META16:![0-9]+]], [[META16]], i64 0} +// ARMV8: [[META16]] = !{!"double", [[META8]], i64 0} +// ARMV8: [[LOOP17]] = distinct !{[[LOOP17]], [[META11]], [[META12]], [[META13]]} +// ARMV8: [[LOOP18]] = distinct !{[[LOOP18]], [[META11]], [[META12]], [[META13]]} +// ARMV8: [[TBAA19]] = !{[[META20:![0-9]+]], [[META20]], i64 0} +// ARMV8: [[META20]] = !{!"__fp16", [[META8]], i64 0} +// ARMV8: [[LOOP21]] = distinct !{[[LOOP21]], [[META11]], [[META12]], [[META13]]} +// ARMV8: [[LOOP22]] = distinct !{[[LOOP22]], [[META11]], [[META12]], [[META13]]} +//. +// RV64_ZVFH: [[TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0} +// RV64_ZVFH: [[META10]] = !{!"float", [[META11:![0-9]+]], i64 0} +// RV64_ZVFH: [[META11]] = !{!"omnipotent char", [[META12:![0-9]+]], i64 0} +// RV64_ZVFH: [[META12]] = !{!"Simple C/C++ TBAA"} +// RV64_ZVFH: [[LOOP13]] = distinct !{[[LOOP13]], [[META14:![0-9]+]], [[META15:![0-9]+]], [[META16:![0-9]+]]} +// RV64_ZVFH: [[META14]] = !{!"llvm.loop.mustprogress"} +// RV64_ZVFH: [[META15]] = !{!"llvm.loop.isvectorized", i32 1} +// RV64_ZVFH: [[META16]] = !{!"llvm.loop.unroll.runtime.disable"} +// RV64_ZVFH: [[LOOP17]] = distinct !{[[LOOP17]], [[META14]], [[META15]], [[META16]]} +// RV64_ZVFH: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +// RV64_ZVFH: [[META19]] = !{!"double", [[META11]], i64 0} +// RV64_ZVFH: [[LOOP20]] = distinct !{[[LOOP20]], [[META14]], [[META15]], [[META16]]} +// RV64_ZVFH: [[LOOP21]] = distinct !{[[LOOP21]], [[META14]], [[META15]], [[META16]]} +// RV64_ZVFH: [[TBAA22]] = !{[[META23:![0-9]+]], [[META23]], i64 0} +// RV64_ZVFH: [[META23]] = !{!"__fp16", [[META11]], i64 0} +// RV64_ZVFH: [[LOOP24]] = distinct !{[[LOOP24]], [[META14]], [[META15]], [[META16]]} +// RV64_ZVFH: [[LOOP25]] = distinct !{[[LOOP25]], [[META14]], [[META16]], [[META15]]} +// RV64_ZVFH: [[LOOP26]] = distinct !{[[LOOP26]], [[META14]], [[META15]], [[META16]]} +// RV64_ZVFH: [[LOOP27]] = distinct !{[[LOOP27]], [[META14]], [[META16]], [[META15]]} +//. 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index d46859bcb0517..db6d77aab70f7 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2776,6 +2776,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { } return Cost; } + case Intrinsic::maximumnum: + case Intrinsic::minimumnum: { + if (TLI->isOperationLegalOrPromote(llvm::ISD::FMAXNUM_IEEE, LT.second)) + return LT.first * 3; + break; + } default: break; } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 91ba68fe03324..422058be22edb 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -89,6 +89,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::maxnum: case Intrinsic::minimum: case Intrinsic::maximum: + case Intrinsic::minimumnum: + case Intrinsic::maximumnum: case Intrinsic::modf: case Intrinsic::copysign: case Intrinsic::floor: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 27a4bbce1f5fc..f393f5681d641 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -961,6 +961,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, static const unsigned ZvfhminZvfbfminPromoteOps[] = { ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINIMUMNUM, + ISD::FMAXIMUMNUM, ISD::FADD, ISD::FSUB, ISD::FMUL, @@ -1029,7 +1031,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Expand various condition codes (explained above). setCondCodeAction(VFPCCToExpand, VT, Expand); - setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); + setOperationAction( + {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT, + Legal); setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, @@ -1444,7 +1448,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, - ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, + ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS, + ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, @@ -6688,9 +6693,11 @@ static unsigned getRISCVVLOp(SDValue Op) { case ISD::VP_FP_TO_UINT: return RISCVISD::VFCVT_RTZ_XU_F_VL; case ISD::FMINNUM: + case ISD::FMINIMUMNUM: case ISD::VP_FMINNUM: return RISCVISD::VFMIN_VL; case ISD::FMAXNUM: + case ISD::FMAXIMUMNUM: case ISD::VP_FMAXNUM: return RISCVISD::VFMAX_VL; case ISD::LRINT: @@ -7720,6 +7727,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FMA: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVectorOp(Op, DAG); [[fallthrough]]; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits