https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/91425
>From 2fc32a278e4fd46c6dd085845e69e84c321a3f75 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Mon, 6 May 2024 10:59:44 +0800 Subject: [PATCH 1/2] [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) AVX doesn't provide 16-bit BROADCAST instruction. Fixes #91005 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/pr91005.ll | 39 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr91005.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c572b27fe401e..3e4ecab8443a9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7295,7 +7295,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) || - CVT == MVT::f16 || + (CVT == MVT::f16 && Subtarget.hasAVX2()) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) diff --git a/llvm/test/CodeGen/X86/pr91005.ll b/llvm/test/CodeGen/X86/pr91005.ll new file mode 100644 index 0000000000000..97fd1ce456882 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr91005.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s + +define void @PR91005(ptr %0) minsize { +; CHECK-LABEL: PR91005: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [31744,31744,31744,31744] +; CHECK-NEXT: vpcmpeqw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-NEXT: vmovd %xmm0, %eax +; CHECK-NEXT: movw %ax, (%rdi) +; CHECK-NEXT: .LBB0_2: # %common.ret +; CHECK-NEXT: retq + %2 = bitcast <2 x half> poison to <2 x i16> + %3 = icmp eq <2 x i16> %2, <i16 31744, i16 31744> + br i1 poison, label %4, label %common.ret + +common.ret: ; preds = %4, %1 + ret void + +4: ; preds = %1 + %5 = select <2 x i1> %3, <2 x half> <half 0xH3C00, half 0xH3C00>, <2 x half> zeroinitializer + %6 = fmul <2 x half> %5, zeroinitializer + %7 = fsub <2 x half> %6, zeroinitializer + %8 = extractelement <2 x half> %7, i64 0 + store half %8, ptr %0, align 2 + br label %common.ret +} + +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) >From 4d284b853f26a6cb848028720163561cabf63d95 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Wed, 8 May 2024 10:59:31 +0800 Subject: [PATCH 2/2] Fix difference with LLVM 18 release --- llvm/test/CodeGen/X86/pr91005.ll | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/X86/pr91005.ll b/llvm/test/CodeGen/X86/pr91005.ll index 97fd1ce456882..16b78bf1e7e17 100644 --- a/llvm/test/CodeGen/X86/pr91005.ll +++ b/llvm/test/CodeGen/X86/pr91005.ll @@ -8,12 +8,13 @@ define void @PR91005(ptr %0) minsize { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [31744,31744,31744,31744] -; CHECK-NEXT: vpcmpeqw %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vpextrw $0, %xmm0, %eax +; CHECK-NEXT: movzwl %ax, %eax +; CHECK-NEXT: vmovd %eax, %xmm0 ; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; CHECK-NEXT: vmovd %xmm0, %eax _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits