nikic (https://github.com/nikic) updated pull request https://github.com/llvm/llvm-project/pull/147034
>From c50b409b6b523fa4b8164b80515a93b12e1b5cd4 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Tue, 18 Mar 2025 13:04:23 +0100 Subject: [PATCH] [X86] Ignore NSW when DstSVT is i32 (#131755) We don't have PACKSS for i64->i32. Fixes: https://godbolt.org/z/qb8nxnPbK, which was introduced by ddd2f57b (cherry picked from commit 3d631914677b58a5479b310f480ac76e27d41e7e) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +- llvm/test/CodeGen/X86/vector-trunc-nowrap.ll | 88 ++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4413fbb77f415..12c40b501f627 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20889,7 +20889,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT, return SDValue(); unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits; - if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) { + if ((Flags.hasNoSignedWrap() && DstSVT != MVT::i32) || + MinSignBits < NumSignBits) { PackOpcode = X86ISD::PACKSS; return In; } diff --git a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll index 2b8eedfbbdc9c..863f30e03d2d6 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll @@ -1592,3 +1592,91 @@ entry: %1 = bitcast <8 x i8> %0 to i64 ret i64 %1 } + +define void @foo(<4 x i64> %a, <4 x i64> %b, ptr %p) "min-legal-vector-width"="256" "prefer-vector-width"="256" { +; SSE-LABEL: foo: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, 16(%rdi) +; SSE-NEXT: movaps %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX1-LABEL: foo: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; 
AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX1-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX1-NEXT: vmovaps %xmm0, (%rdi) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: foo: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-SLOW-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-SLOW-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-SLOW-NEXT: vzeroupper +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: foo: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-FAST-ALL-NEXT: vzeroupper +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: foo: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] +; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm1, 16(%rdi) +; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-FAST-PERLANE-NEXT: vzeroupper +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512F-LABEL: foo: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovqd %zmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: foo: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovqd %ymm1, 16(%rdi) +; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; 
AVX512BW-LABEL: foo: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: foo: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovqd %ymm1, 16(%rdi) +; AVX512BWVL-NEXT: vpmovqd %ymm0, (%rdi) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %1 = trunc nsw <8 x i64> %0 to <8 x i32> + store <8 x i32> %1, ptr %p, align 16 + ret void +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits