Author: Shengchen Kan Date: 2023-11-30T17:56:21+08:00 New Revision: eb64697a7b75d2b22041cc992fad0c8dfa7989cb
URL: https://github.com/llvm/llvm-project/commit/eb64697a7b75d2b22041cc992fad0c8dfa7989cb DIFF: https://github.com/llvm/llvm-project/commit/eb64697a7b75d2b22041cc992fad0c8dfa7989cb.diff LOG: [X86][Codegen] Correct the domain of VP2INTERSECT GenericDomain -> SSEPackedInt Found by #73654 Added: Modified: llvm/lib/Target/X86/X86InstrAVX512.td llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index f325f47d46464c3..0514f0d19506707 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12875,8 +12875,10 @@ multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _ } } +let ExeDomain = SSEPackedInt in { defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W; +} multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, diff --git a/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll index ef07d30299e9dad..9741972767bcdcd 100644 --- a/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll @@ -84,7 +84,7 @@ define void @test_mm256_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] -; X86-NEXT: vmovaps (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x02] +; X86-NEXT: vmovdqa (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x02] ; X86-NEXT: vp2intersectd (%ecx), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x01] ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] @@ -96,7 +96,7 @@ define void @test_mm256_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm256_2intersect_epi32_p: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] +; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07] ; X64-NEXT: vp2intersectd (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x06] ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] @@ -125,7 +125,7 @@ define void @test_mm256_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vmovaps (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x06] +; X86-NEXT: vmovdqa (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x06] ; X86-NEXT: vp2intersectq (%edx), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x02] ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -142,7 +142,7 @@ define void @test_mm256_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm256_2intersect_epi64_p: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] +; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07] ; X64-NEXT: vp2intersectq (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x06] ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -175,7 +175,7 @@ define void @test_mm256_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] -; X86-NEXT: vbroadcastss (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x02] +; X86-NEXT: vpbroadcastd (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x02] ; X86-NEXT: vp2intersectd (%ecx){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x01] ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] @@ -187,7 +187,7 @@ define void @test_mm256_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm256_2intersect_epi32_b: ; X64: # %bb.0: # %entry -; X64-NEXT: vbroadcastss (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x07] +; X64-NEXT: vpbroadcastd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x07] ; X64-NEXT: vp2intersectd (%rsi){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x06] ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] @@ -220,7 +220,7 @@ define void @test_mm256_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vbroadcastsd (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x06] +; X86-NEXT: vpbroadcastq (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x06] ; X86-NEXT: vp2intersectq (%edx){1to4}, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x38,0x68,0x02] ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -237,7 +237,7 @@ define void @test_mm256_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm256_2intersect_epi64_b: ; X64: # %bb.0: # %entry -; X64-NEXT: vbroadcastsd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x07] +; X64-NEXT: vpbroadcastq (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x07] ; X64-NEXT: vp2intersectq (%rsi){1to4}, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x38,0x68,0x06] ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -362,7 +362,7 @@ define void @test_mm_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapture ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] +; X86-NEXT: vmovdqa (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x06] ; X86-NEXT: vp2intersectd (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x02] ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -378,7 +378,7 @@ define void @test_mm_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapture ; ; X64-LABEL: test_mm_2intersect_epi32_p: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] +; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07] ; X64-NEXT: vp2intersectd (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x06] ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -414,7 +414,7 @@ define void @test_mm_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapture ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] +; X86-NEXT: vmovdqa (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x06] ; X86-NEXT: vp2intersectq (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x02] ; X86-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e] ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] @@ -430,7 +430,7 @@ define void @test_mm_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapture ; ; X64-LABEL: test_mm_2intersect_epi64_p: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] +; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07] ; X64-NEXT: vp2intersectq (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x06] ; X64-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e] ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] @@ -466,7 +466,7 @@ define void @test_mm_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapture ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vbroadcastss (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x06] +; X86-NEXT: vpbroadcastd (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x06] ; X86-NEXT: vp2intersectd (%edx){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x02] ; X86-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -482,7 +482,7 @@ define void @test_mm_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapture ; ; X64-LABEL: test_mm_2intersect_epi32_b: ; X64: # %bb.0: # %entry -; X64-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] +; X64-NEXT: vpbroadcastd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x07] ; X64-NEXT: vp2intersectd (%rsi){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x06] ; X64-NEXT: kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c] ; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c] @@ -522,8 +522,7 @@ define void @test_mm_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapture ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vmovddup (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x06] -; X86-NEXT: # xmm0 = mem[0,0] +; X86-NEXT: vpbroadcastq (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x06] ; X86-NEXT: vp2intersectq (%edx){1to2}, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x18,0x68,0x02] ; X86-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e] ; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] @@ -539,8 +538,7 @@ define void @test_mm_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapture ; ; X64-LABEL: test_mm_2intersect_epi64_b: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07] -; X64-NEXT: # xmm0 = mem[0,0] +; X64-NEXT: vpbroadcastq (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x07] ; X64-NEXT: vp2intersectq (%rsi){1to2}, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x18,0x68,0x06] ; X64-NEXT: kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e] ; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] diff --git a/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll index 39c57e65b48524e..28e3d6dd5d84999 100644 --- a/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll @@ -72,7 +72,7 @@ define void @test_mm512_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vmovaps (%esi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x06] +; X86-NEXT: vmovdqa64 (%esi), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x06] ; X86-NEXT: vp2intersectd (%edx), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x02] ; X86-NEXT: kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01] ; X86-NEXT: kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08] @@ -83,7 +83,7 @@ define void @test_mm512_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm512_2intersect_epi32_p: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovaps (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07] +; X64-NEXT: vmovdqa64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07] ; X64-NEXT: vp2intersectd (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x06] ; X64-NEXT: kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02] ; X64-NEXT: kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09] @@ -106,7 +106,7 @@ define void @test_mm512_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] -; X86-NEXT: vmovaps (%edx), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x02] +; X86-NEXT: vmovdqa64 (%edx), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x02] ; X86-NEXT: vp2intersectq (%ecx), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x01] ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] @@ -118,7 +118,7 @@ define void @test_mm512_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm512_2intersect_epi64_p: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovaps (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07] +; X64-NEXT: vmovdqa64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07] ; X64-NEXT: vp2intersectq (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x06] ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] @@ -148,7 +148,7 @@ define void @test_mm512_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] -; X86-NEXT: vbroadcastss (%esi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x06] +; X86-NEXT: vpbroadcastd (%esi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x58,0x06] ; X86-NEXT: vp2intersectd (%edx){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x02] ; X86-NEXT: kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01] ; X86-NEXT: kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08] @@ -159,7 +159,7 @@ define void @test_mm512_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm512_2intersect_epi32_b: ; X64: # %bb.0: # %entry -; X64-NEXT: vbroadcastss (%rdi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07] +; X64-NEXT: vpbroadcastd (%rdi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x58,0x07] ; X64-NEXT: vp2intersectd (%rsi){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x06] ; X64-NEXT: kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02] ; X64-NEXT: kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09] @@ -186,7 +186,7 @@ define void @test_mm512_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] -; X86-NEXT: vbroadcastsd (%edx), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x02] +; X86-NEXT: vpbroadcastq (%edx), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x02] ; X86-NEXT: vp2intersectq (%ecx){1to8}, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x58,0x68,0x01] ; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] @@ -198,7 +198,7 @@ define void @test_mm512_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt ; ; X64-LABEL: test_mm512_2intersect_epi64_b: ; X64: # %bb.0: # %entry -; X64-NEXT: vbroadcastsd (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07] +; X64-NEXT: vpbroadcastq (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x07] ; X64-NEXT: vp2intersectq (%rsi){1to8}, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x58,0x68,0x06] ; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] ; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] diff --git a/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll b/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll index 909a0a4feae0560..e2057a293255bbb 100644 --- a/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll +++ b/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll @@ -11,7 +11,7 @@ define void @stack_fold_vp2intersectd(ptr %a, <16 x i32> %b, ptr nocapture %m0, ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vmovaps (%rdi), %zmm0 +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 ; CHECK-NEXT: vp2intersectd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload ; CHECK-NEXT: kmovw %k0, (%rsi) ; CHECK-NEXT: kmovw %k1, (%rdx) @@ -35,7 +35,7 @@ define void @stack_fold_vp2intersectq(ptr %a, <8 x i64> %b, ptr nocapture %m0, p ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vmovaps (%rdi), %zmm0 +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 ; CHECK-NEXT: vp2intersectq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload ; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: kmovw %k0, %ecx @@ -61,7 +61,7 @@ define void @stack_fold_vp2intersectd_256(ptr %a, <8 x i32> %b, ptr nocapture %m ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-NEXT: vp2intersectd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 # 32-byte Folded Reload ; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: kmovw %k0, %ecx @@ -87,7 +87,7 @@ define void @stack_fold_vp2intersectq_256(ptr %a, <4 x i64> %b, ptr nocapture %m ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-NEXT: vp2intersectq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 # 32-byte Folded Reload ; CHECK-NEXT: kshiftlw $12, %k0, %k2 ; CHECK-NEXT: kshiftrw $12, %k2, %k2 @@ -117,7 +117,7 @@ define void @stack_fold_vp2intersectd_128(ptr %a, <4 x i32> %b, ptr nocapture %m ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-NEXT: vp2intersectd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 # 16-byte Folded Reload ; CHECK-NEXT: kshiftlw $12, %k0, %k2 ; CHECK-NEXT: kshiftrw $12, %k2, %k2 @@ -146,7 +146,7 @@ define void @stack_fold_vp2intersectq_128(ptr %a, <2 x i64> %b, ptr nocapture %m ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-NEXT: vp2intersectq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 # 16-byte Folded Reload ; CHECK-NEXT: kshiftlw $14, %k0, %k2 ; CHECK-NEXT: kshiftrw $14, %k2, %k2 diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll index a2affbd8728c23c..9f2f1d57c2dbc0d 100644 --- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll +++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll @@ -14,9 +14,9 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1 ; X86-NEXT: andl $-64, %esp ; X86-NEXT: subl $64, %esp ; X86-NEXT: movl 456(%ebp), %esi -; X86-NEXT: vmovaps 328(%ebp), %zmm3 -; X86-NEXT: vmovaps 200(%ebp), %zmm4 -; X86-NEXT: vmovaps 72(%ebp), %zmm5 +; X86-NEXT: vmovdqa64 328(%ebp), %zmm3 +; X86-NEXT: vmovdqa64 200(%ebp), %zmm4 +; X86-NEXT: vmovdqa64 72(%ebp), %zmm5 ; X86-NEXT: vp2intersectd %zmm1, %zmm0, %k0 ; X86-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill ; X86-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill @@ -70,7 +70,7 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1 ; X64-NEXT: andq $-64, %rsp ; X64-NEXT: subq $64, %rsp ; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: vmovaps 16(%rbp), %zmm8 +; X64-NEXT: vmovdqa64 16(%rbp), %zmm8 ; X64-NEXT: vp2intersectd %zmm1, %zmm0, %k0 ; X64-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; X64-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits