https://github.com/mahesh-attarde updated https://github.com/llvm/llvm-project/pull/108537
>From d06ba3b08ddf7282da9e53705a4ca9b27f0c1c4d Mon Sep 17 00:00:00 2001 From: mattarde <matta...@intel.com> Date: Fri, 13 Sep 2024 03:26:14 -0700 Subject: [PATCH 1/6] update clr --- clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/avx10_2copyintrin.h | 34 ++++++++++ clang/lib/Headers/immintrin.h | 1 + clang/test/CodeGen/X86/avx512copy-builtins.c | 17 +++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +- llvm/lib/Target/X86/X86InstrAVX10.td | 64 +++++++++++++++++++ .../test/CodeGen/X86/avx512copy-intrinsics.ll | 35 ++++++++++ .../MC/Disassembler/X86/avx10.2-copy-32.txt | 34 ++++++++++ .../MC/Disassembler/X86/avx10.2-copy-64.txt | 34 ++++++++++ llvm/test/MC/X86/avx10.2-copy-32-att.s | 17 +++++ llvm/test/MC/X86/avx10.2-copy-32-intel.s | 17 +++++ llvm/test/MC/X86/avx10.2-copy-64-att.s | 17 +++++ llvm/test/MC/X86/avx10.2-copy-64-intel.s | 17 +++++ llvm/test/TableGen/x86-fold-tables.inc | 2 + llvm/utils/TableGen/X86ManualInstrMapping.def | 1 + 15 files changed, 294 insertions(+), 2 deletions(-) create mode 100644 clang/lib/Headers/avx10_2copyintrin.h create mode 100644 clang/test/CodeGen/X86/avx512copy-builtins.c create mode 100644 llvm/test/CodeGen/X86/avx512copy-intrinsics.ll create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt create mode 100644 llvm/test/MC/X86/avx10.2-copy-32-att.s create mode 100644 llvm/test/MC/X86/avx10.2-copy-32-intel.s create mode 100644 llvm/test/MC/X86/avx10.2-copy-64-att.s create mode 100644 llvm/test/MC/X86/avx10.2-copy-64-intel.s diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 4c75c638b41bae..f5cc07c303f9eb 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -156,6 +156,7 @@ set(x86_files avx10_2_512satcvtintrin.h avx10_2bf16intrin.h avx10_2convertintrin.h + avx10_2copyintrin.h avx10_2minmaxintrin.h avx10_2niintrin.h avx10_2satcvtdsintrin.h diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h new file mode 100644 index 00000000000000..13e76c6abe8993 --- /dev/null +++ b/clang/lib/Headers/avx10_2copyintrin.h @@ -0,0 +1,34 @@ +/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __AVX10_2COPYINTRIN_H +#define __AVX10_2COPYINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8); +} + +#undef __DEFAULT_FN_ATTRS128 + +#endif // __AVX10_2COPYINTRIN_H \ No newline at end of file diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 280154f3c1026e..3fbabffa98df20 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) { #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) #include <avx10_2bf16intrin.h> #include <avx10_2convertintrin.h> +#include <avx10_2copyintrin.h> #include <avx10_2minmaxintrin.h> #include <avx10_2niintrin.h> #include <avx10_2satcvtdsintrin.h> diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c new file mode 100644 index 00000000000000..06f7507bde53ed --- /dev/null +++ b/clang/test/CodeGen/X86/avx512copy-builtins.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \ +// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s + +#include <immintrin.h> +#include <stddef.h> + +__m128i test_mm_move_epi32(__m128i A) { + // CHECK-LABEL: test_mm_move_epi32 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4> + return _mm_move_epi32(A); +} + +__m128i test_mm_move_epi16(__m128i A) { + // CHECK-LABEL: test_mm_move_epi16 + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + return _mm_move_epi16(A); +} diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f6d42ade600885..6e8c8ca3c44d0a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12348,7 +12348,7 @@ static SDValue lowerShuffleAsElementInsertion( } V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S); } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 || - EltVT == MVT::i16) { + (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) { // Either not inserting from the low element of the input or the input // element size is too small to use VZEXT_MOVL to clear the high bits. return SDValue(); @@ -38342,7 +38342,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) || - (MaskEltSize == 16 && Subtarget.hasFP16())) && + (MaskEltSize == 16 && + (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) && isUndefOrEqual(Mask[0], 0) && isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { Shuffle = X86ISD::VZEXT_MOVL; diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index ada2bbaffd6645..f66705a5a3de35 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub, X86Fnmsub, SchedWriteFMA>; } + +//------------------------------------------------- +// AVX10 MOVZXC (COPY) instructions +//------------------------------------------------- +let Predicates = [HasAVX10_2] in { + def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v4i32 (X86vzmovl + (v4i32 VR128X:$src))))]>, EVEX, + Sched<[WriteVecMoveFromGpr]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in + def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), + (ins i32mem:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<32, CD8VT1>, + Sched<[WriteVecLoad]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in + def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<32, CD8VT1>, + Sched<[WriteVecStore]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + Sched<[WriteVecMoveFromGpr]>; + def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}", + (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>; + +def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v8i16 (X86vzmovl + (v8i16 VR128X:$src))))]>, EVEX, T_MAP5, + Sched<[WriteVecMoveFromGpr]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in + def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst), + (ins i16mem:$src), + "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<16, CD8VT1>, T_MAP5, + Sched<[WriteVecLoad]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in + def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<16, CD8VT1>, T_MAP5, + Sched<[WriteVecStore]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", + []>, EVEX, T_MAP5, + Sched<[WriteVecMoveFromGpr]>; + def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", + (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; +} \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll new file mode 100644 index 00000000000000..a7ca23792e6feb --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC + +define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind { +; AVX102-LABEL: test_mm_move_epi32: +; AVX102: # %bb.0: +; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0] +; AVX102-NEXT: retq # encoding: [0xc3] +; +; NOAVX512MOVZXC-LABEL: test_mm_move_epi32: +; NOAVX512MOVZXC: # %bb.0: +; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] +; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] +; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] +; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3] + %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4> + ret <4 x i32> %res +} + +define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind { +; AVX102-LABEL: test_mm_move_epi16: +; AVX102: # %bb.0: +; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0] +; AVX102-NEXT: retq # encoding: [0xc3] +; +; NOAVX512MOVZXC-LABEL: test_mm_move_epi16: +; NOAVX512MOVZXC: # %bb.0: +; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] +; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01] +; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] +; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3] + %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <8 x i16> %res +} diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt new file mode 100644 index 00000000000000..e86c2340a486c5 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt @@ -0,0 +1,34 @@ +# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vmovd (%ecx), %xmm5 +# INTEL: vmovd xmm5, dword ptr [ecx] +0x62 0xf1 0x7e 0x08 0x7e 0x29 + +# ATT: vmovd %xmm5, (%ecx) +# INTEL: vmovd dword ptr [ecx], xmm5 +0x62 0xf1 0x7d 0x08 0xd6 0x29 + +# ATT: vmovd %xmm2, %xmm1 +# INTEL: vmovd xmm1, xmm2 +0x62 0xf1 0x7e 0x08 0x7e 0xca + +# ATT: vmovd %xmm2, %xmm1 +# INTEL: vmovd xmm1, xmm2 +0x62 0xf1 0x7d 0x08 0xd6 0xca + +# ATT: vmovw %xmm5, (%ecx) +# INTEL: vmovw dword ptr [ecx], xmm5 +0x62 0xf5 0x7e 0x08 0x7e 0x29 + +# ATT: vmovw (%ecx), %xmm5 +# INTEL: vmovw xmm5, word ptr [ecx] +0x62 0xf5 0x7e 0x08 0x6e 0x29 + +# ATT: vmovw %xmm2, %xmm1 +# INTEL: vmovw xmm1, xmm2 +0x62 0xf5 0x7e 0x08 0x6e 0xca + +# ATT: vmovw %xmm2, %xmm1 +# INTEL: vmovw xmm1, xmm2 +0x62 0xf5 0x7e 0x08 0x7e 0xca diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt new file mode 100644 index 00000000000000..36ddd75a77ad39 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt @@ -0,0 +1,34 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vmovd (%rcx), %xmm29 +# INTEL: vmovd xmm29, dword ptr [rcx] +0x62 0x61 0x7e 0x08 0x7e 0x29 + +# ATT: vmovd %xmm29, (%rcx) +# INTEL: vmovd dword ptr [rcx], xmm29 +0x62 0x61 0x7d 0x08 0xd6 0x29 + +# ATT: vmovd %xmm22, %xmm21 +# INTEL: vmovd xmm21, xmm22 +0x62 0xa1 0x7e 0x08 0x7e 0xee + +# ATT: vmovd %xmm22, %xmm21 +# INTEL: vmovd xmm21, xmm22 +0x62 0xa1 0x7d 0x08 0xd6 0xee + +# ATT: vmovw %xmm29, (%rcx) +# INTEL: vmovw dword ptr [rcx], xmm29 +0x62 0x65 0x7e 0x08 0x7e 0x29 + +# ATT: vmovw (%rcx), %xmm29 +# INTEL: vmovw xmm29, word ptr [rcx] +0x62 0x65 0x7e 0x08 0x6e 0x29 + +# ATT: vmovw %xmm22, %xmm21 +# INTEL: vmovw xmm21, xmm22 +0x62 0xa5 0x7e 0x08 0x6e 0xee + +# ATT: vmovw %xmm22, %xmm21 +# INTEL: vmovw xmm21, xmm22 +0x62 0xa5 0x7e 0x08 0x7e 0xee diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s new file mode 100644 index 00000000000000..a77f19a5dce542 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s + +// CHECK: vmovd %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca] + vmovd %xmm2, %xmm1 + +// CHECK: vmovd %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca] + vmovd.s %xmm2, %xmm1 + +// CHECK: vmovw %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca] + vmovw %xmm2, %xmm1 + +// CHECK: vmovw %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca] + vmovw.s %xmm2, %xmm1 diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s new file mode 100644 index 00000000000000..222dc2f939c77a --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vmovd xmm1, xmm2 +// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca] + vmovd xmm1, xmm2 + +// CHECK: vmovd xmm1, xmm2 +// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca] + vmovd.s xmm1, xmm2 + +// CHECK: vmovw xmm1, xmm2 +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca] + vmovw xmm1, xmm2 + +// CHECK: vmovw xmm1, xmm2 +// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca] + vmovw.s xmm1, xmm2 diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s new file mode 100644 index 00000000000000..e27d333222a38a --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s + +// CHECK: vmovd %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee] + vmovd %xmm22, %xmm21 + +// CHECK: vmovd %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee] + vmovd.s %xmm22, %xmm21 + +// CHECK: vmovw %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee] + vmovw %xmm22, %xmm21 + +// CHECK: vmovw %xmm22, %xmm21 +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee] + vmovw.s %xmm22, %xmm21 diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s new file mode 100644 index 00000000000000..ed364d4402313d --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vmovd xmm21, xmm22 +// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee] + vmovd xmm21, xmm22 + +// CHECK: vmovd xmm21, xmm22 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee] + vmovd.s xmm21, xmm22 + +// CHECK: vmovw xmm21, xmm22 +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee] + vmovw xmm21, xmm22 + +// CHECK: vmovw xmm21, xmm22 +// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee] + vmovw.s xmm21, xmm22 diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index e85708ac1cc458..412c568677d986 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = { {X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0}, {X86::VMOVUPSrr, X86::VMOVUPSrm, 0}, {X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE}, + {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE}, {X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE}, {X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE}, + {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE}, {X86::VPABSBYrr, X86::VPABSBYrm, 0}, {X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0}, {X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0}, diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def index d76c404722b0ac..bc539d792f38df 100644 --- a/llvm/utils/TableGen/X86ManualInstrMapping.def +++ b/llvm/utils/TableGen/X86ManualInstrMapping.def @@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri) NOCOMP(VPSRAQZ128rm) NOCOMP(VPSRAQZ128rr) NOCOMP(VSCALEFPSZ128rm) +NOCOMP(VMOVZPDILo2PDIZrr) NOCOMP(VDBPSADBWZ256rmi) NOCOMP(VDBPSADBWZ256rri) NOCOMP(VPMAXSQZ256rm) >From d83e328dc157c2ba2c54a4c3e9871c33226d6b76 Mon Sep 17 00:00:00 2001 From: mattarde <matta...@intel.com> Date: Fri, 13 Sep 2024 10:37:39 -0700 Subject: [PATCH 2/6] update test --- llvm/test/MC/X86/avx10.2-copy-32-att.s | 89 ++++++++++++++++--- llvm/test/MC/X86/avx10.2-copy-32-intel.s | 88 ++++++++++++++++--- llvm/test/MC/X86/avx10.2-copy-64-att.s | 104 ++++++++++++++++++++--- llvm/test/MC/X86/avx10.2-copy-64-intel.s | 104 ++++++++++++++++++++--- 4 files changed, 337 insertions(+), 48 deletions(-) diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s index a77f19a5dce542..2bc498720849c9 100644 --- a/llvm/test/MC/X86/avx10.2-copy-32-att.s +++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s @@ -1,17 +1,82 @@ // RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s -// CHECK: vmovd %xmm2, %xmm1 -// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca] - vmovd %xmm2, %xmm1 +// CHECK: vmovd 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovd 268435456(%esp,%esi,8), %xmm2 -// CHECK: vmovd %xmm2, %xmm1 -// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca] - vmovd.s %xmm2, %xmm1 +// CHECK: vmovd 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovd 291(%edi,%eax,4), %xmm2 -// CHECK: vmovw %xmm2, %xmm1 -// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca] - vmovw %xmm2, %xmm1 +// CHECK: vmovd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x10] + vmovd (%eax), %xmm2 + +// CHECK: vmovd -128(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff] + vmovd -128(,%ebp,2), %xmm2 + +// CHECK: vmovd %xmm3, 268435456(%esp,%esi,8) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovd %xmm3, 268435456(%esp,%esi,8) + +// CHECK: vmovd %xmm3, 291(%edi,%eax,4) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovd %xmm3, 291(%edi,%eax,4) + +// CHECK: vmovd %xmm3, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x18] + vmovd %xmm3, (%eax) + +// CHECK: vmovd %xmm3, -128(,%ebp,2) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff] + vmovd %xmm3, -128(,%ebp,2) + +// CHECK: vmovw 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovw 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vmovw 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovw 291(%edi,%eax,4), %xmm2 + +// CHECK: vmovw (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10] + vmovw (%eax), %xmm2 + +// CHECK: vmovw -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vmovw -64(,%ebp,2), %xmm2 + +// CHECK: vmovw 254(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f] + vmovw 254(%ecx), %xmm2 + +// CHECK: vmovw -256(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80] + vmovw -256(%edx), %xmm2 + +// CHECK: vmovw %xmm3, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovw %xmm3, 268435456(%esp,%esi,8) + +// CHECK: vmovw %xmm3, 291(%edi,%eax,4) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovw %xmm3, 291(%edi,%eax,4) + +// CHECK: vmovw %xmm3, (%eax) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18] + vmovw %xmm3, (%eax) + +// CHECK: vmovw %xmm3, -64(,%ebp,2) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff] + vmovw %xmm3, -64(,%ebp,2) + +// CHECK: vmovw %xmm3, 254(%ecx) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f] + vmovw %xmm3, 254(%ecx) + +// CHECK: vmovw %xmm3, -256(%edx) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80] + vmovw %xmm3, -256(%edx) -// CHECK: vmovw %xmm2, %xmm1 -// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca] - vmovw.s %xmm2, %xmm1 diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s index 222dc2f939c77a..aa84548e5f75dd 100644 --- a/llvm/test/MC/X86/avx10.2-copy-32-intel.s +++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s @@ -1,17 +1,81 @@ // RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s -// CHECK: vmovd xmm1, xmm2 -// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca] - vmovd xmm1, xmm2 +// CHECK: vmovd xmm2, dword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovd xmm2, dword ptr [esp + 8*esi + 268435456] -// CHECK: vmovd xmm1, xmm2 -// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca] - vmovd.s xmm1, xmm2 +// CHECK: vmovd xmm2, dword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovd xmm2, dword ptr [edi + 4*eax + 291] -// CHECK: vmovw xmm1, xmm2 -// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca] - vmovw xmm1, xmm2 +// CHECK: vmovd xmm2, dword ptr [eax] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x10] + vmovd xmm2, dword ptr [eax] -// CHECK: vmovw xmm1, xmm2 -// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca] - vmovw.s xmm1, xmm2 +// CHECK: vmovd xmm2, dword ptr [2*ebp - 128] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff] + vmovd xmm2, dword ptr [2*ebp - 128] + +// CHECK: vmovd dword ptr [esp + 8*esi + 268435456], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovd dword ptr [esp + 8*esi + 268435456], xmm3 + +// CHECK: vmovd dword ptr [edi + 4*eax + 291], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovd dword ptr [edi + 4*eax + 291], xmm3 + +// CHECK: vmovd dword ptr [eax], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x18] + vmovd dword ptr [eax], xmm3 + +// CHECK: vmovd dword ptr [2*ebp - 128], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff] + vmovd dword ptr [2*ebp - 128], xmm3 + +// CHECK: vmovw xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovw xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vmovw xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovw xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vmovw xmm2, word ptr [eax] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10] + vmovw xmm2, word ptr [eax] + +// CHECK: vmovw xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vmovw xmm2, word ptr [2*ebp - 64] + +// CHECK: vmovw xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f] + vmovw xmm2, word ptr [ecx + 254] + +// CHECK: vmovw xmm2, word ptr [edx - 256] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80] + vmovw xmm2, word ptr [edx - 256] + +// CHECK: vmovw word ptr [esp + 8*esi + 268435456], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovw word ptr [esp + 8*esi + 268435456], xmm3 + +// CHECK: vmovw word ptr [edi + 4*eax + 291], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovw word ptr [edi + 4*eax + 291], xmm3 + +// CHECK: vmovw word ptr [eax], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18] + vmovw word ptr [eax], xmm3 + +// CHECK: vmovw word ptr [2*ebp - 64], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff] + vmovw word ptr [2*ebp - 64], xmm3 + +// CHECK: vmovw word ptr [ecx + 254], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f] + vmovw word ptr [ecx + 254], xmm3 + +// CHECK: vmovw word ptr [edx - 256], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80] + vmovw word ptr [edx - 256], xmm3 diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s index e27d333222a38a..a672b2d842240c 100644 --- a/llvm/test/MC/X86/avx10.2-copy-64-att.s +++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s @@ -1,17 +1,97 @@ // RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s -// CHECK: vmovd %xmm22, %xmm21 -// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee] - vmovd %xmm22, %xmm21 +// CHECK: vmovd 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovd 268435456(%rbp,%r14,8), %xmm22 -// CHECK: vmovd %xmm22, %xmm21 -// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee] - vmovd.s %xmm22, %xmm21 +// CHECK: vmovd 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovd 291(%r8,%rax,4), %xmm22 -// CHECK: vmovw %xmm22, %xmm21 -// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee] - vmovw %xmm22, %xmm21 +// CHECK: vmovd (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovd (%rip), %xmm22 -// CHECK: vmovw %xmm22, %xmm21 -// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee] - vmovw.s %xmm22, %xmm21 +// CHECK: vmovd -128(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff] + vmovd -128(,%rbp,2), %xmm22 + +// CHECK: vmovd 508(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f] + vmovd 508(%rcx), %xmm22 + +// CHECK: vmovd -512(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80] + vmovd -512(%rdx), %xmm22 + +// CHECK: vmovd %xmm23, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovd %xmm23, 268435456(%rbp,%r14,8) + +// CHECK: vmovd %xmm23, 291(%r8,%rax,4) +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovd %xmm23, 291(%r8,%rax,4) + +// CHECK: vmovd %xmm23, (%rip) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovd %xmm23, (%rip) + +// CHECK: vmovd %xmm23, -128(,%rbp,2) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff] + vmovd %xmm23, -128(,%rbp,2) + +// CHECK: vmovd %xmm23, 508(%rcx) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f] + vmovd %xmm23, 508(%rcx) + +// CHECK: vmovd %xmm23, -512(%rdx) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80] + vmovd %xmm23, -512(%rdx) + +// CHECK: vmovw 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovw 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vmovw 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovw 291(%r8,%rax,4), %xmm22 + +// CHECK: vmovw (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovw (%rip), %xmm22 + +// CHECK: vmovw -64(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vmovw -64(,%rbp,2), %xmm22 + +// CHECK: vmovw 254(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f] + vmovw 254(%rcx), %xmm22 + +// CHECK: vmovw -256(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80] + vmovw -256(%rdx), %xmm22 + +// CHECK: vmovw %xmm23, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovw %xmm23, 268435456(%rbp,%r14,8) + +// CHECK: vmovw %xmm23, 291(%r8,%rax,4) +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovw %xmm23, 291(%r8,%rax,4) + +// CHECK: vmovw %xmm23, (%rip) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovw %xmm23, (%rip) + +// CHECK: vmovw %xmm23, -64(,%rbp,2) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff] + vmovw %xmm23, -64(,%rbp,2) + +// CHECK: vmovw %xmm23, 254(%rcx) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f] + vmovw %xmm23, 254(%rcx) + +// CHECK: vmovw %xmm23, -256(%rdx) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80] + vmovw %xmm23, -256(%rdx) diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s index ed364d4402313d..4fd7b67dfa5db5 100644 --- a/llvm/test/MC/X86/avx10.2-copy-64-intel.s +++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s @@ -1,17 +1,97 @@ // RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s -// CHECK: vmovd xmm21, xmm22 -// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee] - vmovd xmm21, xmm22 +// CHECK: vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456] -// CHECK: vmovd xmm21, xmm22 -// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee] - vmovd.s xmm21, xmm22 +// CHECK: vmovd xmm22, dword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovd xmm22, dword ptr [r8 + 4*rax + 291] -// CHECK: vmovw xmm21, xmm22 -// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee] - vmovw xmm21, xmm22 +// CHECK: vmovd xmm22, dword ptr [rip] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovd xmm22, dword ptr [rip] -// CHECK: vmovw xmm21, xmm22 -// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee] - vmovw.s xmm21, xmm22 +// CHECK: vmovd xmm22, dword ptr [2*rbp - 128] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff] + vmovd xmm22, dword ptr [2*rbp - 128] + +// CHECK: vmovd xmm22, dword ptr [rcx + 508] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f] + vmovd xmm22, dword ptr [rcx + 508] + +// CHECK: vmovd xmm22, dword ptr [rdx - 512] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80] + vmovd xmm22, dword ptr [rdx - 512] + +// CHECK: vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23 + +// CHECK: vmovd dword ptr [r8 + 4*rax + 291], xmm23 +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovd dword ptr [r8 + 4*rax + 291], xmm23 + +// CHECK: vmovd dword ptr [rip], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovd dword ptr [rip], xmm23 + +// CHECK: vmovd dword ptr [2*rbp - 128], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff] + vmovd dword ptr [2*rbp - 128], xmm23 + +// CHECK: vmovd dword ptr [rcx + 508], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f] + vmovd dword ptr [rcx + 508], xmm23 + +// CHECK: vmovd dword ptr [rdx - 512], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80] + vmovd dword ptr [rdx - 512], xmm23 + +// CHECK: vmovw xmm22, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovw xmm22, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmovw xmm22, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovw xmm22, word ptr [r8 + 4*rax + 291] + +// CHECK: vmovw xmm22, word ptr [rip] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovw xmm22, word ptr [rip] + +// CHECK: vmovw xmm22, word ptr [2*rbp - 64] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vmovw xmm22, word ptr [2*rbp - 64] + +// CHECK: vmovw xmm22, word ptr [rcx + 254] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f] + vmovw xmm22, word ptr [rcx + 254] + +// CHECK: vmovw xmm22, word ptr [rdx - 256] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80] + vmovw xmm22, word ptr [rdx - 256] + +// CHECK: vmovw word ptr [rbp + 8*r14 + 268435456], xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovw word ptr [rbp + 8*r14 + 268435456], xmm23 + +// CHECK: vmovw word ptr [r8 + 4*rax + 291], xmm23 +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovw word ptr [r8 + 4*rax + 291], xmm23 + +// CHECK: vmovw word ptr [rip], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovw word ptr [rip], xmm23 + +// CHECK: vmovw word ptr [2*rbp - 64], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff] + vmovw word ptr [2*rbp - 64], xmm23 + +// CHECK: vmovw word ptr [rcx + 254], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f] + vmovw word ptr [rcx + 254], xmm23 + +// CHECK: vmovw word ptr [rdx - 256], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80] + vmovw word ptr [rdx - 256], xmm23 >From 36cc32180947287e69606de90312b08d5d39ba74 Mon Sep 17 00:00:00 2001 From: mattarde <matta...@intel.com> Date: Fri, 13 Sep 2024 10:43:22 -0700 Subject: [PATCH 3/6] remove linebreak warning --- clang/lib/Headers/avx10_2copyintrin.h | 2 +- llvm/lib/Target/X86/X86InstrAVX10.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h index 13e76c6abe8993..7fc31190781d91 100644 --- a/clang/lib/Headers/avx10_2copyintrin.h +++ b/clang/lib/Headers/avx10_2copyintrin.h @@ -31,4 +31,4 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) { #undef __DEFAULT_FN_ATTRS128 -#endif // __AVX10_2COPYINTRIN_H \ No newline at end of file +#endif // __AVX10_2COPYINTRIN_H diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index f66705a5a3de35..2dc65e792f83e2 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1600,4 +1600,4 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in Sched<[WriteVecMoveFromGpr]>; def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; -} \ No newline at end of file +} >From 5d2af4630b3bf2fa91db7bbc9c5557466d94f5c2 Mon Sep 17 00:00:00 2001 From: mattarde <matta...@intel.com> Date: Mon, 16 Sep 2024 22:19:27 -0700 Subject: [PATCH 4/6] remove f16 check --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +-- llvm/test/CodeGen/X86/avx512fp16-mov.ll | 18 +++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6e8c8ca3c44d0a..69defcb2f5ab0f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38342,8 +38342,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) || - (MaskEltSize == 16 && - (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) && + (MaskEltSize == 16 && Subtarget.hasAVX10_2())) && isUndefOrEqual(Mask[0], 0) && isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { Shuffle = X86ISD::VZEXT_MOVL; diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll index f4eb5b952ae436..f0b520ed095e98 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll @@ -2094,14 +2094,14 @@ for.end: ; preds = %for.body.preheader, define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind { ; X64-LABEL: pr52561: ; X64: # %bb.0: -; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm1 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 +; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm1 ; X64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112] -; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 +; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 +; X64-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0] +; X64-NEXT: vpand %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vmovsh %xmm0, %xmm2, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: pr52561: @@ -2113,11 +2113,11 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X86-NEXT: vpaddd 8(%ebp), %ymm1, %ymm1 ; X86-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112] -; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 +; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 +; X86-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0] +; X86-NEXT: vpand %ymm2, %ymm0, %ymm0 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 -; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0 ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl @@ -2139,9 +2139,9 @@ define <8 x i16> @pr59628_xmm(i16 %arg) { ; X86-LABEL: pr59628_xmm: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; X86-NEXT: vpbroadcastw %eax, %xmm1 -; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0 +; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] ; X86-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1 ; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl >From 1c4c16b8403fa12fa875946db87936b070e13824 Mon Sep 17 00:00:00 2001 From: mattarde <matta...@intel.com> Date: Tue, 17 Sep 2024 02:42:11 -0700 Subject: [PATCH 5/6] Revert "remove f16 check" This reverts commit be0013472904aaa960ff1b5fe1add5b5be79973d. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 ++- llvm/test/CodeGen/X86/avx512fp16-mov.ll | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 69defcb2f5ab0f..6e8c8ca3c44d0a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38342,7 +38342,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) || - (MaskEltSize == 16 && Subtarget.hasAVX10_2())) && + (MaskEltSize == 16 && + (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) && isUndefOrEqual(Mask[0], 0) && isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { Shuffle = X86ISD::VZEXT_MOVL; diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll index f0b520ed095e98..f4eb5b952ae436 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll @@ -2094,14 +2094,14 @@ for.end: ; preds = %for.body.preheader, define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind { ; X64-LABEL: pr52561: ; X64: # %bb.0: -; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm1 +; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112] -; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; X64-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0] -; X64-NEXT: vpand %ymm2, %ymm0, %ymm0 +; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-NEXT: vmovsh %xmm0, %xmm2, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: pr52561: @@ -2113,11 +2113,11 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X86-NEXT: vpaddd 8(%ebp), %ymm1, %ymm1 ; X86-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112] -; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; X86-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0] -; X86-NEXT: vpand %ymm2, %ymm0, %ymm0 +; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 +; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0 ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl @@ -2139,9 +2139,9 @@ define <8 x i16> @pr59628_xmm(i16 %arg) { ; X86-LABEL: pr59628_xmm: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; X86-NEXT: vpbroadcastw %eax, %xmm1 -; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] +; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1 ; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl >From fc41a327fd2114e17d6de0c870becc020b544c25 Mon Sep 17 00:00:00 2001 From: mattarde <matta...@intel.com> Date: Tue, 17 Sep 2024 02:59:41 -0700 Subject: [PATCH 6/6] revert fp16 or avx102 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6e8c8ca3c44d0a..5c2c3dfb232e45 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38342,8 +38342,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) || - (MaskEltSize == 16 && - (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) && + (MaskEltSize == 16 && Subtarget.hasFP16())) && isUndefOrEqual(Mask[0], 0) && isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { Shuffle = X86ISD::VZEXT_MOVL; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits