[PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint
coby created this revision. coby added reviewers: echristo, delena. coby added a subscriber: cfe-commits. coby set the repository for this revision to rL LLVM. Herald added a subscriber: mehdi_amini. 1. 'v' constraint for (x86) non-avx arch imitates the already implemented 'x' constraint, i.e. allows XMM{0-15} & YMM{0-15} depending on the apparent arch & mode (32/64). 2. for the avx512 arch it allows [X,Y,Z]MM{0-31} (mode dependent) This patch applies the needed changes to clang LLVM patch: Repository: rL LLVM https://reviews.llvm.org/D25004 Files: lib/Basic/Targets.cpp test/CodeGen/x86-inline-asm-v-constraint.c Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -3949,6 +3949,7 @@ case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. @@ -3989,6 +3990,7 @@ case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. 
Index: test/CodeGen/x86-inline-asm-v-constraint.c === --- test/CodeGen/x86-inline-asm-v-constraint.c +++ test/CodeGen/x86-inline-asm-v-constraint.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0) +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1) +__m512 testZMM(__m512 _zmm0, __m512 _zmm1) { +#ifdef AVX512 + __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0)); +#endif + return _zmm0; +} Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -3949,6 +3949,7 @@ case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'Q': // Any register accessible as [r]h: a, b, c, and d. 
case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. @@ -3989,6 +3990,7 @@ case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. Index: test/CodeGen/x86-inline-asm-v-constraint.c === --- test/CodeGen/x86-inline-asm-v-constraint.c +++ test/CodeGen/x86-inline-asm-v-constraint.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0) +__m256 testYMM(__m256 _ymm0) { +#i
Re: [PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint
coby updated this revision to Diff 72961. coby marked an inline comment as done. coby added a comment. Addressing comments: Added a check for KNL as well Repository: rL LLVM https://reviews.llvm.org/D25004 Files: x86-inline-asm-v-constraint.c Index: x86-inline-asm-v-constraint.c === --- x86-inline-asm-v-constraint.c +++ x86-inline-asm-v-constraint.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0) +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1) +__m512 testZMM(__m512 _zmm0, __m512 _zmm1) { +#ifdef AVX512 + __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0)); +#endif + return _zmm0; +} Index: x86-inline-asm-v-constraint.c === --- x86-inline-asm-v-constraint.c +++ 
x86-inline-asm-v-constraint.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0) +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1) +__m512 testZMM(__m512 _zmm0, __m512 _zmm1) { +#ifdef AVX512 + __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0)); +#endif + return _zmm0; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint
coby removed rL LLVM as the repository for this revision. coby updated this revision to Diff 74060. https://reviews.llvm.org/D25004 Files: lib/Basic/Targets.cpp test/CodeGen/x86-inline-asm-v-constraint.c Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -3949,6 +3949,7 @@ case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. @@ -3989,6 +3990,7 @@ case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. Index: test/CodeGen/x86-inline-asm-v-constraint.c === --- test/CodeGen/x86-inline-asm-v-constraint.c +++ test/CodeGen/x86-inline-asm-v-constraint.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), 
"v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0) +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1) +__m512 testZMM(__m512 _zmm0, __m512 _zmm1) { +#ifdef AVX512 + __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0)); +#endif + return _zmm0; +} Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -3949,6 +3949,7 @@ case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. @@ -3989,6 +3990,7 @@ case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. 
Index: test/CodeGen/x86-inline-asm-v-constraint.c === --- test/CodeGen/x86-inline-asm-v-constraint.c +++ test/CodeGen/x86-inline-asm-v-constraint.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1) +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0) +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16
[PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint
coby closed this revision. coby added a comment. Committed to revision 283716 https://reviews.llvm.org/D25004 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26258: [AVX512][clang] Amending vpmultishiftqb
coby abandoned this revision. coby added a comment. deprecated. Repository: rL LLVM https://reviews.llvm.org/D26258 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26587: [X86][AVX512][InlineASM][MS][clang] (I|G)CC Memory adjustments compatibility
coby updated this revision to Diff 78681. Repository: rL LLVM https://reviews.llvm.org/D26587 Files: test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c Index: test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c === --- test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c +++ test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -fasm-blocks -o - | FileCheck %s + +// Case1: Check integrity of inspected patch upon broadcasting +// Case2: Check integrity of inspected patch upon SIMD mem ref + +void F() { + char a; + // CHECK: vaddps xmm1, xmm2, dword ptr $0{1to4} + // CHECK: vaddps xmm1, xmm2, xmmword ptr $1 + __asm vaddps xmm1, xmm2, [a]{1to4} + __asm vaddps xmm1, xmm2, [a] +} Index: test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c === --- test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c +++ test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -fasm-blocks -o - | FileCheck %s + +// Case1: Check integrity of inspected patch upon broadcasting +// Case2: Check integrity of inspected patch upon SIMD mem ref + +void F() { + char a; + // CHECK: vaddps xmm1, xmm2, dword ptr $0{1to4} + // CHECK: vaddps xmm1, xmm2, xmmword ptr $1 + __asm vaddps xmm1, xmm2, [a]{1to4} + __asm vaddps xmm1, xmm2, [a] +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25717: [x86][inline-asm][clang][fixup] accept 'v' constraint
coby created this revision. coby added a reviewer: m_zuckerman. coby added a subscriber: cfe-commits. coby set the repository for this revision to rL LLVM. In respect to https://reviews.llvm.org/D25004 (reverted) Repository: rL LLVM https://reviews.llvm.org/D25717 Files: lib/Basic/Targets.cpp test/CodeGen/x86-inline-asm-v-constraint.c Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -3949,6 +3949,7 @@ case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. @@ -3989,6 +3990,7 @@ case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. Index: test/CodeGen/x86-inline-asm-v-constraint.c === --- test/CodeGen/x86-inline-asm-v-constraint.c +++ test/CodeGen/x86-inline-asm-v-constraint.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}" 
+__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}" +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +} + +// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}" +__m512 testZMM(__m512 _zmm0, __m512 _zmm1) { +#ifdef AVX512 + __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0)); +#endif + return _zmm0; +} Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -3949,6 +3949,7 @@ case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. + case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. @@ -3989,6 +3990,7 @@ case 't': case 'u': return Size <= 128; + case 'v': case 'x': if (SSELevel >= AVX512F) // 512-bit zmm registers can be used if target supports AVX512F. 
Index: test/CodeGen/x86-inline-asm-v-constraint.c === --- test/CodeGen/x86-inline-asm-v-constraint.c +++ test/CodeGen/x86-inline-asm-v-constraint.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE + +typedef float __m128 __attribute__ ((vector_size (16))); +typedef float __m256 __attribute__ ((vector_size (32))); +typedef float __m512 __attribute__ ((vector_size (64))); + +// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}" +__m128 testXMM(__m128 _xmm0, long _l) { + __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0)); + return _xmm0; +} + +// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}" +__m256 testYMM(__m256 _ymm0) { +#ifdef AVX + __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0)); +#endif + return _ymm0; +
[PATCH] D25760: [AVX512][Clang] Adding missing instructions' variations
coby created this revision. coby added reviewers: m_zuckerman, igorb, AsafBadouh, craig.topper. coby added a subscriber: cfe-commits. coby set the repository for this revision to rL LLVM. The following patch introduces the following alterations: 1. correcting the 'vpmultishiftqb' instruction, along with the respective intrinsics 2. Adding missing AVX512 instruction variants. llvm part: https://reviews.llvm.org/D25759 Repository: rL LLVM https://reviews.llvm.org/D25760 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512vbmiintrin.h lib/Headers/avx512vbmivlintrin.h test/CodeGen/avx512vbmi-builtins.c Index: lib/Headers/avx512vbmiintrin.h === --- lib/Headers/avx512vbmiintrin.h +++ lib/Headers/avx512vbmiintrin.h @@ -105,30 +105,30 @@ } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) +_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, -(__v64qi) __Y, -(__v64qi) __W, -(__mmask64) __M); + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X, +(__v8di) __Y, +(__v8di) __W, +(__mmask8) __M); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y) +_mm512_maskz_multishift_epi64_epi8 (__mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, -(__v64qi) __Y, -(__v64qi) _mm512_setzero_si512 (), -(__mmask64) __M); + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X, +(__v8di) __Y, +(__v8di) _mm512_setzero_si512 (), +(__mmask8) __M); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, -(__v64qi) __Y, -(__v64qi) _mm512_undefined_epi32 (), -(__mmask64) -1); + return (__m512i) 
__builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X, +(__v8di) __Y, +(__v8di) _mm512_undefined_epi32 (), +(__mmask8) -1); } Index: lib/Headers/avx512vbmivlintrin.h === --- lib/Headers/avx512vbmivlintrin.h +++ lib/Headers/avx512vbmivlintrin.h @@ -184,61 +184,61 @@ } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) +_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, -(__v16qi) __Y, -(__v16qi) __W, -(__mmask16) __M); + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X, +(__v2di) __Y, +(__v2di) __W, +(__mmask8) __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) +_mm_maskz_multishift_epi64_epi8 (__mmask8 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, -(__v16qi) __Y, -(__v16qi) + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X, +(__v2di) __Y, +(__v2di) _mm_setzero_si128 (), -(__mmask16) __M); +(__mmask8) __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, -(__v16qi) __Y, -(__v16qi) + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X, +(__v2di) __Y, +(__v2di) _mm_undefined_si128 (), -(__mmask16) -1); +(__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) +_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, -(__v32qi) __Y, -(__v32qi) __W, -(__mmask32) __M); + return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X, +(__v4di) __Y, +(__v4di) __W, 
+(__mmask8) __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_multishift_epi64_epi8 (__mmask32
[PATCH] D26258: [AVX512][llvm] Amending vpmultishiftqb
coby created this revision. coby added reviewers: craig.topper, m_zuckerman, igorb, delena, AsafBadouh. coby added a subscriber: cfe-commits. coby set the repository for this revision to rL LLVM. The 'vpmultishiftqb' instruction was implemented incorrectly; this patch amends it. This is the clang part; the llvm side is accessible here: https://reviews.llvm.org/D26257 Repository: rL LLVM https://reviews.llvm.org/D26258 Files: include/clang/Basic/BuiltinsX86.def lib/Headers/avx512vbmiintrin.h lib/Headers/avx512vbmivlintrin.h Index: lib/Headers/avx512vbmiintrin.h === --- lib/Headers/avx512vbmiintrin.h +++ lib/Headers/avx512vbmiintrin.h @@ -107,27 +107,27 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, -(__v64qi) __Y, -(__v64qi) __W, + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X, +(__v8di) __Y, +(__v8di) __W, (__mmask64) __M); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, -(__v64qi) __Y, -(__v64qi) _mm512_setzero_si512 (), + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X, +(__v8di) __Y, +(__v8di) _mm512_setzero_si512 (), (__mmask64) __M); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, -(__v64qi) __Y, -(__v64qi) _mm512_undefined_epi32 (), + return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X, +(__v8di) __Y, +(__v8di) _mm512_undefined_epi32 (), (__mmask64) -1); } Index: lib/Headers/avx512vbmivlintrin.h === --- lib/Headers/avx512vbmivlintrin.h +++ lib/Headers/avx512vbmivlintrin.h @@ -186,57 +186,57 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_multishift_epi64_epi8 
(__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, -(__v16qi) __Y, -(__v16qi) __W, + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X, +(__v2di) __Y, +(__v2di) __W, (__mmask16) __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, -(__v16qi) __Y, -(__v16qi) + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X, +(__v2di) __Y, +(__v2di) _mm_setzero_si128 (), (__mmask16) __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) { - return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, -(__v16qi) __Y, -(__v16qi) + return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X, +(__v2di) __Y, +(__v2di) _mm_undefined_si128 (), (__mmask16) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, -(__v32qi) __Y, -(__v32qi) __W, + return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X, +(__v4di) __Y, +(__v4di) __W, (__mmask32) __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, -(__v32qi) __Y, -(__v32qi) + return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X, +(__v4di) __Y, +(__v4di) _mm256_setzero_si256 (), (__mmask32) __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) { - return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, -(__v32qi) __Y, -(__v32qi) + return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X, +(__v4di) 
__Y, +(__v4di)
[PATCH] D25760: [AVX512][Clang] Adding missing instructions' variations
coby abandoned this revision. coby added a comment. Amending vpmultishiftqb is currently being maintained at the following patches: https://reviews.llvm.org/D26258 https://reviews.llvm.org/D26257 All other changes are discarded Repository: rL LLVM https://reviews.llvm.org/D25760 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26587: [X86][AVX512][InlineASM][MS][clang] (I|G)CC Memory adjustments compatibility
coby created this revision. coby added reviewers: m_zuckerman, rnk, myatsina. coby added a subscriber: cfe-commits. coby set the repository for this revision to rL LLVM. (I|G)CC will adjust a missing size qualifier on an indirect memory reference according to a certain logic, which is presented in the suggested patch. This patch implements a narrow view of those adjustments - only upon AVX512 platforms, and only if the adjusted operand is missing a size qualifier & is of SIMD type (or a broadcast). Summary of adjustment logic: Unqualified indirect memory reference (i.e. - via 'brackets') AVX512 platform Operand is of SIMD type llvm part can be viewed here: https://reviews.llvm.org/D26586 Repository: rL LLVM https://reviews.llvm.org/D26587 Files: test/CodeGen/ms-inline-asm-avx512-relaxations.c Index: test/CodeGen/ms-inline-asm-avx512-relaxations.c === --- test/CodeGen/ms-inline-asm-avx512-relaxations.c +++ test/CodeGen/ms-inline-asm-avx512-relaxations.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -fasm-blocks -o - | FileCheck %s + +// Minimal reproducer +// Case1: Check integrity of inspected patch upon broadcasting +// Case2: Check integrity of inspected patch upon SIMD mem ref +// Case3: Check we don't mess up with non-SIMD mem ref +// Case4: Check non-AVX512 insts aren't affected + +void F() { + char a; + // CHECK: vaddps xmm1, xmm2, dword ptr $0{1to4} + // CHECK: vaddps xmm1, xmm2, xmmword ptr $1 + // CHECK: vcomiss xmm1, byte ptr $2 + // CHECK: mov rax, byte ptr $3 + __asm vaddps xmm1, xmm2, [a]{1to4} + __asm vaddps xmm1, xmm2, [a] + __asm vcomiss xmm1, [a] + __asm mov rax, [a] +} Index: test/CodeGen/ms-inline-asm-avx512-relaxations.c === --- test/CodeGen/ms-inline-asm-avx512-relaxations.c +++ test/CodeGen/ms-inline-asm-avx512-relaxations.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -fasm-blocks -o - | FileCheck %s + +// 
Minimal reproducer +// Case1: Check integrity of inspected patch upon broadcasting +// Case2: Check integrity of inspected patch upon SIMD mem ref +// Case3: Check we don't mess up with non-SIMD mem ref +// Case4: Check non-AVX512 insts aren't affected + +void F() { + char a; + // CHECK: vaddps xmm1, xmm2, dword ptr $0{1to4} + // CHECK: vaddps xmm1, xmm2, xmmword ptr $1 + // CHECK: vcomiss xmm1, byte ptr $2 + // CHECK: mov rax, byte ptr $3 + __asm vaddps xmm1, xmm2, [a]{1to4} + __asm vaddps xmm1, xmm2, [a] + __asm vcomiss xmm1, [a] + __asm mov rax, [a] +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
RE: [PATCH] D41583: [x86][icelake][vaes]
I see. agreed. Will upload shortly From: Craig Topper [mailto:craig.top...@gmail.com] Sent: Tuesday, December 26, 2017 19:52 To: reviews+d41583+public+5c6eea8282599...@reviews.llvm.org Cc: cfe-commits@lists.llvm.org; Tayree, Coby ; mgo...@gentoo.org Subject: Re: [PATCH] D41583: [x86][icelake][vaes] I meant if the command line says “-mvaes -mno-aes” we should make sure to disable vaes On Tue, Dec 26, 2017 at 9:47 AM coby via Phabricator mailto:revi...@reviews.llvm.org>> wrote: coby added inline comments. Comment at: lib/Basic/Targets/X86.cpp:573 setMMXLevel(Features, AMD3DNowAthlon, Enabled); } else if (Name == "aes") { if (Enabled) craig.topper wrote: > Shouldn't -aes imply -vaes? how come? perhaps i'm missing here something? why would the first imply the latter? following this road an atom z8XXX should be capable of supporting vaes, for example (where it lacks avx, for example, https://www.intel.com/content/www/us/en/processors/atom/atom-z8000-datasheet-vol-1.html) also, in that sense, pclmul is implying vpclmulqdq Repository: rC Clang https://reviews.llvm.org/D41583 -- ~Craig - Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits