[PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint

2016-09-28 Thread coby via cfe-commits
coby created this revision.
coby added reviewers: echristo, delena.
coby added a subscriber: cfe-commits.
coby set the repository for this revision to rL LLVM.
Herald added a subscriber: mehdi_amini.

1. For non-AVX512 x86 targets, the 'v' constraint imitates the already implemented
   'x' constraint, i.e. it allows XMM{0-15} and YMM{0-15}, depending on the target
   architecture and mode (32/64-bit).
2. For AVX512 targets it allows [X,Y,Z]MM{0-31} (mode dependent). (See the usage
   sketch below.)
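
For illustration only (not part of the patch), a minimal sketch of how the
constraint is used from C. Everything here is an assumption rather than patch
content: the file is expected to be compiled for x86-64 with at least -mavx.

  /* Minimal sketch, assuming x86-64 and at least -mavx. */
  typedef float v8sf __attribute__((vector_size(32)));

  static inline v8sf add8(v8sf a, v8sf b) {
    v8sf r;
    /* "v" may be satisfied by any XMM/YMM/ZMM register the active ISA level
       allows (registers 16-31 included on AVX512 targets, per the
       description above).                                                  */
    __asm__("vaddps %2, %1, %0" : "=v"(r) : "v"(a), "v"(b));
    return r;
  }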

This patch applies the needed changes to Clang.
LLVM patch:

Repository:
  rL LLVM

https://reviews.llvm.org/D25004

Files:
  lib/Basic/Targets.cpp
  test/CodeGen/x86-inline-asm-v-constraint.c

Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -3949,6 +3949,7 @@
   case 'u': // Second from top of floating point stack.
   case 'q': // Any register accessible as [r]l: a, b, c, and d.
   case 'y': // Any MMX register.
+  case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
   case 'x': // Any SSE register.
   case 'Q': // Any register accessible as [r]h: a, b, c, and d.
   case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
@@ -3989,6 +3990,7 @@
   case 't':
   case 'u':
 return Size <= 128;
+  case 'v':
   case 'x':
 if (SSELevel >= AVX512F)
   // 512-bit zmm registers can be used if target supports AVX512F.
Index: test/CodeGen/x86-inline-asm-v-constraint.c
===
--- test/CodeGen/x86-inline-asm-v-constraint.c
+++ test/CodeGen/x86-inline-asm-v-constraint.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
+
+typedef float __m128 __attribute__ ((vector_size (16)));
+typedef float __m256 __attribute__ ((vector_size (32)));
+typedef float __m512 __attribute__ ((vector_size (64)));
+
+// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1)
+__m128 testXMM(__m128 _xmm0, long _l) {
+  __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0));
+  return _xmm0;
+}
+
+// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0)
+__m256 testYMM(__m256 _ymm0) {
+#ifdef AVX
+  __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0));
+#endif
+  return _ymm0;
+}
+
+// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1)
+__m512 testZMM(__m512 _zmm0, __m512 _zmm1) {
+#ifdef AVX512
+  __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0));
+#endif
+  return _zmm0;
+}


Re: [PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint

2016-09-29 Thread coby via cfe-commits
coby updated this revision to Diff 72961.
coby marked an inline comment as done.
coby added a comment.

Addressing comments:
Added a check for KNL as well


Repository:
  rL LLVM

https://reviews.llvm.org/D25004

Files:
  x86-inline-asm-v-constraint.c

Index: x86-inline-asm-v-constraint.c
===
--- x86-inline-asm-v-constraint.c
+++ x86-inline-asm-v-constraint.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
+
+typedef float __m128 __attribute__ ((vector_size (16)));
+typedef float __m256 __attribute__ ((vector_size (32)));
+typedef float __m512 __attribute__ ((vector_size (64)));
+
+// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1)
+__m128 testXMM(__m128 _xmm0, long _l) {
+  __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0));
+  return _xmm0;
+}
+
+// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0)
+__m256 testYMM(__m256 _ymm0) {
+#ifdef AVX
+  __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0));
+#endif
+  return _ymm0;
+}
+
+// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1)
+__m512 testZMM(__m512 _zmm0, __m512 _zmm1) {
+#ifdef AVX512
+  __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0));
+#endif
+  return _zmm0;
+}
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint

2016-10-09 Thread coby via cfe-commits
coby removed rL LLVM as the repository for this revision.
coby updated this revision to Diff 74060.

https://reviews.llvm.org/D25004

Files:
  lib/Basic/Targets.cpp
  test/CodeGen/x86-inline-asm-v-constraint.c


Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -3949,6 +3949,7 @@
   case 'u': // Second from top of floating point stack.
   case 'q': // Any register accessible as [r]l: a, b, c, and d.
   case 'y': // Any MMX register.
+  case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
   case 'x': // Any SSE register.
   case 'Q': // Any register accessible as [r]h: a, b, c, and d.
   case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
@@ -3989,6 +3990,7 @@
   case 't':
   case 'u':
 return Size <= 128;
+  case 'v':
   case 'x':
 if (SSELevel >= AVX512F)
   // 512-bit zmm registers can be used if target supports AVX512F.
Index: test/CodeGen/x86-inline-asm-v-constraint.c
===
--- test/CodeGen/x86-inline-asm-v-constraint.c
+++ test/CodeGen/x86-inline-asm-v-constraint.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
+
+typedef float __m128 __attribute__ ((vector_size (16)));
+typedef float __m256 __attribute__ ((vector_size (32)));
+typedef float __m512 __attribute__ ((vector_size (64)));
+
+// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %0, <4 x float> %1)
+__m128 testXMM(__m128 _xmm0, long _l) {
+  __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0));
+  return _xmm0;
+}
+
+// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %0)
+__m256 testYMM(__m256 _ymm0) {
+#ifdef AVX
+  __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0));
+#endif
+  return _ymm0;
+}
+
+// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %0, <16 x float> %1)
+__m512 testZMM(__m512 _zmm0, __m512 _zmm1) {
+#ifdef AVX512
+  __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0));
+#endif
+  return _zmm0;
+}


[PATCH] D25004: [x86][inline-asm][clang] accept 'v' constraint

2016-10-10 Thread coby via cfe-commits
coby closed this revision.
coby added a comment.

Committed to revision 283716.


https://reviews.llvm.org/D25004



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D26258: [AVX512][clang] Amending vpmultishiftqb

2016-11-20 Thread coby via cfe-commits
coby abandoned this revision.
coby added a comment.

deprecated.


Repository:
  rL LLVM

https://reviews.llvm.org/D26258



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D26587: [X86][AVX512][InlineASM][MS][clang] (I|G)CC Memory adjustments compatibility

2016-11-20 Thread coby via cfe-commits
coby updated this revision to Diff 78681.

Repository:
  rL LLVM

https://reviews.llvm.org/D26587

Files:
  test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c


Index: test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c
===
--- test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c
+++ test/CodeGen/ms-inline-asm-avx512-memory-adjustments.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -fasm-blocks -o - | FileCheck %s
+
+// Case1: Check integrity of inspected patch upon broadcasting
+// Case2: Check integrity of inspected patch upon SIMD mem ref
+
+void F() {
+  char a;
+  // CHECK: vaddps xmm1, xmm2, dword ptr $0{1to4}
+  // CHECK: vaddps xmm1, xmm2, xmmword ptr $1
+  __asm vaddps xmm1, xmm2, [a]{1to4}
+  __asm vaddps xmm1, xmm2, [a]
+}
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25717: [x86][inline-asm][clang][fixup] accept 'v' constraint

2016-10-18 Thread coby via cfe-commits
coby created this revision.
coby added a reviewer: m_zuckerman.
coby added a subscriber: cfe-commits.
coby set the repository for this revision to rL LLVM.

With respect to https://reviews.llvm.org/D25004 (which was reverted).


Repository:
  rL LLVM

https://reviews.llvm.org/D25717

Files:
  lib/Basic/Targets.cpp
  test/CodeGen/x86-inline-asm-v-constraint.c


Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -3949,6 +3949,7 @@
   case 'u': // Second from top of floating point stack.
   case 'q': // Any register accessible as [r]l: a, b, c, and d.
   case 'y': // Any MMX register.
+  case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
   case 'x': // Any SSE register.
   case 'Q': // Any register accessible as [r]h: a, b, c, and d.
   case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
@@ -3989,6 +3990,7 @@
   case 't':
   case 'u':
 return Size <= 128;
+  case 'v':
   case 'x':
 if (SSELevel >= AVX512F)
   // 512-bit zmm registers can be used if target supports AVX512F.
Index: test/CodeGen/x86-inline-asm-v-constraint.c
===
--- test/CodeGen/x86-inline-asm-v-constraint.c
+++ test/CodeGen/x86-inline-asm-v-constraint.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - | FileCheck %s --check-prefix SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
+
+typedef float __m128 __attribute__ ((vector_size (16)));
+typedef float __m256 __attribute__ ((vector_size (32)));
+typedef float __m512 __attribute__ ((vector_size (64)));
+
+// SSE: call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"
+__m128 testXMM(__m128 _xmm0, long _l) {
+  __asm__("vmovhlps %1, %2, %0" :"=v"(_xmm0) : "v"(_l), "v"(_xmm0));
+  return _xmm0;
+}
+
+// AVX: call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"
+__m256 testYMM(__m256 _ymm0) {
+#ifdef AVX
+  __asm__("vmovsldup %1, %0" :"=v"(_ymm0) : "v"(_ymm0));
+#endif
+  return _ymm0;
+}
+
+// AVX512: call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"
+__m512 testZMM(__m512 _zmm0, __m512 _zmm1) {
+#ifdef AVX512
+  __asm__("vpternlogd $0, %1, %2, %0" :"=v"(_zmm0) : "v"(_zmm1), "v"(_zmm0));
+#endif
+  return _zmm0;
+}


[PATCH] D25760: [AVX512][Clang] Adding missing instructions' variations

2016-10-19 Thread coby via cfe-commits
coby created this revision.
coby added reviewers: m_zuckerman, igorb, AsafBadouh, craig.topper.
coby added a subscriber: cfe-commits.
coby set the repository for this revision to rL LLVM.

This patch introduces the following alterations:

1. Correcting the 'vpmultishiftqb' builtins, along with the respective
   intrinsics (a brief usage sketch follows the LLVM link below).
2. Adding missing AVX512 instruction variants.

llvm part: https://reviews.llvm.org/D25759
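
For context, a minimal usage sketch of the unmasked intrinsic touched by this
patch. It is illustrative only (the function name is made up) and assumes a
compiler and target with AVX512VBMI support (e.g. -mavx512vbmi):

  #include <immintrin.h>

  /* Each byte of the result is an 8-bit field read from the corresponding
     64-bit lane of 'src', starting at the bit offset given by the matching
     control byte in 'ctrl'.                                                */
  __m512i pick_bytes(__m512i ctrl, __m512i src) {
    return _mm512_multishift_epi64_epi8(ctrl, src);
  }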


Repository:
  rL LLVM

https://reviews.llvm.org/D25760

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avx512vbmiintrin.h
  lib/Headers/avx512vbmivlintrin.h
  test/CodeGen/avx512vbmi-builtins.c

Index: lib/Headers/avx512vbmiintrin.h
===
--- lib/Headers/avx512vbmiintrin.h
+++ lib/Headers/avx512vbmiintrin.h
@@ -105,30 +105,30 @@
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
+_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-(__v64qi) __Y,
-(__v64qi) __W,
-(__mmask64) __M);
+  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X,
+(__v8di) __Y,
+(__v8di) __W,
+(__mmask8) __M);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
+_mm512_maskz_multishift_epi64_epi8 (__mmask8 __M, __m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-(__v64qi) __Y,
-(__v64qi) _mm512_setzero_si512 (),
-(__mmask64) __M);
+  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X,
+(__v8di) __Y,
+(__v8di) _mm512_setzero_si512 (),
+(__mmask8) __M);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-(__v64qi) __Y,
-(__v64qi) _mm512_undefined_epi32 (),
-(__mmask64) -1);
+  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X,
+(__v8di) __Y,
+(__v8di) _mm512_undefined_epi32 (),
+(__mmask8) -1);
 }
 
 
Index: lib/Headers/avx512vbmivlintrin.h
===
--- lib/Headers/avx512vbmivlintrin.h
+++ lib/Headers/avx512vbmivlintrin.h
@@ -184,61 +184,61 @@
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
+_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-(__v16qi) __Y,
-(__v16qi) __W,
-(__mmask16) __M);
+  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X,
+(__v2di) __Y,
+(__v2di) __W,
+(__mmask8) __M);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
+_mm_maskz_multishift_epi64_epi8 (__mmask8 __M, __m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-(__v16qi) __Y,
-(__v16qi)
+  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X,
+(__v2di) __Y,
+(__v2di)
 _mm_setzero_si128 (),
-(__mmask16) __M);
+(__mmask8) __M);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-(__v16qi) __Y,
-(__v16qi)
+  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X,
+(__v2di) __Y,
+(__v2di)
 _mm_undefined_si128 (),
-(__mmask16) -1);
+(__mmask8) -1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
+_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
 {
-  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
-(__v32qi) __Y,
-(__v32qi) __W,
-(__mmask32) __M);
+  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X,
+(__v4di) __Y,
+(__v4di) __W,
+(__mmask8) __M);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_multishift_epi64_epi8 (__mmask32 

[PATCH] D26258: [AVX512][llvm] Amending vpmultishiftqb

2016-11-02 Thread coby via cfe-commits
coby created this revision.
coby added reviewers: craig.topper, m_zuckerman, igorb, delena, AsafBadouh.
coby added a subscriber: cfe-commits.
coby set the repository for this revision to rL LLVM.

The 'vpmultishiftqb' intrinsics were implemented incorrectly; this patch amends them.
This is the Clang part; the LLVM side is available here:
https://reviews.llvm.org/D26257


Repository:
  rL LLVM

https://reviews.llvm.org/D26258

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/Headers/avx512vbmiintrin.h
  lib/Headers/avx512vbmivlintrin.h

Index: lib/Headers/avx512vbmiintrin.h
===
--- lib/Headers/avx512vbmiintrin.h
+++ lib/Headers/avx512vbmiintrin.h
@@ -107,27 +107,27 @@
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-(__v64qi) __Y,
-(__v64qi) __W,
+  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X,
+(__v8di) __Y,
+(__v8di) __W,
 (__mmask64) __M);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-(__v64qi) __Y,
-(__v64qi) _mm512_setzero_si512 (),
+  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X,
+(__v8di) __Y,
+(__v8di) _mm512_setzero_si512 (),
 (__mmask64) __M);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
-(__v64qi) __Y,
-(__v64qi) _mm512_undefined_epi32 (),
+  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v8di) __X,
+(__v8di) __Y,
+(__v8di) _mm512_undefined_epi32 (),
 (__mmask64) -1);
 }
 
Index: lib/Headers/avx512vbmivlintrin.h
===
--- lib/Headers/avx512vbmivlintrin.h
+++ lib/Headers/avx512vbmivlintrin.h
@@ -186,57 +186,57 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-(__v16qi) __Y,
-(__v16qi) __W,
+  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X,
+(__v2di) __Y,
+(__v2di) __W,
 (__mmask16) __M);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-(__v16qi) __Y,
-(__v16qi)
+  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X,
+(__v2di) __Y,
+(__v2di)
 _mm_setzero_si128 (),
 (__mmask16) __M);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
 {
-  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
-(__v16qi) __Y,
-(__v16qi)
+  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v2di) __X,
+(__v2di) __Y,
+(__v2di)
 _mm_undefined_si128 (),
 (__mmask16) -1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
-(__v32qi) __Y,
-(__v32qi) __W,
+  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X,
+(__v4di) __Y,
+(__v4di) __W,
 (__mmask32) __M);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
-(__v32qi) __Y,
-(__v32qi)
+  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X,
+(__v4di) __Y,
+(__v4di)
 _mm256_setzero_si256 (),
 (__mmask32) __M);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
 {
-  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
-(__v32qi) __Y,
-(__v32qi)
+  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v4di) __X,
+(__v4di) __Y,
+(__v4di)
   

[PATCH] D25760: [AVX512][Clang] Adding missing instructions' variations

2016-11-02 Thread coby via cfe-commits
coby abandoned this revision.
coby added a comment.

Amending vpmultishiftqb is currently being maintained at the following patches:
https://reviews.llvm.org/D26258
https://reviews.llvm.org/D26257
All other changes are discarded.


Repository:
  rL LLVM

https://reviews.llvm.org/D25760



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D26587: [X86][AVX512][InlineASM][MS][clang] (I|G)CC Memory adjustments compatibility

2016-11-13 Thread coby via cfe-commits
coby created this revision.
coby added reviewers: m_zuckerman, rnk, myatsina.
coby added a subscriber: cfe-commits.
coby set the repository for this revision to rL LLVM.

ICC and GCC will infer a missing size qualifier on an indirect memory
reference according to a certain logic, which this patch reproduces.
The patch implements a narrow view of those adjustments: only on AVX512
platforms, and only if the adjusted operand is missing a size qualifier and is
of SIMD type (or a broadcast).
Summary of the adjustment logic (all of the following must hold; see the
annotated sketch below):

Unqualified indirect memory reference (i.e. via 'brackets')
AVX512 platform
Operand is of SIMD type

The llvm part can be viewed here: https://reviews.llvm.org/D26586
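
As a condensed illustration of the three conditions above, here is a
hypothetical block mirroring the test below (MS-style blocks require
-fasm-blocks; the inferred qualifiers are taken from the test's CHECK lines):

  void adjust_demo() {
    char a;
    /* Unqualified bracketed ref, AVX512 instruction, broadcast operand:
       the element size is inferred  ->  "dword ptr [a]{1to4}".          */
    __asm vaddps xmm1, xmm2, [a]{1to4}
    /* Unqualified bracketed ref, AVX512 instruction, SIMD memory operand:
       the operand's vector width is inferred  ->  "xmmword ptr [a]".     */
    __asm vaddps xmm1, xmm2, [a]
    /* Non-SIMD instruction: no adjustment, the variable's own size is
       kept ("byte ptr", since 'a' is a char).                            */
    __asm mov rax, [a]
  }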


Repository:
  rL LLVM

https://reviews.llvm.org/D26587

Files:
  test/CodeGen/ms-inline-asm-avx512-relaxations.c


Index: test/CodeGen/ms-inline-asm-avx512-relaxations.c
===
--- test/CodeGen/ms-inline-asm-avx512-relaxations.c
+++ test/CodeGen/ms-inline-asm-avx512-relaxations.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -fasm-blocks -o - | FileCheck %s
+
+// Minimal reproducer
+// Case1: Check integrity of inspected patch upon broadcasting
+// Case2: Check integrity of inspected patch upon SIMD mem ref
+// Case3: Check we don't mess up with non-SIMD mem ref
+// Case4: Check non-AVX512 insts aren't affected
+
+void F() {
+  char a;
+  // CHECK: vaddps xmm1, xmm2, dword ptr $0{1to4}
+  // CHECK: vaddps xmm1, xmm2, xmmword ptr $1
+  // CHECK: vcomiss xmm1, byte ptr $2
+  // CHECK: mov rax, byte ptr $3
+  __asm vaddps xmm1, xmm2, [a]{1to4}
+  __asm vaddps xmm1, xmm2, [a]
+  __asm vcomiss xmm1, [a]
+  __asm mov rax, [a]
+}
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


RE: [PATCH] D41583: [x86][icelake][vaes]

2017-12-26 Thread Tayree, Coby via cfe-commits
I see; agreed. Will upload shortly.

From: Craig Topper [mailto:craig.top...@gmail.com]
Sent: Tuesday, December 26, 2017 19:52
To: reviews+d41583+public+5c6eea8282599...@reviews.llvm.org
Cc: cfe-commits@lists.llvm.org; Tayree, Coby; mgo...@gentoo.org
Subject: Re: [PATCH] D41583: [x86][icelake][vaes]

I meant if the command line says “-mvaes -mno-aes” we should make sure to disable vaes.

On Tue, Dec 26, 2017 at 9:47 AM coby via Phabricator <revi...@reviews.llvm.org> wrote:
coby added inline comments.



Comment at: lib/Basic/Targets/X86.cpp:573
 setMMXLevel(Features, AMD3DNowAthlon, Enabled);
   } else if (Name == "aes") {
 if (Enabled)

craig.topper wrote:
> Shouldn't -aes imply -vaes?
How come? Perhaps I'm missing something here: why would the first imply the
latter? Following this road, an Atom Z8XXX would be considered capable of
supporting VAES even though it lacks AVX, for example
(https://www.intel.com/content/www/us/en/processors/atom/atom-z8000-datasheet-vol-1.html).
Also, in that sense, pclmul would imply vpclmulqdq.
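
For clarity, the behavior being requested amounts to something like the
following hypothetical sketch (not the committed change; the map type is a
stand-in for whatever feature map X86.cpp actually uses): clearing "aes" must
also clear "vaes", so that "-mvaes -mno-aes" ends up with VAES disabled.

  #include <map>
  #include <string>

  // Hypothetical sketch only, modelled loosely on the X86.cpp pattern quoted
  // above: disabling AES also disables VAES.
  static void setAESEnabled(std::map<std::string, bool> &Features, bool Enabled) {
    Features["aes"] = Enabled;
    if (!Enabled)
      Features["vaes"] = false;
  }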


Repository:
  rC Clang

https://reviews.llvm.org/D41583


--
~Craig
-
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits