[PATCH] D60748: Fix i386 struct and union parameter alignment

2019-09-18 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 220793.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/x86_32-align-linux.c
  clang/test/CodeGen/x86_32-align-linux.cpp
  clang/test/CodeGen/x86_32-arguments-linux.c

Index: clang/test/CodeGen/x86_32-arguments-linux.c
===
--- clang/test/CodeGen/x86_32-arguments-linux.c
+++ clang/test/CodeGen/x86_32-arguments-linux.c
@@ -5,19 +5,19 @@
 // CHECK: i8 signext %a0, %struct.s56_0* byval(%struct.s56_0) align 4 %a1,
 // CHECK: i64 %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4 %0,
 // CHECK: <1 x double> %a4, %struct.s56_2* byval(%struct.s56_2) align 4 %1,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 4 %2,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 4 %3,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval(%struct.s56_5) align 4 %4,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval(%struct.s56_6) align 4 %5)
+// CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 16 %a7,
+// CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 16 %a9,
+// CHECK: <8 x i32> %a10, %struct.s56_5* byval(%struct.s56_5) align 32 %a11,
+// CHECK: <4 x double> %a12, %struct.s56_6* byval(%struct.s56_6) align 32 %a13)
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
 // CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
 // CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
+// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 16 %{{[^ ]*}},
+// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 16 %{{[^ ]*}},
+// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 32 %{{[^ ]*}},
+// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 32 %{{[^ ]*}})
 // CHECK: }
 //
 //  [i386] clang misaligns long double in structures
Index: clang/test/CodeGen/x86_32-align-linux.cpp
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+typedef __attribute__((aligned(16))) int alignedint16;
+typedef __attribute__((aligned(64))) int alignedint64;
+
+class __attribute__((aligned(64))) X1 {
+  class  __attribute__((aligned(32))) {
+   __m128 a1;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(64))) X2 {
+  class  __attribute__((aligned(32))) {
+int a1;
+alignedint16 a2;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(32))) X3 {
+  class __attribute__((aligned(64))) {
+int a1;
+alignedint16 a2;
+  } a;
+ int b;
+};
+
+class __attribute__((aligned(16))) X4 {
+  class  __attribute__((aligned(32))) {
+int a1;
+alignedint64 a2;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(64))) X5 {
+  int x;
+};
+
+class __attribute__((aligned(64))) X6 {
+  int x;
+  alignedint64 y;
+};
+
+extern void foo(int, ...);
+
+class X1 x1;
+class X2 x2;
+class X3 x3;
+class X4 x4;
+class X5 x5;
+class X6 x6;
+
+// CHECK-LABEL: define void @_Z4testv()
+// CHECK: entry:
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X1* byval(%class.X1) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X2* byval(%class.X2) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X3* byval(%class.X3) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X4* byval(%class.X4) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X5* byval(%class.X5) align 4
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X6* byval(%class.X6) align 64
+
+void test(void)
+{
+  foo(1, x1);
+  foo(1, x2);
+  foo(1, x3);
+  foo(1, x4);
+  foo(1, x5);
+  foo(1, x6);
+}
Index: clang/test/CodeGen/x86_32-align-linux.c
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+typedef union {
+  int d[4];
+   __m128 m;
+} M128;
+
+typedef __attribute__((aligned(16))) int alignedint16;
+typedef __attribute__((aligned(64))) int alignedint64;
+
+struct __attribute__((aligned(64))) X1 {
+ struct  __attribute__((aligned(32))

[PATCH] D60748: Fix i386 struct and union parameter alignment

2019-09-29 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D60748#1681178 , @kib wrote:

> In fact, can we have an option controlling this ?  Does it have anything to 
> do with -malign-data gcc switch ?
>
> We do want to be able to optionally generate code ABI-compatible with modern 
> gcc, per user discretion.


I found that the -malign-data option only affects the alignment of data in the
data segment. -malign-data has three options: "compat", "abi" and "cacheline".
The default in GCC is "compat", and clang's behavior is consistent with "abi".
The alignment of data on the stack, and of parameters passed on the stack, is
not affected. This patch only affects the alignment of parameter passing.
Should we add an option just like -malign-data?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78473: [i386] Fix bug that get __m128/__m256/__m512 with wrong alignment for variadic functions.Currently clang aligns to 16 bytes when passing __m128/__m256/__m512 vector type.However, when

2020-04-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
LiuChen3 abandoned this revision.

...alignment, including
struct, union and vector types. For struct/union, there is no problem because they
will be aligned
to 4 bytes when passed. For the __m128/__m256/__m512 vector types, it will get
the wrong result.

This patch will get va_arg according to the rules below:

1. When the target doesn't support avx and avx512: get __m128/__m256/__m512 
from 16 bytes aligned stack.
2. When the target supports avx: get __m256/__m512 from 32 bytes aligned stack.
3. When the target supports avx512: get __m512 from 64 bytes aligned stack.

Notice: The current behavior of clang is inconsistent with i386 abi. The 
i386-abi says as below:

1. If parameters of type __m256 are required to be passed on the stack, the 
stack pointer must be aligned on a 0 mod 32 byte boundary at the time of the 
call.
2. If parameters of type __m512 are required to be passed on the stack, the 
stack pointer must be aligned on a 0 mod 64 byte boundary at the time of the 
call.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78473

Files:
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/x86_32-align-linux-avx2.c
  clang/test/CodeGen/x86_32-align-linux-avx512f.c
  clang/test/CodeGen/x86_32-align-linux.c

Index: clang/test/CodeGen/x86_32-align-linux.c
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+// CHECK-LABEL: define void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+  __m256 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m256);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
Index: clang/test/CodeGen/x86_32-align-linux-avx512f.c
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux-avx512f.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 63
+// CHECK-NEXT:  %2 = and i32 %1, -64
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
Index: clang/test/CodeGen/x86_32-align-linux-avx2.c
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux-avx2.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+// CHECK-LABEL: define void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 31
+// CHECK-NEXT:  

[PATCH] D78533: [i386] Fix bug that get __m128/__m256/__m512 with wrong alignment for variadic functions.

2020-04-21 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

I uploaded a new patch D78564  as another 
solution, but it modified the current clang calling convention.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78533/new/

https://reviews.llvm.org/D78533



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-08-02 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 449230.
LiuChen3 added a comment.

rebase and address rjmccall's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Basic/CodeGenOptions.h
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/2007-06-18-SextAttrAggregate.c
  clang/test/CodeGen/X86/integer_argument_passing.c
  clang/test/CodeGen/X86/x86_32-arguments-darwin.c
  clang/test/CodeGen/X86/x86_32-arguments-linux.c
  clang/test/CodeGen/X86/x86_64-arguments-nacl.c
  clang/test/CodeGen/X86/x86_64-arguments.c
  clang/test/CodeGen/attr-noundef.cpp
  clang/test/CodeGen/builtin-align.c
  clang/test/CodeGen/builtins-memset-inline.c
  clang/test/CodeGen/catch-implicit-integer-sign-changes.c
  clang/test/CodeGen/ext-int-cc.c
  clang/test/CodeGen/function-attributes.c
  clang/test/CodeGen/mangle-windows.c
  clang/test/CodeGen/matrix-type-builtins.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGen/regcall.c
  clang/test/CodeGen/vectorcall.c
  clang/test/CodeGenCXX/exceptions.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
  clang/test/CodeGenCXX/new-overflow.cpp
  clang/test/CodeGenCXX/virtual-bases.cpp
  clang/test/CodeGenObjC/property-atomic-bool.m
  clang/test/OpenMP/target_codegen_global_capture.cpp

Index: clang/test/OpenMP/target_codegen_global_capture.cpp
===
--- clang/test/OpenMP/target_codegen_global_capture.cpp
+++ clang/test/OpenMP/target_codegen_global_capture.cpp
@@ -1166,7 +1166,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -1469,7 +1469,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -1795,7 +1795,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -1814,7 +1814,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] comdat {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
Index: clang/test/CodeGenObjC/property-atomic-bool.m
===
--- clang/test/CodeGenObjC/property-atomic-bool.m
+++ clang/test/CodeGenObjC/property-atomic-bool.m
@@ -5,7 +5,7 @@
 // CHECK:   %[[TOBOOL:.*]] = trunc i8 %[[ATOMIC_LOAD]] to i1
 // CHECK:   ret i1 %[[TOBOOL]]
 
-// CHECK: define internal void @"\01-[A0 setP:]"({{.*}} i1 noundef zeroext {{.*}})
+// CHECK: define internal void @"\01-[A0 setP:]"({{.*}} i1 noundef {{.*}})
 // CHECK:   store atomic i8 %{{.*}}, i8* %{{.*}} seq_cst, align 1
 // CHECK:   ret void
 
@@ -14,7 +14,7 @@
 // CHECK:   %[[TOBOOL:.*]] = trunc i8 %load to i1
 // CHECK:   ret i1 %[[TOBOOL]]
 
-// CHECK: define internal void @"\01-[A1 setP:]"({{.*}} i1 noundef zeroext %p)
+// CHECK: define internal void @"\01-[A1 setP:]"({{.*}} i1 noundef %p)
 // CHECK:   store atomic i8 %{{.*}}, i8* %{{.*}} unordered, align 1
 // CHECK:   ret void
 
Index: clang/test/CodeGenCXX/virtual-bases.cpp
===
--- clang/

[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-08-02 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Sorry for the late update. I hope you can take the time to continue reviewing, 
@rjmccall . 
I'm not sure if I understand you correctly. Now, if 
`-mextend-small-integers=default` is used, the compiler will report an error.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-08-02 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D124435#3515541 , @jyknight wrote:

> I find the option names you have a bit confusing. I'd like to suggest calling 
> them, instead:
>
> caller: Extend a small integer parameter in the caller; callee will assume it 
> has already been extended.
> callee : Pass a small integer parameter directly in caller, extend in callee 
> when converting to full-width.
> both: Extend a small integer parameter in the caller; callee ALSO extends 
> when converting to full-width.
> default: Use the default rule for the target.
>
> I think that gets more to the point of what's going on here, even though it's 
> not exactly the case that "callee" always extends.

rjmccall's comment convinced me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D131134: [X86] Report error if the amx enabled on the non-64-bits target

2022-08-03 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added a project: All.
LiuChen3 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

For now we check if the amx intrinsics used on 64-bits target. This
check is not accurate.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D131134

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_amx_target_features.c

Index: clang/test/Preprocessor/x86_amx_target_features.c
===
--- clang/test/Preprocessor/x86_amx_target_features.c
+++ clang/test/Preprocessor/x86_amx_target_features.c
@@ -1,35 +1,35 @@
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=AMX-TILE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=AMX-TILE %s
 
 // AMX-TILE: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-bf16 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-bf16 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-BF16 %s
 
 // AMX-BF16: #define __AMXBF16__ 1
 // AMX-BF16: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-int8 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-int8 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-INT8 %s
 
 // AMX-INT8: #define __AMXINT8__ 1
 // AMX-INT8: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-TILE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-TILE %s
 
 // NOAMX-TILE-NOT: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-bf16 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-bf16 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
 
 // NOAMX-BF16-NOT: #define __AMXBF16__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -amx-bf16 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -amx-bf16 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
 
 // NOAMX-BF16-NOT: #define __AMXTILE__ 1
 // NOAMX-BF16-NOT: #define __AMXBF16__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-int8 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-int8 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
 
 // NOAMX-INT8-NOT: #define __AMXINT8__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -amx-int8 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -amx-int8 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
 
 // NOAMX-INT8-NOT: #define __AMXTILE__ 1
 // NOAMX-INT8-NOT: #define __AMXINT8__ 1
Index: clang/test/Driver/x86-target-features.c
===
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -270,20 +270,26 @@
 // WIDE_KL: "-target-feature" "+widekl"
 // NO-WIDE_KL: "-target-feature" "-widekl"
 
-// RUN: %clang --target=i386 -march=i386 -mamx-tile %s -### 2>&1 | FileCheck --check-prefix=AMX-TILE %s
-// RUN: %clang --target=i386 -march=i386 -mno-amx-tile %s -### 2>&1 | FileCheck --check-prefix=NO-AMX-TILE %s
+// RUN: %clang --target=x86_64 -march=x86-64 -mamx-tile %s -### 2>&1 | FileCheck --check-prefix=AMX-TILE %s
+// RUN: %clang --target=x86_64 -march=x86-64 -mno-amx-tile %s -### 2>&1 | FileCheck --check-prefix=NO-AMX-TILE %s
+// RUN: not %clang --target=i386 -march=i386 -mamx-tile %s 2>&1 | FileCheck %s -check-prefix=AMX-TILE-ERROR
 // AMX-TILE: "-target-feature" "+amx-tile"
 // NO-AMX-TILE: "-target-feature" "-amx-tile"
+// AMX-TILE-ERROR: error: option '-mamx-tile' cannot be specified on this target
 
-// RUN: %clang --target=i386 -march=i386 -mamx-bf16 %s -### 2>&1 | FileCheck --check-prefix=AMX-BF16 %s
-// RUN: %clang --target=i386 -march=i386 -mno-amx-bf16 %s -### 2>&1 | FileCheck -check-prefix=NO-AMX-BF16 %s
+// RUN: %clang --target=x86_64 -march=x86-64 -mamx-bf16 %s -### 2>&1 | FileCheck --check-prefix=AMX-BF16 %s
+// RUN: %clang --target=x86_64 -march=x86-64 -mno-amx-bf16 %s -### 2>&1 | FileCheck -check-prefix=NO-AMX-BF16 %s
+// RUN: not %clang --target=i386 -march=i386 -mamx-bf16 %s 2>&1 | FileCheck -check-prefix=AMX-BF16-ERROR %s

[PATCH] D131134: [X86] Report error if the amx enabled on the non-64-bits target

2022-08-05 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D131134#3699742 , @craig.topper 
wrote:

> How does this interact with `-march=native -m32`. Won't that pick up the amx 
> flag from CPUID?



In D131134#3699344 , @aaron.ballman 
wrote:

> I think the precommit CI failures might be relevant here -- can you 
> double-check those?

Yes. This test is only supported on an amdgpu-registered-target, so I didn't catch 
this failure on my local machine. Thanks for pointing it out.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131134/new/

https://reviews.llvm.org/D131134

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D131134: [X86] Report error if the amx enabled on the non-64-bits target

2022-08-05 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D131134#3699742 , @craig.topper 
wrote:

> How does this interact with `-march=native -m32`. Won't that pick up the amx 
> flag from CPUID?

Good point. I will continue the work.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131134/new/

https://reviews.llvm.org/D131134

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D131134: [X86] Report error if the amx enabled on the non-64-bits target

2022-08-07 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 450702.
LiuChen3 added a comment.

fix lit fail and handle '-march=native'


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131134/new/

https://reviews.llvm.org/D131134

Files:
  clang/include/clang/Basic/DiagnosticCommonKinds.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Driver/ToolChains/Arch/X86.cpp
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/predefined-arch-macros.c
  clang/test/Preprocessor/x86_amx_target_features.c

Index: clang/test/Preprocessor/x86_amx_target_features.c
===
--- clang/test/Preprocessor/x86_amx_target_features.c
+++ clang/test/Preprocessor/x86_amx_target_features.c
@@ -1,35 +1,35 @@
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=AMX-TILE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=AMX-TILE %s
 
 // AMX-TILE: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-bf16 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-bf16 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-BF16 %s
 
 // AMX-BF16: #define __AMXBF16__ 1
 // AMX-BF16: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-int8 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-int8 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-INT8 %s
 
 // AMX-INT8: #define __AMXINT8__ 1
 // AMX-INT8: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-TILE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-TILE %s
 
 // NOAMX-TILE-NOT: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-bf16 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-bf16 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
 
 // NOAMX-BF16-NOT: #define __AMXBF16__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -amx-bf16 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -amx-bf16 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
 
 // NOAMX-BF16-NOT: #define __AMXTILE__ 1
 // NOAMX-BF16-NOT: #define __AMXBF16__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-int8 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-int8 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
 
 // NOAMX-INT8-NOT: #define __AMXINT8__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -amx-int8 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -amx-int8 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
 
 // NOAMX-INT8-NOT: #define __AMXTILE__ 1
 // NOAMX-INT8-NOT: #define __AMXINT8__ 1
Index: clang/test/Preprocessor/predefined-arch-macros.c
===
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -1647,9 +1647,6 @@
 // RUN: -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SPR_M32
 // CHECK_SPR_M32: #define __AES__ 1
-// CHECK_SPR_M32: #define __AMXBF16__ 1
-// CHECK_SPR_M32: #define __AMXINT8__ 1
-// CHECK_SPR_M32: #define __AMXTILE__ 1
 // CHECK_SPR_M32: #define __AVX2__ 1
 // CHECK_SPR_M32: #define __AVX512BF16__ 1
 // CHECK_SPR_M32: #define __AVX512BITALG__ 1
@@ -1716,7 +1713,7 @@
 // CHECK_SPR_M32: #define i386 1
 
 // RUN: %clang -march=sapphirerapids -m64 -E -dM %s -o - 2>&1 \
-// RUN: -target i386-unknown-linux \
+// RUN: -target x86_64-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SPR_M64
 // CHECK_SPR_M64: #define __AES__ 1
 // CHECK_SPR_M64: #define __AMXBF16__ 1
Index: clang/test/Driver/x86-target-features.c
===
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -270,20 +270,26 @@
 // WIDE_KL: "-target-feature" "+widekl"
 // NO-WIDE_KL: "-target-feature" "-widekl"
 
-// RUN: %clang --target=i386 -march=i386 -mamx-tile %s -### 2>&1 | FileCheck --check-prefix=AMX-TILE %s
-// RUN: %clang --target=i386 -march=i386 -mno-amx-tile %s -### 2>&1 | FileCheck --check-pref

[PATCH] D131134: [X86] Report error if the amx enabled on the non-64-bits target

2022-08-08 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 450724.
LiuChen3 added a comment.

change the error report


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131134/new/

https://reviews.llvm.org/D131134

Files:
  clang/include/clang/Basic/DiagnosticCommonKinds.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Driver/ToolChains/Arch/X86.cpp
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/predefined-arch-macros.c
  clang/test/Preprocessor/x86_amx_target_features.c

Index: clang/test/Preprocessor/x86_amx_target_features.c
===
--- clang/test/Preprocessor/x86_amx_target_features.c
+++ clang/test/Preprocessor/x86_amx_target_features.c
@@ -1,35 +1,35 @@
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=AMX-TILE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=AMX-TILE %s
 
 // AMX-TILE: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-bf16 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-bf16 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-BF16 %s
 
 // AMX-BF16: #define __AMXBF16__ 1
 // AMX-BF16: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mamx-int8 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-int8 -x c -E -dM -o - %s | FileCheck -check-prefix=AMX-INT8 %s
 
 // AMX-INT8: #define __AMXINT8__ 1
 // AMX-INT8: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-TILE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-TILE %s
 
 // NOAMX-TILE-NOT: #define __AMXTILE__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-bf16 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-bf16 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
 
 // NOAMX-BF16-NOT: #define __AMXBF16__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -amx-bf16 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -amx-bf16 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-BF16 %s
 
 // NOAMX-BF16-NOT: #define __AMXTILE__ 1
 // NOAMX-BF16-NOT: #define __AMXBF16__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-amx-int8 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-int8 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
 
 // NOAMX-INT8-NOT: #define __AMXINT8__ 1
 
-// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -amx-int8 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -amx-int8 -mno-amx-tile -x c -E -dM -o - %s | FileCheck  -check-prefix=NOAMX-INT8 %s
 
 // NOAMX-INT8-NOT: #define __AMXTILE__ 1
 // NOAMX-INT8-NOT: #define __AMXINT8__ 1
Index: clang/test/Preprocessor/predefined-arch-macros.c
===
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -1647,9 +1647,6 @@
 // RUN: -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SPR_M32
 // CHECK_SPR_M32: #define __AES__ 1
-// CHECK_SPR_M32: #define __AMXBF16__ 1
-// CHECK_SPR_M32: #define __AMXINT8__ 1
-// CHECK_SPR_M32: #define __AMXTILE__ 1
 // CHECK_SPR_M32: #define __AVX2__ 1
 // CHECK_SPR_M32: #define __AVX512BF16__ 1
 // CHECK_SPR_M32: #define __AVX512BITALG__ 1
@@ -1716,7 +1713,7 @@
 // CHECK_SPR_M32: #define i386 1
 
 // RUN: %clang -march=sapphirerapids -m64 -E -dM %s -o - 2>&1 \
-// RUN: -target i386-unknown-linux \
+// RUN: -target x86_64-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SPR_M64
 // CHECK_SPR_M64: #define __AES__ 1
 // CHECK_SPR_M64: #define __AMXBF16__ 1
Index: clang/test/Driver/x86-target-features.c
===
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -270,20 +270,26 @@
 // WIDE_KL: "-target-feature" "+widekl"
 // NO-WIDE_KL: "-target-feature" "-widekl"
 
-// RUN: %clang --target=i386 -march=i386 -mamx-tile %s -### 2>&1 | FileCheck --check-prefix=AMX-TILE %s
-// RUN: %clang --target=i386 -march=i386 -mno-amx-tile %s -### 2>&1 | FileCheck --check-prefix=NO-AMX-TILE %

[PATCH] D131134: [X86] Report error if the amx enabled on the non-64-bits target

2022-08-10 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D131134#3707397 , @craig.topper 
wrote:

> What problem are we trying to solve here? CPUID reports AMX as supported even 
> in 32-bit mode. Why can't the user pass it to the compiler?

I thought AMX was only supported in 64-bit mode. It seems I was wrong. So for
now, just reporting an error for the intrinsics is enough. Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D131134/new/

https://reviews.llvm.org/D131134

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124067: [x86] Support 3 builtin functions for 32-bits targets

2022-04-20 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/test/CodeGen/X86/sse2-builtins.c:560
   // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
+  // X86-LABEL: test_mm_cvtsi64_si128
+  // X86: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0

xiangzhangllvm wrote:
> xiangzhangllvm wrote:
> > RKSimon wrote:
> > > xiangzhangllvm wrote:
> > > > craig.topper wrote:
> > > > > Do we need the X86 prefix because of the x86-64 #ifdefs? Or are there 
> > > > > other differences?
> > > > > 
> > > > > If it's just the x86-64, can we add -check-prefixes=CHECK,X64 to the 
> > > > > x86-64 run lines and use X64 for the x86-64 only functions. That way 
> > > > > CHECK can be used for all the common tests.
> > > > Before I change the test, it only build with "-triple=x86_64", So all 
> > > > the CHECK should be X64 prefix. 
> > > > So I add X86 prefix to just let "RUN ... -triple=i386" only check the 
> > > > updated 3 builtins. (let the change be small).
> > > > 
> > > I'd much prefer we have complete test check prefix coverage for every RUN 
> > > - and tbh we should be properly testing 32-bit on every x86 intrinsic 
> > > test file.
> > Yes, testing 32-bit on every x86 intrinsic test file makes sense. I am also
> > confused about why this test was not testing 32-bit mode before. I think that
> > is a "defect" in the test.
> > But how can I properly update the test while only checking the 3 updated
> > intrinsics? It seems strange to update the checks for the other intrinsics
> > when I only update 3 intrinsics in clang.
> Hi @craig.topper , @RKSimon, if the 32 and 64 has common prefix "CHECK", it 
> means the line 4 (32 bits) need to check all other intrinsics. That means I 
> need to updated a lot check string for the 32 bit mode. 
> What's more, currently we have no tools to auto generate the checking code 
> for clang test. 
> 
Actually we have 'update_cc_test_checks.py', but it is of little use here.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124067/new/

https://reviews.llvm.org/D124067

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-26 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added subscribers: dexonsmith, jdoerfert, pengfei.
Herald added a project: All.
LiuChen3 requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay.
Herald added a project: clang.

For now clang will assume the integer parameters have been sign/zero
extended in the caller, which will cause some ABI compatibility issues.
This patch will remove the `signext/zeroext` from the callee so that the
callee will always extend the integer parameters:

1. Adds one new `ConservativeExtend` Kind, which means we shouldn't

make any assumptions about the caller and callee. In this case, we must
do zero/sign extension for integer parameters in caller and callee.

2. Adds `-mconservative-extend/-mno-conservative-extend` options. As

default `-mconservative-extend` is enabled. Use `-mno-conservative-extend`
to get back to the original behavior of clang.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D124435

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/2007-06-18-SextAttrAggregate.c
  clang/test/CodeGen/X86/integer_argument_passing.c
  clang/test/CodeGen/X86/x86_32-arguments-darwin.c
  clang/test/CodeGen/X86/x86_32-arguments-linux.c
  clang/test/CodeGen/X86/x86_64-arguments-nacl.c
  clang/test/CodeGen/X86/x86_64-arguments.c
  clang/test/CodeGen/attr-noundef.cpp
  clang/test/CodeGen/builtin-align.c
  clang/test/CodeGen/catch-implicit-integer-sign-changes.c
  clang/test/CodeGen/ext-int-cc.c
  clang/test/CodeGen/function-attributes.c
  clang/test/CodeGen/mangle-windows.c
  clang/test/CodeGen/matrix-type-builtins.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGen/regcall.c
  clang/test/CodeGen/vectorcall.c
  clang/test/CodeGenCXX/exceptions.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
  clang/test/CodeGenCXX/new-overflow.cpp
  clang/test/CodeGenCXX/virtual-bases.cpp
  clang/test/CodeGenObjC/property-atomic-bool.m
  clang/test/OpenMP/target_codegen_global_capture.cpp

Index: clang/test/OpenMP/target_codegen_global_capture.cpp
===
--- clang/test/OpenMP/target_codegen_global_capture.cpp
+++ clang/test/OpenMP/target_codegen_global_capture.cpp
@@ -2051,7 +2051,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2335,7 +2335,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2642,7 +2642,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2661,7 +2661,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] comdat {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2975,7 +2975,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_A

[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-26 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 425135.
LiuChen3 added a comment.

fix bug


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/2007-06-18-SextAttrAggregate.c
  clang/test/CodeGen/X86/integer_argument_passing.c
  clang/test/CodeGen/X86/x86_32-arguments-darwin.c
  clang/test/CodeGen/X86/x86_32-arguments-linux.c
  clang/test/CodeGen/X86/x86_64-arguments-nacl.c
  clang/test/CodeGen/X86/x86_64-arguments.c
  clang/test/CodeGen/attr-noundef.cpp
  clang/test/CodeGen/builtin-align.c
  clang/test/CodeGen/catch-implicit-integer-sign-changes.c
  clang/test/CodeGen/ext-int-cc.c
  clang/test/CodeGen/function-attributes.c
  clang/test/CodeGen/mangle-windows.c
  clang/test/CodeGen/matrix-type-builtins.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGen/regcall.c
  clang/test/CodeGen/vectorcall.c
  clang/test/CodeGenCXX/exceptions.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
  clang/test/CodeGenCXX/new-overflow.cpp
  clang/test/CodeGenCXX/virtual-bases.cpp
  clang/test/CodeGenObjC/property-atomic-bool.m
  clang/test/OpenMP/target_codegen_global_capture.cpp

Index: clang/test/OpenMP/target_codegen_global_capture.cpp
===
--- clang/test/OpenMP/target_codegen_global_capture.cpp
+++ clang/test/OpenMP/target_codegen_global_capture.cpp
@@ -2051,7 +2051,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2335,7 +2335,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2642,7 +2642,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2661,7 +2661,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] comdat {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2975,7 +2975,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3259,7 +3259,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3566,7 +3566,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [

[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-26 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D124435#3474130 , @skan wrote:

> Should we update the `clang/docs/ReleaseNotes.rst` for this?

Maybe? I will update it in the next patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-26 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:2451
+  // attribute to the callee.
+  if (AttrOnCallSite || AI.getKind() == ABIArgInfo::Extend) {
+if (AI.isSignExt())

pengfei wrote:
> Does the change affect Windows? Seems Win64 doesn't extend on caller. 
> https://godbolt.org/z/c95hvvsWf
No. This patch did nothing for the Win64 ABI.



Comment at: clang/test/CodeGen/X86/integer_argument_passing.c:2
+// RUN: %clang_cc1 -O2 -triple -x86_64-linux-gnu %s -emit-llvm -o - | 
FileCheck %s --check-prefixes=EXTEND,CHECK
+// RUN: %clang_cc1 -O2 -triple -i386-linux-gnu %s -emit-llvm -o - | FileCheck 
%s --check-prefixes=EXTEND,CHECK
+// RUN: %clang_cc1 -O2 -triple -i386-pc-win32 %s -emit-llvm -o - | FileCheck 
%s --check-prefixes=EXTEND,CHECK

pengfei wrote:
> Maybe we can remove the tests for i386 given it's only for 64 bits ABI?
According to the meaning of `ConservativeExtend`, I think the 32bit ABI needs 
to be modified as well:
https://godbolt.org/z/W1Ma1T3f3
The dump of currently clang-cl:
```
_square:
movb4(%esp), %al
mulb%al
mulb8(%esp)
retl

.def_baz;
.scl2;
.type   32;
.endef
.section.text,"xr",one_only,_baz
.globl  _baz
.p2align4, 0x90
_baz:
movswl  4(%esp), %eax
pushl   %eax
calll   _bar
addl$4, %esp
retl
```
Of course with this patch the behavior of clang-cl is still different from 
cl.exe, but I think it fits the meaning of `ConservativeExtend`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-26 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/test/CodeGen/X86/integer_argument_passing.c:2
+// RUN: %clang_cc1 -O2 -triple -x86_64-linux-gnu %s -emit-llvm -o - | 
FileCheck %s --check-prefixes=EXTEND,CHECK
+// RUN: %clang_cc1 -O2 -triple -i386-linux-gnu %s -emit-llvm -o - | FileCheck 
%s --check-prefixes=EXTEND,CHECK
+// RUN: %clang_cc1 -O2 -triple -i386-pc-win32 %s -emit-llvm -o - | FileCheck 
%s --check-prefixes=EXTEND,CHECK

pengfei wrote:
> pengfei wrote:
> > LiuChen3 wrote:
> > > pengfei wrote:
> > > > Maybe we can remove the tests for i386 given it's only for 64 bits ABI?
> > > According to the meaning of `ConservativeExtend`, I think the 32bit ABI 
> > > needs to be modified as well:
> > > https://godbolt.org/z/W1Ma1T3f3
> > > The dump of currently clang-cl:
> > > ```
> > > _square:
> > > movb4(%esp), %al
> > > mulb%al
> > > mulb8(%esp)
> > > retl
> > > 
> > > .def_baz;
> > > .scl2;
> > > .type   32;
> > > .endef
> > > .section.text,"xr",one_only,_baz
> > > .globl  _baz
> > > .p2align4, 0x90
> > > _baz:
> > > movswl  4(%esp), %eax
> > > pushl   %eax
> > > calll   _bar
> > > addl$4, %esp
> > > retl
> > > ```
> > > Of course with this patch the behavior of clang-cl is still different 
> > > from cl.exe, but I think it fits the meaning of `ConservativeExtend`.
> > My point was, i386 passes arguments on the stack. The extensions don't make
> > sense under those circumstances. That's what I understood from the comments
> > in the above test, 2007-06-18-SextAttrAggregate.c.
> Oh, it seems I misunderstood it. The stack still needs extensions since it's
> aligned to 4 bytes. But from the above output, clang-cl is wrong, because it
> extends in the caller whereas MSVC extends in the callee. So back to the other
> question: we should change this for Windows too, right?
I just changed the behavior for Win32. Win64 will always do the extensions in
the callee; I didn't add `ConservativeExtend` support for Win64. The IR dump
from the current clang-cl is:
```
define dso_local i8 @square(i8 noundef %a, i8 noundef %b) local_unnamed_addr #0 
{
...
}

  %call = tail call i32 @bar(i16 noundef %conv) #3
...
}

define dso_local i32 @baz(i32 noundef %num) local_unnamed_addr #1 {
...
  %call = tail call i32 @bar(i16 noundef %conv) #3
...
}
```
I think maybe we don't need to do the extension in both the caller and the
callee for Win64?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-26 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/docs/ClangCommandLineReference.rst:2988-2992
+.. option:: -mconservative-extend
+Always extend the integer parameter both in the callee and caller.
+
+.. option:: -mno-conservative-extend
+Keep the original integer parameter passing behavior.

rjmccall wrote:
> pengfei wrote:
> > Combine like others?
> How about:
> 
> ```
> In the past, Clang passed small integer arguments on certain targets using a
> parameter convention in which the caller was assumed to have sign-extended
> or zero-extended the argument to a certain width.  This convention was not
> conformant with the documented ABI on these platforms, which does not
> require the caller to perform this extension.  Clang no longer assumes that
> callers perform this extension, but for compatibility with code compiled by
> previous releases of Clang, Clang defaults to still extending the argument in 
> the
> caller.  `-mno-conservative-extend` disables this, which may improve
> performance and code size if compatibility with old versions of Clang is not
> required.
> 
> This affects most 32-bit and 64-bit x86 targets, except:
> - Windows, which Clang has never assumed extension on
> - Apple platforms, which use a non-standard ABI that unconditionally assumes 
> extension
> ```
> 
> Note that I need to check that that's what Apple actually wants to do.  You 
> should also reach out to the Sony folks to see what they want to do, but I 
> expect that it's to assume extension unconditionally.
Thanks a lot! It's much clearer.
Just a small correction for Windows: only Win64 is not affected.



Comment at: clang/include/clang/CodeGen/CGFunctionInfo.h:333
   bool canHaveCoerceToType() const {
-return isDirect() || isExtend() || isCoerceAndExpand();
+return isDirect() || isExtend() || isCoerceAndExpand() ||
+   isConservativeExtend();

pengfei wrote:
> Can we move it to `isExtend`? e.g. `TheKind == Expand | TheKind == 
> ConservativeExtend`
I prefer to set it alone as it is a different `Kind` from `Extend`. 



Comment at: clang/lib/CodeGen/CGCall.cpp:2451
+  // attribute to the callee.
+  if (AttrOnCallSite || AI.getKind() == ABIArgInfo::Extend) {
+if (AI.isSignExt())

pengfei wrote:
> LiuChen3 wrote:
> > pengfei wrote:
> > > Does the change affect Windows? Seems Win64 doesn't extend on caller. 
> > > https://godbolt.org/z/c95hvvsWf
> > No.  This patch didn't nothing for Win64 ABI.
> But I found some Windows tests are affected?
I checked it again and it should be that only win32 is affected.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1938
+return IsConservativeExtend ? ABIArgInfo::getConservativeExtend(Ty)
+: ABIArgInfo::getExtend(Ty);
   }

rjmccall wrote:
> This looks wrong.  In non-`ConservativeExtend` mode, we don't get to assume 
> extension at all and should use `Direct`, right?
As I understand it, `Direct` means doing nothing with the parameters: the caller
won't do the extension and the callee can't assume the parameter is correct.
This makes the new clang behave in the opposite way to the current clang
behavior, which will cause incompatibility issues. E.g.:
https://godbolt.org/z/d3Peq4nsG



Comment at: clang/lib/CodeGen/TargetInfo.cpp:3818
+  return ABIArgInfo::getConservativeExtend(Ty);
 return ABIArgInfo::getExtend(Ty);
+  }

rjmccall wrote:
> Same comment: this should use `Direct` when we're not in `ConservativeExtend` 
> mode, right?
Same as above.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-27 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 425427.
LiuChen3 added a comment.

Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/2007-06-18-SextAttrAggregate.c
  clang/test/CodeGen/X86/integer_argument_passing.c
  clang/test/CodeGen/X86/x86_32-arguments-darwin.c
  clang/test/CodeGen/X86/x86_32-arguments-linux.c
  clang/test/CodeGen/X86/x86_64-arguments-nacl.c
  clang/test/CodeGen/X86/x86_64-arguments.c
  clang/test/CodeGen/attr-noundef.cpp
  clang/test/CodeGen/builtin-align.c
  clang/test/CodeGen/catch-implicit-integer-sign-changes.c
  clang/test/CodeGen/ext-int-cc.c
  clang/test/CodeGen/function-attributes.c
  clang/test/CodeGen/mangle-windows.c
  clang/test/CodeGen/matrix-type-builtins.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGen/regcall.c
  clang/test/CodeGen/vectorcall.c
  clang/test/CodeGenCXX/exceptions.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
  clang/test/CodeGenCXX/new-overflow.cpp
  clang/test/CodeGenCXX/virtual-bases.cpp
  clang/test/CodeGenObjC/property-atomic-bool.m
  clang/test/OpenMP/target_codegen_global_capture.cpp

Index: clang/test/OpenMP/target_codegen_global_capture.cpp
===
--- clang/test/OpenMP/target_codegen_global_capture.cpp
+++ clang/test/OpenMP/target_codegen_global_capture.cpp
@@ -2051,7 +2051,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2335,7 +2335,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2642,7 +2642,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2661,7 +2661,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] comdat {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2975,7 +2975,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3259,7 +3259,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3566,7 +3566,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef 

[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-27 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 marked 9 inline comments as done.
LiuChen3 added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:2310
   case ABIArgInfo::Extend:
+  case ABIArgInfo::ConservativeExtend:
 if (RetAI.isSignExt())

At present, `ConservativeExtend` has no specific definition for the return
value, so I just make it follow the behavior of `Extend`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-27 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 425428.
LiuChen3 added a comment.

add one missing comment


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/2007-06-18-SextAttrAggregate.c
  clang/test/CodeGen/X86/integer_argument_passing.c
  clang/test/CodeGen/X86/x86_32-arguments-darwin.c
  clang/test/CodeGen/X86/x86_32-arguments-linux.c
  clang/test/CodeGen/X86/x86_64-arguments-nacl.c
  clang/test/CodeGen/X86/x86_64-arguments.c
  clang/test/CodeGen/attr-noundef.cpp
  clang/test/CodeGen/builtin-align.c
  clang/test/CodeGen/catch-implicit-integer-sign-changes.c
  clang/test/CodeGen/ext-int-cc.c
  clang/test/CodeGen/function-attributes.c
  clang/test/CodeGen/mangle-windows.c
  clang/test/CodeGen/matrix-type-builtins.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGen/regcall.c
  clang/test/CodeGen/vectorcall.c
  clang/test/CodeGenCXX/exceptions.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
  clang/test/CodeGenCXX/new-overflow.cpp
  clang/test/CodeGenCXX/virtual-bases.cpp
  clang/test/CodeGenObjC/property-atomic-bool.m
  clang/test/OpenMP/target_codegen_global_capture.cpp

Index: clang/test/OpenMP/target_codegen_global_capture.cpp
===
--- clang/test/OpenMP/target_codegen_global_capture.cpp
+++ clang/test/OpenMP/target_codegen_global_capture.cpp
@@ -2051,7 +2051,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2335,7 +2335,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2642,7 +2642,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2661,7 +2661,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] comdat {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2975,7 +2975,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3259,7 +3259,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3566,7 +3566,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 n

[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-27 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D124435#3474130 , @skan wrote:

> Should we update the `clang/docs/ReleaseNotes.rst` for this?

The ReleaseNotes says "written by LLVM Team". So I am not sure if I can update 
this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-27 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/docs/ClangCommandLineReference.rst:2988-2992
+.. option:: -mconservative-extend
+Always extend the integer parameter both in the callee and caller.
+
+.. option:: -mno-conservative-extend
+Keep the original integer parameter passing behavior.

LiuChen3 wrote:
> rjmccall wrote:
> > pengfei wrote:
> > > Combine like others?
> > How about:
> > 
> > ```
> > In the past, Clang passed small integer arguments on certain targets using a
> > parameter convention in which the caller was assumed to have sign-extended
> > or zero-extended the argument to a certain width.  This convention was not
> > conformant with the documented ABI on these platforms, which does not
> > require the caller to perform this extension.  Clang no longer assumes that
> > callers perform this extension, but for compatibility with code compiled by
> > previous releases of Clang, Clang defaults to still extending the argument 
> > in the
> > caller.  `-mno-conservative-extend` disables this, which may improve
> > performance and code size if compatibility with old versions of Clang is not
> > required.
> > 
> > This affects most 32-bit and 64-bit x86 targets, except:
> > - Windows, which Clang has never assumed extension on
> > - Apple platforms, which use a non-standard ABI that unconditionally 
> > assumes extension
> > ```
> > 
> > Note that I need to check that that's what Apple actually wants to do.  You 
> > should also reach out to the Sony folks to see what they want to do, but I 
> > expect that it's to assume extension unconditionally.
> Thanks a lot! It's much clearer.
> Just a small correction for windows: only windows64 is not affected.
Hi, @RKSimon , @probinson.  This patch will extend the integer parameters in 
the caller.  Is there any concern about this for Sony?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-04-27 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1938
+return IsConservativeExtend ? ABIArgInfo::getConservativeExtend(Ty)
+: ABIArgInfo::getExtend(Ty);
   }

rjmccall wrote:
> LiuChen3 wrote:
> > rjmccall wrote:
> > > This looks wrong.  In non-`ConservativeExtend` mode, we don't get to 
> > > assume extension at all and should use `Direct`, right?
> > As I understand it, `Direct` means do nothing with the parameters. The caller 
> > won't do the extension and the callee can't assume the parameter is correct. 
> > This makes new clang behave in the opposite way to current clang 
> > behavior, which will cause an incompatibility issue. e.g.:
> > https://godbolt.org/z/d3Peq4nsG
> Oh, I see, you're thinking that `-mno-conservative-extend` means "do what old 
> versions of clang did" rather than "break compatibility with old versions of 
> clang and just follow the x86_64 ABI".  That's definitely different from the 
> documentation I suggested, so something's got to change.
> 
> I think these are the possibilities here:
> 
> 1. Some platforms, like Apple's, are probably going to define Clang's current 
> behavior as the platform ABI.  So those platforms need to continue to use 
> `Extend`.  My previous comment about using `Direct` wasn't paying due 
> attention to this case.
> 
> 2. On other platforms, we need to maintain compatibility by default with both 
> the platform ABI and old Clang behavior.  Those platforms will need to use 
> `ConservativeExtend`.
> 
> 3. Some people may want to opt out of (2) and just be compatible with the 
> platform ABI, which has minor code-size and performance wins.  If we support 
> that with an option, I believe it should cause us to emit `Direct`.  This is 
> what I was thinking `-mno-conservative-extend` would mean.
> 
> 4. Some people may want to force the use of (1) even on platforms where that 
> isn't the platform ABI.  I don't know if this is really something we should 
> support in the long term, but it might be valuable for people staging this 
> change in.  This is what you seem to be thinking `-mno-conservative-extend` 
> would mean.
> 
> I would suggest these spellings for the argument, instead of making it 
> boolean:
> 
> ```
> -mextend-small-integers=none// Force the use of Direct
> -mextend-small-integers=conservative // Force the use of ConservativeExtend
> -mextend-small-integers=assumed // Force the use of Extend
> -mextend-small-integers=default // Use the default rule for the target
> ```
Yes. That's what I mean. I totally misunderstood your meaning before. 
I agree with your method. I will work on that. Just one concern about the 
'default': the default behavior is 'conservative' instead of 'default'. 
Wouldn't that be a little weird? But with my limited English I can't think of a 
better word. 'default' is ok for me. :-)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-05-06 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 427551.
LiuChen3 added a comment.

Use `-mextend-small-integers=` instead of boolean option


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Basic/CodeGenOptions.h
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGen/2007-06-18-SextAttrAggregate.c
  clang/test/CodeGen/X86/integer_argument_passing.c
  clang/test/CodeGen/X86/x86_32-arguments-darwin.c
  clang/test/CodeGen/X86/x86_32-arguments-linux.c
  clang/test/CodeGen/X86/x86_64-arguments-nacl.c
  clang/test/CodeGen/X86/x86_64-arguments.c
  clang/test/CodeGen/attr-noundef.cpp
  clang/test/CodeGen/builtin-align.c
  clang/test/CodeGen/catch-implicit-integer-sign-changes.c
  clang/test/CodeGen/ext-int-cc.c
  clang/test/CodeGen/function-attributes.c
  clang/test/CodeGen/mangle-windows.c
  clang/test/CodeGen/matrix-type-builtins.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGen/regcall.c
  clang/test/CodeGen/vectorcall.c
  clang/test/CodeGenCXX/exceptions.cpp
  clang/test/CodeGenCXX/ext-int.cpp
  clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
  clang/test/CodeGenCXX/new-overflow.cpp
  clang/test/CodeGenCXX/virtual-bases.cpp
  clang/test/CodeGenObjC/property-atomic-bool.m
  clang/test/OpenMP/target_codegen_global_capture.cpp

Index: clang/test/OpenMP/target_codegen_global_capture.cpp
===
--- clang/test/OpenMP/target_codegen_global_capture.cpp
+++ clang/test/OpenMP/target_codegen_global_capture.cpp
@@ -2051,7 +2051,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2335,7 +2335,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2642,7 +2642,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2661,7 +2661,7 @@
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z4tbarIsEiT_S0_S0_S0_
-// CHECK3-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] comdat {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK3-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -2975,7 +2975,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3foo
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3259,7 +3259,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3bar
-// CHECK4-SAME: (i16 noundef signext [[A:%.*]], i16 noundef signext [[B:%.*]], i16 noundef signext [[C:%.*]], i16 noundef signext [[D:%.*]]) #[[ATTR0]] {
+// CHECK4-SAME: (i16 noundef [[A:%.*]], i16 noundef [[B:%.*]], i16 noundef [[C:%.*]], i16 noundef [[D:%.*]]) #[[ATTR0]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:[[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK4-NEXT:[[B_ADDR:%.*]] = alloca i16, align 2
@@ -3566,7 +3566,7 @@
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tbar2
-// CHE

[PATCH] D124435: [X86] Always extend the integer parameters in callee

2022-05-16 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Thanks, @rjmccall . I'm sorry I don't have much time on this patch recently. I 
will update it later.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124435/new/

https://reviews.llvm.org/D124435

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-04 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 302769.
LiuChen3 added a comment.



1. Address comments;
2. Only support parsing vex/vex2/vex3/evex prefix for MASM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

Files:
  clang/test/CodeGen/X86/ms-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp


Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2847,7 +2847,33 @@
   }
   continue;
 }
-
+// Parse MASM style pseudo prefixes.
+// FIXME: This prefix should only be used for MASM, not for intel-syntax.
+if (isParsingIntelSyntax()) {
+  bool IsPrefix = false;
+  if (Name == "vex") {
+ForcedVEXEncoding = VEXEncoding_VEX;
+IsPrefix = true;
+  } else if (Name == "vex2") {
+ForcedVEXEncoding = VEXEncoding_VEX2;
+IsPrefix = true;
+  } else if (Name == "vex3") {
+ForcedVEXEncoding = VEXEncoding_VEX3;
+IsPrefix = true;
+  } else if (Name == "evex") {
+ForcedVEXEncoding = VEXEncoding_EVEX;
+IsPrefix = true;
+  }
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");
+// FIXME: The mnemonic won't match correctly if its not in lower case.
+Name = Parser.getTok().getString();
+Parser.Lex();
+continue;
+  }
+}
 break;
   }
 
@@ -4153,10 +4179,16 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+  // encoder and printer.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::IP_USE_VEX;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::IP_USE_VEX2;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
 Prefixes |= X86::IP_USE_VEX3;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefixes |= X86::IP_USE_EVEX;
 
   // Set encoded flags for {disp8} and {disp32}.
   if (ForcedDispEncoding == DispEncoding_Disp8)
Index: clang/test/CodeGen/X86/ms-inline-asm-prefix.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/ms-inline-asm-prefix.c
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc 
-target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl 
-fasm-blocks -mllvm -x86-asm-syntax=intel -S -o -  | FileCheck %s 
-check-prefix=INTEL
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc 
-target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl 
-fasm-blocks -mllvm -x86-asm-syntax=att -S -o -  | FileCheck %s 
-check-prefix=ATT
+
+void check_inline_prefix(void) {
+  __asm {
+// INTEL: {vex} vcvtps2pd   xmm0, xmm1
+// INTEL: {vex2}vcvtps2pd   xmm0, xmm1
+// INTEL: {vex3}vcvtps2pd   xmm0, xmm1
+// INTEL: {evex}vcvtps2pd   xmm0, xmm1
+// ATT:   {vex}   vcvtps2pd   %xmm1, %xmm0
+// ATT:   {vex2}  vcvtps2pd   %xmm1, %xmm0
+// ATT:   {vex3}  vcvtps2pd   %xmm1, %xmm0
+// ATT:   {evex}  vcvtps2pd   %xmm1, %xmm0
+vex vcvtps2pd xmm0, xmm1
+vex2 vcvtps2pd xmm0, xmm1
+vex3 vcvtps2pd xmm0, xmm1
+evex vcvtps2pd xmm0, xmm1
+  }
+}


Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2847,7 +2847,33 @@
   }
   continue;
 }
-
+// Parse MASM style pseudo prefixes.
+// FIXME: This prefix should only be used for MASM, not for intel-syntax.
+if (isParsingIntelSyntax()) {
+  bool IsPrefix = false;
+  if (Name == "vex") {
+ForcedVEXEncoding = VEXEncoding_VEX;
+IsPrefix = true;
+  } else if (Name == "vex2") {
+ForcedVEXEncoding = VEXEncoding_VEX2;
+IsPrefix = true;
+  } else if (Name == "vex3") {
+ForcedVEXEncoding = VEXEncoding_VEX3;
+IsPrefix = true;
+  } else if (Name == "evex") {
+ForcedVEXEncoding = VEXEncoding_EVEX;
+IsPrefix = true;
+  }
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");
+// FIXME: The mnemonic won't match correctly if its not in lower case.
+Name = Parser.getTok().getString();
+Parser.Lex();
+continue;
+  }
+}
 

[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-04 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/test/CodeGen/X86/ms-inline-asm-prefix.c:1
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-widows-msvc 
-target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl 
-fasm-blocks -mllvm -x86-asm-syntax=intel -S -o -  | FileCheck %s -check-prefix 
CHECK
+

LiuChen3 wrote:
> epastor wrote:
> > epastor wrote:
> > > pengfei wrote:
> > > > pengfei wrote:
> > > > > pengfei wrote:
> > > > > > Maybe need `// REQUIRES: x86-registered-target`
> > > > > You may need add att check too since you modified the att code.
> > > > Should it be avalible only when `-fms-compatibility`
> > > The triple is misspelled; it should be `x86_64-pc-windows-msvc` (the "n" 
> > > in windows is missing)
> > A broader question: As written, this applies to anything in Intel syntax. 
> > Is this an Intel syntax feature, or a MASM feature?
> Thanks for your review. After checking with the MSVC people, I found that the 
> prefix without braces is not Intel syntax. Actually, we don't know of any 
> document that says what the prefix should be. At least gcc does have the "{}" 
> in Intel syntax, and so does clang. We currently decide to only support parsing 
> the prefix without MSVC.
I am not sure. But I think -fasm-blocks, -fms-extensions  also support MASM.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:2851
+// Parse MASM style pseudo prefixes.
+// FIXME: This prefix should only be used for MASM, not for intel-syntax.
+if (isParsingIntelSyntax()) {

I tried to limit this to MASM, but I found that 'isParsingMSInlineAsm()' is not 
accurate. I then tried to transmit the 'ParsingMSInlineAsm' information 
correctly in AsmPrinterInlineAsm.cpp (according to the '-fasm-blocks' option), 
but I was surprised to find that isParsingMSInlineAsm() is actually used as the 
'MatchingInlineAsm' argument in 'MatchAndEmitInstruction()'. This makes me 
confused. Should that 'MatchingInlineAsm' be 'MatchingMSInlineAsm'? Is this 
MatchingInlineAsm only used by llvm-ml?
It is difficult to limit this to MASM at the moment. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-04 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:2851
+// Parse MASM style pseudo prefixes.
+// FIXME: This prefix should only be used for MASM, not for intel-syntax.
+if (isParsingIntelSyntax()) {

craig.topper wrote:
> epastor wrote:
> > LiuChen3 wrote:
> > > I tried to limit to MASM. But I found that the  'isParsingMSInlineAsm()' 
> > > is not accurate.  And then I tried to transmit 'ParsingMSInlineAsm' 
> > > information correctly in AsmPrinterInlineAsm.cpp (according to the 
> > > '-fasm-blocks' option). But I was surprised to find that 
> > > isParsingMSInlineAsm() is actually used as the argument of 
> > > 'MatchingInlineAsm' in 'MatchAndEmitInstruction()'. This makes me 
> > > confused. Should that 'MatchingInlineAsm' be 'MatchingMSInlineAsm' ?Is 
> > > this MatchingInlineAsm only used by llvm-ml.
> > > It difficult to limit this to MASM at the moment. 
> > llvm-ml attempts not to touch **anything** involving inline assembly so 
> > far. The signal that MasmParser.cpp is involved is 
> > `Parser.isParsingMasm()`. However... while I can't answer the majority of 
> > this without more research, I suspect you're correct that 
> > `MatchingInlineAsm` is misnamed. We need to check this, and if so, we 
> > should rename it to avoid confusion.
> MS inline assembly is parsed twice. Once by  clang to find names of C/C++ 
> variables. And again in the backend. GNU inline assembly is only parsed in 
> the backend since variable names are bound explicitly and not referenced in 
> the assembly text.
> 
> IsParsingInlineAsm is set during the clang parsing.
Thanks. That makes sense. 
So 'MatchingInlineAsm' in MatchAndEmitInstruction() is more like 
'MatchingMSInlineAsm' and can only be set when parsing the instructions the 
first time, and the second-time parser cannot call 'setParsingMSInlineAsm(true)'. 
That makes it difficult to limit the scope to MASM for now.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-11 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 304730.
LiuChen3 added a comment.

Rebase.
Adding the '{}' to prefix when generate IR.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

Files:
  clang/lib/AST/Stmt.cpp
  clang/test/CodeGen/X86/ms-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3064,7 +3064,32 @@
   }
   continue;
 }
-
+// Parse MASM style pseudo prefixes.
+if (isParsingMSInlineAsm()) {
+  bool IsPrefix = false;
+  if (Name == "vex") {
+ForcedVEXEncoding = VEXEncoding_VEX;
+IsPrefix = true;
+  } else if (Name == "vex2") {
+ForcedVEXEncoding = VEXEncoding_VEX2;
+IsPrefix = true;
+  } else if (Name == "vex3") {
+ForcedVEXEncoding = VEXEncoding_VEX3;
+IsPrefix = true;
+  } else if (Name == "evex") {
+ForcedVEXEncoding = VEXEncoding_EVEX;
+IsPrefix = true;
+  }
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");
+// FIXME: The mnemonic won't match correctly if its not in lower case.
+Name = Parser.getTok().getString();
+Parser.Lex();
+continue;
+  }
+}
 break;
   }
 
@@ -4370,10 +4395,16 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+  // encoder and printer.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::IP_USE_VEX;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::IP_USE_VEX2;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
 Prefixes |= X86::IP_USE_VEX3;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefixes |= X86::IP_USE_EVEX;
 
   // Set encoded flags for {disp8} and {disp32}.
   if (ForcedDispEncoding == DispEncoding_Disp8)
Index: clang/test/CodeGen/X86/ms-inline-asm-prefix.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/ms-inline-asm-prefix.c
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -fasm-blocks -mllvm -x86-asm-syntax=intel -S -o -  | FileCheck %s -check-prefix=INTEL
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -fasm-blocks -mllvm -x86-asm-syntax=att -S -o -  | FileCheck %s -check-prefix=ATT
+
+void check_inline_prefix(void) {
+  __asm {
+// INTEL: {vex} vcvtps2pd   xmm0, xmm1
+// INTEL: {vex2}vcvtps2pd   xmm0, xmm1
+// INTEL: {vex3}vcvtps2pd   xmm0, xmm1
+// INTEL: {evex}vcvtps2pd   xmm0, xmm1
+// ATT:   {vex}   vcvtps2pd   %xmm1, %xmm0
+// ATT:   {vex2}  vcvtps2pd   %xmm1, %xmm0
+// ATT:   {vex3}  vcvtps2pd   %xmm1, %xmm0
+// ATT:   {evex}  vcvtps2pd   %xmm1, %xmm0
+vex vcvtps2pd xmm0, xmm1
+vex2 vcvtps2pd xmm0, xmm1
+vex3 vcvtps2pd xmm0, xmm1
+evex vcvtps2pd xmm0, xmm1
+  }
+}
Index: clang/lib/AST/Stmt.cpp
===
--- clang/lib/AST/Stmt.cpp
+++ clang/lib/AST/Stmt.cpp
@@ -791,7 +791,27 @@
 /// Assemble final IR asm string (MS-style).
 std::string MSAsmStmt::generateAsmString(const ASTContext &C) const {
   // FIXME: This needs to be translated into the IR string representation.
-  return std::string(AsmStr);
+  SmallVector<StringRef> Pieces;
+  AsmStr.split(Pieces, "\n\t");
+  std::string MSAsmString;
+  for (size_t i = 0, e = Pieces.size(); i < e; ++i) {
+StringRef Instruction = Pieces[i];
+// For vex/vex2/vex3/evex masm style prefix, convert it to att style
+// since we don't support masm style prefix in backend.
+if (Instruction.startswith("vex "))
+  MSAsmString += '{' + Instruction.substr(0, 3).str() + '}' +
+ Instruction.substr(3).str();
+else if (Instruction.startswith("vex2 ") ||
+ Instruction.startswith("vex3 ") || Instruction.startswith("evex "))
+  MSAsmString += '{' + Instruction.substr(0, 4).str() + '}' +
+ Instruction.substr(4).str();
+else
+  MSAsmString += Instruction.str();
+// If this is not the last instruction, adding back the '\n\t'.
+if (i < e - 1)
+  MSAsmString += "\n\t";
+  }
+  return MSAsmString;
 }
 
 Expr *MSAsmStmt::getOutputExpr(unsigned i) {

[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-11 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/lib/AST/Stmt.cpp:801
+// since we don't support masm style prefix in backend.
+if (Instruction.startswith("vex "))
+  MSAsmString += '{' + Instruction.substr(0, 3).str() + '}' +

From X86AsmParser, the vex/evex prefix must be at the beginning of the instruction.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Ping?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/lib/AST/Stmt.cpp:795
+  SmallVector<StringRef> Pieces;
+  AsmStr.split(Pieces, "\n\t");
+  std::string MSAsmString;

pengfei wrote:
> Can we always assume the separator is `\n\t`?
I think so. From the code, we can see that '\n\t' is added to the end of each 
statement:
```
case AOK_EndOfStatement:
  OS << "\n\t";
  break;
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 305959.
LiuChen3 added a comment.

1. Check prefix, ignoring case
2. Delete the IsPrefix parameter, and delete the 'break', so that we won't check 
the prefix again. I am not sure if this is right. ATT format can allow two prefixes, 
using the last one as the final encoding prefix. I think this may not be the 
original intention of the design.
3. Change the test: checking the IR istead of checking the assembly.
4. Made some format adjustments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

Files:
  clang/lib/AST/Stmt.cpp
  clang/test/CodeGen/X86/ms-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3064,7 +3064,26 @@
   }
   continue;
 }
+// Parse MASM style pseudo prefixes.
+if (isParsingMSInlineAsm()) {
+  if (Name.equals_lower("vex"))
+ForcedVEXEncoding = VEXEncoding_VEX;
+  else if (Name.equals_lower("vex2"))
+ForcedVEXEncoding = VEXEncoding_VEX2;
+  else if (Name.equals_lower("vex3"))
+ForcedVEXEncoding = VEXEncoding_VEX3;
+  else if (Name.equals_lower("evex"))
+ForcedVEXEncoding = VEXEncoding_EVEX;
 
+  if (ForcedVEXEncoding != VEXEncoding_Default) {
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");
+// FIXME: The mnemonic won't match correctly if its not in lower case.
+Name = Parser.getTok().getString();
+NameLoc = Parser.getTok().getLoc();
+Parser.Lex();
+  }
+}
 break;
   }
 
@@ -4370,10 +4389,16 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+  // encoder and printer.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::IP_USE_VEX;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::IP_USE_VEX2;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
 Prefixes |= X86::IP_USE_VEX3;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefixes |= X86::IP_USE_EVEX;
 
   // Set encoded flags for {disp8} and {disp32}.
   if (ForcedDispEncoding == DispEncoding_Disp8)
Index: clang/test/CodeGen/X86/ms-inline-asm-prefix.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/ms-inline-asm-prefix.c
@@ -0,0 +1,14 @@
+// REQUIRES: x86-registered-target
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -fasm-blocks -mllvm -x86-asm-syntax=intel -S -emit-llvm -o -  | FileCheck %s -check-prefix=INTEL
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -fasm-blocks -mllvm -x86-asm-syntax=att -S -emit-llvm -o -  | FileCheck %s -check-prefix=ATT
+
+void check_inline_prefix(void) {
+  __asm {
+// INTEL: call void asm sideeffect inteldialect "{vex} vcvtps2pd xmm0, xmm1\0A\09{vex2} vcvtps2pd xmm0, xmm1\0A\09{vex3} vcvtps2pd xmm0, xmm1\0A\09{evex} vcvtps2pd xmm0, xmm1", "~{xmm0},~{dirflag},~{fpsr},~{flags}"()
+// ATT: call void asm sideeffect inteldialect "{vex} vcvtps2pd xmm0, xmm1\0A\09{vex2} vcvtps2pd xmm0, xmm1\0A\09{vex3} vcvtps2pd xmm0, xmm1\0A\09{evex} vcvtps2pd xmm0, xmm1", "~{xmm0},~{dirflag},~{fpsr},~{flags}"()
+vex vcvtps2pd xmm0, xmm1
+vex2 vcvtps2pd xmm0, xmm1
+vex3 vcvtps2pd xmm0, xmm1
+evex vcvtps2pd xmm0, xmm1
+  }
+}
Index: clang/lib/AST/Stmt.cpp
===
--- clang/lib/AST/Stmt.cpp
+++ clang/lib/AST/Stmt.cpp
@@ -791,7 +791,27 @@
 /// Assemble final IR asm string (MS-style).
 std::string MSAsmStmt::generateAsmString(const ASTContext &C) const {
   // FIXME: This needs to be translated into the IR string representation.
-  return std::string(AsmStr);
+  SmallVector Pieces;
+  AsmStr.split(Pieces, "\n\t");
+  std::string MSAsmString;
+  for (size_t I = 0, E = Pieces.size(); I < E; ++I) {
+StringRef Instruction = Pieces[I];
+// For vex/vex2/vex3/evex masm style prefix, convert it to att style
+// since we don't support masm style prefix in backend.
+if (Instruction.startswith("vex "))
+  MSAsmString += '{' + Instruction.substr(0, 3).str() + '}' +
+ Instruction.substr(3).str();
+else if (Instruction.startswith("vex2 ") ||
+ Instruction.startswith("vex3 ") || Instruction.startswith("evex "))
+  MSAsmString += '{' + Instruction.substr(0, 4).str() + '}' +
+   

[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 marked an inline comment as done.
LiuChen3 added a comment.

> 2. Delete IsPrefix parameter, and delete 'break'

It should be 'continue'. Sorry for this mistake.




Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3083
+  }
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();

pengfei wrote:
> You just need to check `ForcedVEXEncoding != VEXEncoding_Default`.
I think this is better. Multiple vex/evex prefixes don't make sense.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3084
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();
+if (getLexer().isNot(AsmToken::Identifier))

pengfei wrote:
> Unused assignment. It may suppose to be used on line 3086.
This would be used later. However, this should only be updated when there is a
prefix. I put it in the wrong place.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 marked an inline comment as done.
LiuChen3 added a comment.

> It allows more than two, right? like `{vex}{vex2}{vex3} instruction`. I think 
> it should be a bug for att.

Yes, my previous statement is incorrect; it should be 'two or more'. Thanks for
your correction.
We might need another patch to fix it.




Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3079
+  if (ForcedVEXEncoding != VEXEncoding_Default) {
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");

pengfei wrote:
> Do you need to eat the prefix here?
No. The prefix has already been eaten.
For example: vex vcvtps2pd xmm0, xmm1 .
The current token is 'vex' and the rest is 'vcvtps2pd xmm0, xmm1'. 'vcvtps2pd' is
the next token, which will be eaten at line 3082.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-20 Thread LiuChen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG776f92e06759: [X86] Add support for vex, vex2, vex3, and 
evex for MASM (authored by LiuChen3).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

Files:
  clang/lib/AST/Stmt.cpp
  clang/test/CodeGen/X86/ms-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3064,7 +3064,26 @@
   }
   continue;
 }
+// Parse MASM style pseudo prefixes.
+if (isParsingMSInlineAsm()) {
+  if (Name.equals_lower("vex"))
+ForcedVEXEncoding = VEXEncoding_VEX;
+  else if (Name.equals_lower("vex2"))
+ForcedVEXEncoding = VEXEncoding_VEX2;
+  else if (Name.equals_lower("vex3"))
+ForcedVEXEncoding = VEXEncoding_VEX3;
+  else if (Name.equals_lower("evex"))
+ForcedVEXEncoding = VEXEncoding_EVEX;
 
+  if (ForcedVEXEncoding != VEXEncoding_Default) {
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");
+// FIXME: The mnemonic won't match correctly if its not in lower case.
+Name = Parser.getTok().getString();
+NameLoc = Parser.getTok().getLoc();
+Parser.Lex();
+  }
+}
 break;
   }
 
@@ -4370,10 +4389,16 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+  // encoder and printer.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::IP_USE_VEX;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::IP_USE_VEX2;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
 Prefixes |= X86::IP_USE_VEX3;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefixes |= X86::IP_USE_EVEX;
 
   // Set encoded flags for {disp8} and {disp32}.
   if (ForcedDispEncoding == DispEncoding_Disp8)
Index: clang/test/CodeGen/X86/ms-inline-asm-prefix.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/ms-inline-asm-prefix.c
@@ -0,0 +1,14 @@
+// REQUIRES: x86-registered-target
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -fasm-blocks -mllvm -x86-asm-syntax=intel -S -emit-llvm -o -  | FileCheck %s -check-prefix=INTEL
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-windows-msvc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -fasm-blocks -mllvm -x86-asm-syntax=att -S -emit-llvm -o -  | FileCheck %s -check-prefix=ATT
+
+void check_inline_prefix(void) {
+  __asm {
+// INTEL: call void asm sideeffect inteldialect "{vex} vcvtps2pd xmm0, xmm1\0A\09{vex2} vcvtps2pd xmm0, xmm1\0A\09{vex3} vcvtps2pd xmm0, xmm1\0A\09{evex} vcvtps2pd xmm0, xmm1", "~{xmm0},~{dirflag},~{fpsr},~{flags}"()
+// ATT: call void asm sideeffect inteldialect "{vex} vcvtps2pd xmm0, xmm1\0A\09{vex2} vcvtps2pd xmm0, xmm1\0A\09{vex3} vcvtps2pd xmm0, xmm1\0A\09{evex} vcvtps2pd xmm0, xmm1", "~{xmm0},~{dirflag},~{fpsr},~{flags}"()
+vex vcvtps2pd xmm0, xmm1
+vex2 vcvtps2pd xmm0, xmm1
+vex3 vcvtps2pd xmm0, xmm1
+evex vcvtps2pd xmm0, xmm1
+  }
+}
Index: clang/lib/AST/Stmt.cpp
===
--- clang/lib/AST/Stmt.cpp
+++ clang/lib/AST/Stmt.cpp
@@ -791,7 +791,27 @@
 /// Assemble final IR asm string (MS-style).
 std::string MSAsmStmt::generateAsmString(const ASTContext &C) const {
   // FIXME: This needs to be translated into the IR string representation.
-  return std::string(AsmStr);
+  SmallVector Pieces;
+  AsmStr.split(Pieces, "\n\t");
+  std::string MSAsmString;
+  for (size_t I = 0, E = Pieces.size(); I < E; ++I) {
+StringRef Instruction = Pieces[I];
+// For vex/vex2/vex3/evex masm style prefix, convert it to att style
+// since we don't support masm style prefix in backend.
+if (Instruction.startswith("vex "))
+  MSAsmString += '{' + Instruction.substr(0, 3).str() + '}' +
+ Instruction.substr(3).str();
+else if (Instruction.startswith("vex2 ") ||
+ Instruction.startswith("vex3 ") || Instruction.startswith("evex "))
+  MSAsmString += '{' + Instruction.substr(0, 4).str() + '}' +
+ Instruction.substr(4).str();
+else
+  MSAsmString += Instruction.str();
+// If this is not the last instruction, adding back the '\n\t'.
+if (I < E - 1)
+  MSAsmString += "\n\t";
+  }
+  return MSAsmS

[PATCH] D60748: Adds an option "malign-pass-aggregate" to make the alignment of the struct and union parameters compatible with the default gcc

2020-10-08 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 abandoned this revision.
LiuChen3 added a comment.

In D60748#2310148 , @RKSimon wrote:

> @wxiao3 @LiuChen3 Are you still looking at this or should it be abandoned?

I will abandon this patch for it is difficult to confirm the behavior of gcc.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78699: [X86] Passing union type through register.

2020-10-08 Thread LiuChen via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG26cfb6e562f1: [X86] Passing union type through register 
(authored by LiuChen3).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78699/new/

https://reviews.llvm.org/D78699

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/X86/avx-union.c


Index: clang/test/CodeGen/X86/avx-union.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/avx-union.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature 
+avx -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature 
+avx512f -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX512
+// This tests verifies that a union parameter should pass by a vector 
regitster whose first eightbyte is SSE and the other eightbytes are SSEUP.
+
+typedef int __m256 __attribute__ ((__vector_size__ (32)));
+typedef int __m512 __attribute__ ((__vector_size__ (64)));
+
+union M256 {
+  double d;
+  __m256 m;
+};
+
+union M512 {
+  double d;
+  __m512 m;
+};
+
+extern void foo1(union M256 A);
+extern void foo2(union M512 A);
+union M256 m1;
+union M512 m2;
+// CHECK-LABEL: define dso_local void @test()
+// CHECK:   void @foo1(<4 x double>
+// AVX: call void @foo2(%union.M512* byval(%union.M512) align 64
+// AVX512:  call void @foo2(<8 x double>
+void test() {
+  foo1(m1);
+  foo2(m2);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -3061,6 +3061,7 @@
 
 // Classify the fields one at a time, merging the results.
 unsigned idx = 0;
+bool IsUnion = RT->isUnionType();
 for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
   uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
@@ -3071,14 +3072,17 @@
 continue;
 
   // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
-  // four eightbytes, or it contains unaligned fields, it has class MEMORY.
+  // eight eightbytes, or it contains unaligned fields, it has class 
MEMORY.
   //
-  // The only case a 256-bit wide vector could be used is when the struct
-  // contains a single 256-bit element. Since Lo and Hi logic isn't 
extended
-  // to work for sizes wider than 128, early check and fallback to memory.
+  // The only case a 256-bit or a 512-bit wide vector could be used is when
+  // the struct contains a single 256-bit or 512-bit element. Early check
+  // and fallback to memory.
   //
-  if (Size > 128 && (Size != getContext().getTypeSize(i->getType()) ||
- Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
+  // FIXME: Extended the Lo and Hi logic properly to work for size wider
+  // than 128.
+  if (Size > 128 &&
+  ((!IsUnion && Size != getContext().getTypeSize(i->getType())) ||
+   Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
 Lo = Memory;
 postMerge(Size, Lo, Hi);
 return;


Index: clang/test/CodeGen/X86/avx-union.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/avx-union.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx512f -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX512
+// This tests verifies that a union parameter should pass by a vector regitster whose first eightbyte is SSE and the other eightbytes are SSEUP.
+
+typedef int __m256 __attribute__ ((__vector_size__ (32)));
+typedef int __m512 __attribute__ ((__vector_size__ (64)));
+
+union M256 {
+  double d;
+  __m256 m;
+};
+
+union M512 {
+  double d;
+  __m512 m;
+};
+
+extern void foo1(union M256 A);
+extern void foo2(union M512 A);
+union M256 m1;
+union M512 m2;
+// CHECK-LABEL: define dso_local void @test()
+// CHECK:   void @foo1(<4 x double>
+// AVX: call void @foo2(%union.M512* byval(%union.M512) align 64
+// AVX512:  call void @foo2(<8 x double>
+void test() {
+  foo1(m1);
+  foo2(m2);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -3061,6 +3061,7 @@
 
 // Classify the fields one at a time, merging the results.
 unsigned idx = 0;
+bool IsUnion = RT->isUnionType();
 for (RecordDecl::field_iterator i = RD->field_beg

[PATCH] D78699: [X86] Passing union type through register.

2020-10-08 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D78699#2320678 , @bruno wrote:

> LGTM

Thanks for your review.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78699/new/

https://reviews.llvm.org/D78699

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78699: [X86] Passing union type through register.

2020-10-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D78699#2328541 , @dyung wrote:

> Hi, the test you added seems to pass both before and after your change, is 
> this intended?

Oh, yes. I made a mistake in my RUN line, so these tests were not really checked.
Thanks for your help!
I will make another patch to fix this test case.




Comment at: clang/test/CodeGen/X86/avx-union.c:22
+union M512 m2;
+// CHECK-LABEL: define dso_local void @test()
+// CHECK:   void @foo1(<4 x double>

dyung wrote:
> When I fix the run lines above, I'm not getting "dso_local". Might want to 
> double check that.
There actually is no 'dso_local'. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78699/new/

https://reviews.llvm.org/D78699

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

> D89105   appears to use only `"avx512vl , 
> avx512vnni | avxvnni"`.
> Does it mean `(avx512vl , avx512vnni) | avxvnni` or `avx512vl , (avx512vnni | 
> avxvnni)` ?

Yes. "avx512vl , avx512vnni | avxvnni" means (avx512vl , avx512vnni) | avxvnni. 
This is the reason why we need this patch.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89361: [X86][NFC] Fix RUN line bug in the testcase

2020-10-13 Thread LiuChen via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGbd05afcb3f40: [X86][NFC] Fix RUN line bug in the testcase 
(authored by LiuChen3).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Changed prior to commit:
  https://reviews.llvm.org/D89361?vs=298016&id=298024#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89361/new/

https://reviews.llvm.org/D89361

Files:
  clang/test/CodeGen/X86/avx-union.c


Index: clang/test/CodeGen/X86/avx-union.c
===
--- clang/test/CodeGen/X86/avx-union.c
+++ clang/test/CodeGen/X86/avx-union.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature 
+avx -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX
-// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature 
+avx512f -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX512
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature 
+avx -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature 
+avx512f -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK 
--check-prefix=AVX512
 // This tests verifies that a union parameter should pass by a vector 
regitster whose first eightbyte is SSE and the other eightbytes are SSEUP.
 
 typedef int __m256 __attribute__ ((__vector_size__ (32)));
@@ -19,8 +19,8 @@
 extern void foo2(union M512 A);
 union M256 m1;
 union M512 m2;
-// CHECK-LABEL: define dso_local void @test()
-// CHECK:   void @foo1(<4 x double>
+// CHECK-LABEL: define void @test()
+// CHECK:   call void @foo1(<4 x double>
 // AVX: call void @foo2(%union.M512* byval(%union.M512) align 64
 // AVX512:  call void @foo2(<8 x double>
 void test() {


Index: clang/test/CodeGen/X86/avx-union.c
===
--- clang/test/CodeGen/X86/avx-union.c
+++ clang/test/CodeGen/X86/avx-union.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX
-// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx512f -emit-llvm -o %t %s || FileCheck < %t %s --check-prefix=CHECK, AVX512
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+// RUN: %clang_cc1 -w -ffreestanding -triple x86_64-linux-gnu -target-feature +avx512f -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 // This tests verifies that a union parameter should pass by a vector regitster whose first eightbyte is SSE and the other eightbytes are SSEUP.
 
 typedef int __m256 __attribute__ ((__vector_size__ (32)));
@@ -19,8 +19,8 @@
 extern void foo2(union M512 A);
 union M256 m1;
 union M512 m2;
-// CHECK-LABEL: define dso_local void @test()
-// CHECK:   void @foo1(<4 x double>
+// CHECK-LABEL: define void @test()
+// CHECK:   call void @foo1(<4 x double>
 // AVX: call void @foo2(%union.M512* byval(%union.M512) align 64
 // AVX512:  call void @foo2(<8 x double>
 void test() {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89361: [X86][NFC] Fix RUN line bug in the testcase

2020-10-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Thanks for all of your help!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89361/new/

https://reviews.llvm.org/D89361

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/lib/CodeGen/CodeGenFunction.cpp:2389
 }
-if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature))
+if (!llvm::all_of(ReqFeatures, [&](StringRef Feature) {
+  if (!CallerFeatureMap.lookup(Feature)) {

echristo wrote:
> Not sure why the change here. It'd be good to be able to reuse the same code 
> here. What's up?
Since there is no need for complex feature processing here, this is just simple
handling.
Besides, the 'hasRequiredFeatures' function takes different input arguments and
does not handle 'MissingFeature'. 



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-18 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

ping?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89747: Add option to use older clang ABI behavior when passing certain union types as function arguments

2020-10-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Adding an option compatible with the old ABI is a good approach. Looks good to me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89747/new/

https://reviews.llvm.org/D89747

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-21 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D89184#2346453 , @pengfei wrote:

> LGTM. But I suggest you waiting for 1 or 2 days to see if other reviewers 
> object.

Sure. Thanks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-22 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya.
Herald added projects: clang, LLVM.
LiuChen3 requested review of this revision.

Currently, we lose the encoding information if we use inline assembly.
The encoding for the inline assembly will remain the default even if we
add the vex/evex prefix.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D90009

Files:
  clang/test/CodeGen/X86/att-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
  llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp

Index: llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
===
--- llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -346,6 +346,16 @@
 O << "\trepne\t";
   else if (Flags & X86::IP_HAS_REPEAT)
 O << "\trep\t";
+
+  // These all require a pseudo prefix
+  if (Flags & X86::Force_VEXEncoding)
+O << "\t{vex}";
+  else if (Flags & X86::Force_VEX2Encoding)
+O << "\t{vex2}";
+  else if (Flags & X86::Force_VEX3Encoding)
+O << "\t{vex3}";
+  else if (Flags & X86::Force_EVEXEncoding)
+O << "\t{evex}";
 }
 
 void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
Index: llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
===
--- llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -55,15 +55,19 @@
   /// The constants to describe instr prefixes if there are
   enum IPREFIXES {
 IP_NO_PREFIX = 0,
-IP_HAS_OP_SIZE = 1,
-IP_HAS_AD_SIZE = 2,
-IP_HAS_REPEAT_NE = 4,
-IP_HAS_REPEAT = 8,
-IP_HAS_LOCK = 16,
-IP_HAS_NOTRACK = 32,
-IP_USE_VEX3 = 64,
-IP_USE_DISP8 = 128,
-IP_USE_DISP32 = 256,
+IP_HAS_OP_SIZE = 1U << 0,
+IP_HAS_AD_SIZE = 1U << 1,
+IP_HAS_REPEAT_NE = 1U << 2,
+IP_HAS_REPEAT = 1U << 3,
+IP_HAS_LOCK = 1U << 4,
+IP_HAS_NOTRACK = 1U << 5,
+IP_USE_VEX3 = 1U << 6,
+IP_USE_DISP8 = 1U << 7,
+IP_USE_DISP32 = 1U << 8,
+Force_VEXEncoding = 1U << 9,
+Force_VEX2Encoding = 1U << 10,
+Force_VEX3Encoding = 1U << 11,
+Force_EVEXEncoding = 1U << 12,
   };
 
   enum OperandType : unsigned {
Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -83,6 +83,7 @@
   enum VEXEncoding {
 VEXEncoding_Default,
 VEXEncoding_VEX,
+VEXEncoding_VEX2,
 VEXEncoding_VEX3,
 VEXEncoding_EVEX,
   };
@@ -2818,8 +2819,10 @@
 return Error(Parser.getTok().getLoc(), "Expected '}'");
   Parser.Lex(); // Eat curly.
 
-  if (Prefix == "vex" || Prefix == "vex2")
+  if (Prefix == "vex")
 ForcedVEXEncoding = VEXEncoding_VEX;
+  else if (Prefix == "vex2")
+ForcedVEXEncoding = VEXEncoding_VEX2;
   else if (Prefix == "vex3")
 ForcedVEXEncoding = VEXEncoding_VEX3;
   else if (Prefix == "evex")
@@ -3837,6 +3840,7 @@
 return Match_Unsupported;
 
   if ((ForcedVEXEncoding == VEXEncoding_VEX ||
+   ForcedVEXEncoding == VEXEncoding_VEX2 ||
ForcedVEXEncoding == VEXEncoding_VEX3) &&
   (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
 return Match_Unsupported;
@@ -3879,10 +3883,19 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // Passing the prefix info to Printer if VEX or EVEX encoding is forced.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::Force_VEXEncoding;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::Force_VEX2Encoding;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefixes |= X86::Force_EVEXEncoding;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3) {
+// If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
+// encoder.
 Prefixes |= X86::IP_USE_VEX3;
+Prefixes |= X86::Force_VEX3Encoding;
+  }
 
   // Set encoded flags for {disp8} and {disp32}.
   if (ForcedDispEncoding == DispEncoding_Disp8)
Index: clang/test/CodeGen/X86/att-inline-asm-prefix.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/att-inline-asm-prefix.c
@@ -0,0 +1,17 @@
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -S -o -  | FileCheck %s -check-prefix CHECK
+
+// This test is to check if the prefix in inline assembly is correctly
+// preserved.
+
+void check_inline_prefix(void) {
+  __asm__ (
+// CHECK: {vex} vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex2} vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex3} vcvtps2p

[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-22 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:2824
 ForcedVEXEncoding = VEXEncoding_VEX;
+  else if (Prefix == "vex2")
+ForcedVEXEncoding = VEXEncoding_VEX2;

pengfei wrote:
> I think it's reasonable if we generate "{vex}" for input "{vex2}"
GCC will out put {vex2} if the input is {vex2}.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-22 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/test/CodeGen/X86/att-inline-asm-prefix.c:14
+"{vex2} vcvtps2pd %xmm0, %xmm1\n\t"
+"{vex3} vcvtps2pd %xmm0, %xmm1\n\t"
+"{evex} vcvtps2pd %xmm0, %xmm1\n\t"

> Does this bug only effect the printing of inline assembly to a .s file? 

Yes. Using "-c" to output the .o file directly will produce the right encoding.

But if we first output the .s file and then compile it, the final encoding is 
wrong. For this example, it will all be the two-byte vex prefix.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3896
+// encoder.
 Prefixes |= X86::IP_USE_VEX3;
+Prefixes |= X86::Force_VEX3Encoding;

craig.topper wrote:
> Why do we need Force_VEX3Encoding and IP_USE_VEX3?
I think this will make all of IP_USE_VEX3 the 3-byte vex prefix instruction 
output with {vex3}. The IP_USE_VEX3 is for encoder.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-22 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3896
+// encoder.
 Prefixes |= X86::IP_USE_VEX3;
+Prefixes |= X86::Force_VEX3Encoding;

craig.topper wrote:
> LiuChen3 wrote:
> > craig.topper wrote:
> > > Why do we need Force_VEX3Encoding and IP_USE_VEX3?
> > I think this will make all of IP_USE_VEX3 the 3-byte vex prefix instruction 
> > output with {vex3}. The IP_USE_VEX3 is for encoder.
> Isn't this the only place we set IP_USE_VEX3?
Yes. I think it's reasonable to use this flag only, and to rename the other
Force_* flags to IP_USE_*.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-23 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 300188.
LiuChen3 added a comment.

Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

Files:
  clang/test/CodeGen/X86/att-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
  llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp

Index: llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
===
--- llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -346,6 +346,21 @@
 O << "\trepne\t";
   else if (Flags & X86::IP_HAS_REPEAT)
 O << "\trep\t";
+
+  // These all require a pseudo prefix
+  if (Flags & X86::IP_USE_VEX)
+O << "\t{vex}";
+  else if (Flags & X86::IP_USE_VEX2)
+O << "\t{vex2}";
+  else if (Flags & X86::IP_USE_VEX3)
+O << "\t{vex3}";
+  else if (Flags & X86::IP_USE_EVEX)
+O << "\t{evex}";
+
+  if (Flags & X86::IP_USE_DISP8)
+O << "\t{disp8}";
+  else if (Flags & X86::IP_USE_DISP32)
+O << "\t{disp32}";
 }
 
 void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
Index: llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
===
--- llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -55,15 +55,18 @@
   /// The constants to describe instr prefixes if there are
   enum IPREFIXES {
 IP_NO_PREFIX = 0,
-IP_HAS_OP_SIZE = 1,
-IP_HAS_AD_SIZE = 2,
-IP_HAS_REPEAT_NE = 4,
-IP_HAS_REPEAT = 8,
-IP_HAS_LOCK = 16,
-IP_HAS_NOTRACK = 32,
-IP_USE_VEX3 = 64,
-IP_USE_DISP8 = 128,
-IP_USE_DISP32 = 256,
+IP_HAS_OP_SIZE =   1U << 0,
+IP_HAS_AD_SIZE =   1U << 1,
+IP_HAS_REPEAT_NE = 1U << 2,
+IP_HAS_REPEAT =1U << 3,
+IP_HAS_LOCK =  1U << 4,
+IP_HAS_NOTRACK =   1U << 5,
+IP_USE_VEX =   1U << 6,
+IP_USE_VEX2 =  1U << 7,
+IP_USE_VEX3 =  1U << 8,
+IP_USE_EVEX =  1U << 9,
+IP_USE_DISP8 = 1U << 10,
+IP_USE_DISP32 =1U << 11,
   };
 
   enum OperandType : unsigned {
Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -83,6 +83,7 @@
   enum VEXEncoding {
 VEXEncoding_Default,
 VEXEncoding_VEX,
+VEXEncoding_VEX2,
 VEXEncoding_VEX3,
 VEXEncoding_EVEX,
   };
@@ -2818,8 +2819,10 @@
 return Error(Parser.getTok().getLoc(), "Expected '}'");
   Parser.Lex(); // Eat curly.
 
-  if (Prefix == "vex" || Prefix == "vex2")
+  if (Prefix == "vex")
 ForcedVEXEncoding = VEXEncoding_VEX;
+  else if (Prefix == "vex2")
+ForcedVEXEncoding = VEXEncoding_VEX2;
   else if (Prefix == "vex3")
 ForcedVEXEncoding = VEXEncoding_VEX3;
   else if (Prefix == "evex")
@@ -3837,6 +3840,7 @@
 return Match_Unsupported;
 
   if ((ForcedVEXEncoding == VEXEncoding_VEX ||
+   ForcedVEXEncoding == VEXEncoding_VEX2 ||
ForcedVEXEncoding == VEXEncoding_VEX3) &&
   (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
 return Match_Unsupported;
@@ -3879,10 +3883,16 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+  // encoder and printer.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::IP_USE_VEX;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::IP_USE_VEX2;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
 Prefixes |= X86::IP_USE_VEX3;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefixes |= X86::IP_USE_EVEX;
 
   // Set encoded flags for {disp8} and {disp32}.
   if (ForcedDispEncoding == DispEncoding_Disp8)
Index: clang/test/CodeGen/X86/att-inline-asm-prefix.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/att-inline-asm-prefix.c
@@ -0,0 +1,25 @@
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -S -o -  | FileCheck %s -check-prefix CHECK
+
+// This test is to check if the prefix in inline assembly is correctly
+// preserved.
+
+void check_inline_prefix(void) {
+  __asm__ (
+// CHECK: vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex} vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex2} vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex3} vcvtps2pd %xmm0, %xmm1
+// CHECK: {evex} vcvtps2pd %xmm0, %xmm1
+// CHECK: movl $1, (%rax)
+// CHECK: {disp8}  movl $1, (%rax)
+// CHECK: {disp32} movl $1, (%rax)
+"vcvtps2pd %xmm0, %xmm1\n\t"
+"{vex} vcvt

[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-23 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp:352
+  if (Flags & X86::Force_VEXEncoding)
+O << "\t{vex}";
+  else if (Flags & X86::Force_VEX2Encoding)

pengfei wrote:
> `"\t{vex}\t"` ?
No "\t" needs to be added to the end of the prefix. The printer will handle it
correctly.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-25 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Hi, @echristo. What's your opinion here?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-25 Thread LiuChen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG180548c5c784: [X86] VEX/EVEX prefix doesn't work for 
inline assembly. (authored by LiuChen3).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

Files:
  clang/test/CodeGen/X86/att-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
  llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp

Index: llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
===
--- llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -346,6 +346,21 @@
 O << "\trepne\t";
   else if (Flags & X86::IP_HAS_REPEAT)
 O << "\trep\t";
+
+  // These all require a pseudo prefix
+  if (Flags & X86::IP_USE_VEX)
+O << "\t{vex}";
+  else if (Flags & X86::IP_USE_VEX2)
+O << "\t{vex2}";
+  else if (Flags & X86::IP_USE_VEX3)
+O << "\t{vex3}";
+  else if (Flags & X86::IP_USE_EVEX)
+O << "\t{evex}";
+
+  if (Flags & X86::IP_USE_DISP8)
+O << "\t{disp8}";
+  else if (Flags & X86::IP_USE_DISP32)
+O << "\t{disp32}";
 }
 
 void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
Index: llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
===
--- llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -55,15 +55,18 @@
   /// The constants to describe instr prefixes if there are
   enum IPREFIXES {
 IP_NO_PREFIX = 0,
-IP_HAS_OP_SIZE = 1,
-IP_HAS_AD_SIZE = 2,
-IP_HAS_REPEAT_NE = 4,
-IP_HAS_REPEAT = 8,
-IP_HAS_LOCK = 16,
-IP_HAS_NOTRACK = 32,
-IP_USE_VEX3 = 64,
-IP_USE_DISP8 = 128,
-IP_USE_DISP32 = 256,
+IP_HAS_OP_SIZE =   1U << 0,
+IP_HAS_AD_SIZE =   1U << 1,
+IP_HAS_REPEAT_NE = 1U << 2,
+IP_HAS_REPEAT =1U << 3,
+IP_HAS_LOCK =  1U << 4,
+IP_HAS_NOTRACK =   1U << 5,
+IP_USE_VEX =   1U << 6,
+IP_USE_VEX2 =  1U << 7,
+IP_USE_VEX3 =  1U << 8,
+IP_USE_EVEX =  1U << 9,
+IP_USE_DISP8 = 1U << 10,
+IP_USE_DISP32 =1U << 11,
   };
 
   enum OperandType : unsigned {
Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -83,6 +83,7 @@
   enum VEXEncoding {
 VEXEncoding_Default,
 VEXEncoding_VEX,
+VEXEncoding_VEX2,
 VEXEncoding_VEX3,
 VEXEncoding_EVEX,
   };
@@ -2818,8 +2819,10 @@
 return Error(Parser.getTok().getLoc(), "Expected '}'");
   Parser.Lex(); // Eat curly.
 
-  if (Prefix == "vex" || Prefix == "vex2")
+  if (Prefix == "vex")
 ForcedVEXEncoding = VEXEncoding_VEX;
+  else if (Prefix == "vex2")
+ForcedVEXEncoding = VEXEncoding_VEX2;
   else if (Prefix == "vex3")
 ForcedVEXEncoding = VEXEncoding_VEX3;
   else if (Prefix == "evex")
@@ -3837,6 +3840,7 @@
 return Match_Unsupported;
 
   if ((ForcedVEXEncoding == VEXEncoding_VEX ||
+   ForcedVEXEncoding == VEXEncoding_VEX2 ||
ForcedVEXEncoding == VEXEncoding_VEX3) &&
   (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
 return Match_Unsupported;
@@ -3879,10 +3883,16 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+  // encoder and printer.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::IP_USE_VEX;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::IP_USE_VEX2;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
 Prefixes |= X86::IP_USE_VEX3;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefixes |= X86::IP_USE_EVEX;
 
   // Set encoded flags for {disp8} and {disp32}.
   if (ForcedDispEncoding == DispEncoding_Disp8)
Index: clang/test/CodeGen/X86/att-inline-asm-prefix.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/att-inline-asm-prefix.c
@@ -0,0 +1,25 @@
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc -target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl -S -o -  | FileCheck %s -check-prefix CHECK
+
+// This test is to check if the prefix in inline assembly is correctly
+// preserved.
+
+void check_inline_prefix(void) {
+  __asm__ (
+// CHECK: vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex} vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex2} vcvtps2pd %xmm0, %xmm1
+// CHECK: {vex3} vcvtps2pd %xmm0, %xmm1
+// CHECK: {evex} vcvtps2pd %xmm0, %xm

[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-26 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D90009#2354849 , @kiranchandramohan 
wrote:

> I have added a fix to run the test only when the X86 target is available. 
> Please feel free to change if it is not the correct fix.
> https://github.com/llvm/llvm-project/commit/c551ba0e90bd2b49ef501d591f8362ba44e5484d

Thanks for your help!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-28 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Ping?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-28 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D89184#2360846 , @echristo wrote:

> I'll take a look tomorrow, sorry for the delay.

No problem. Thanks!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-29 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D89184#2363591 , @echristo wrote:

> Let's go ahead and unblock you, but getting a lot of this refactored would be 
> great if you can. I think it's hitting the limits of the original design. :)

Thanks!  :)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-10-29 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya.
Herald added projects: clang, LLVM.
LiuChen3 requested review of this revision.

For MASM syntax, the prefixes are not enclosed in braces.
The assembly code should look like:

  "evex vcvtps2pd xmm0, xmm1"

There are still some avx512 tests that need to be improved with the 'evex'
prefix, but I think it's better to discuss this syntax first.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D90441

Files:
  clang/test/CodeGen/X86/ms-inline-asm-prefix.c
  llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
  llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
  llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
  llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp

Index: llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
===
--- llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -42,6 +42,17 @@
 raw_ostream &OS) {
   printInstFlags(MI, OS);
 
+  unsigned Flags = MI->getFlags();
+  // These all require a pseudo prefix
+  if (Flags & X86::IP_USE_VEX)
+OS << "\tvex";
+  else if (Flags & X86::IP_USE_VEX2)
+OS << "\tvex2";
+  else if (Flags & X86::IP_USE_VEX3)
+OS << "\tvex3";
+  else if (Flags & X86::IP_USE_EVEX)
+OS << "\tevex";
+
   // In 16-bit mode, print data16 as data32.
   if (MI->getOpcode() == X86::DATA16_PREFIX &&
   STI.getFeatureBits()[X86::Mode16Bit]) {
Index: llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
===
--- llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -346,21 +346,6 @@
 O << "\trepne\t";
   else if (Flags & X86::IP_HAS_REPEAT)
 O << "\trep\t";
-
-  // These all require a pseudo prefix
-  if (Flags & X86::IP_USE_VEX)
-O << "\t{vex}";
-  else if (Flags & X86::IP_USE_VEX2)
-O << "\t{vex2}";
-  else if (Flags & X86::IP_USE_VEX3)
-O << "\t{vex3}";
-  else if (Flags & X86::IP_USE_EVEX)
-O << "\t{evex}";
-
-  if (Flags & X86::IP_USE_DISP8)
-O << "\t{disp8}";
-  else if (Flags & X86::IP_USE_DISP32)
-O << "\t{disp32}";
 }
 
 void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
Index: llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
===
--- llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -48,6 +48,22 @@
 
   printInstFlags(MI, OS);
 
+  unsigned Flags = MI->getFlags();
+  // These all require a pseudo prefix
+  if (Flags & X86::IP_USE_VEX)
+OS << "\t{vex}";
+  else if (Flags & X86::IP_USE_VEX2)
+OS << "\t{vex2}";
+  else if (Flags & X86::IP_USE_VEX3)
+OS << "\t{vex3}";
+  else if (Flags & X86::IP_USE_EVEX)
+OS << "\t{evex}";
+
+  if (Flags & X86::IP_USE_DISP8)
+OS << "\t{disp8}";
+  else if (Flags & X86::IP_USE_DISP32)
+OS << "\t{disp32}";
+
   // Output CALLpcrel32 as "callq" in 64-bit mode.
   // In Intel annotation it's always emitted as "call".
   //
Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2847,7 +2847,30 @@
   }
   continue;
 }
-
+// Parse MASM style pseudo prefixes.
+if (isParsingIntelSyntax()) {
+  bool IsPrefix = false;
+  if (Name == "vex" || Name == "vex2") {
+ForcedVEXEncoding = VEXEncoding_VEX;
+IsPrefix = true;
+  }
+  else if (Name == "vex3") {
+ForcedVEXEncoding = VEXEncoding_VEX3;
+IsPrefix = true;
+  }
+  else if (Name == "evex") {
+ForcedVEXEncoding = VEXEncoding_EVEX;
+IsPrefix = true;
+  }
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");
+// FIXME: The mnemonic won't match correctly if its not in lower case.
+Name = Parser.getTok().getString();
+Parser.Lex();
+  }
+}
 break;
   }
 
@@ -4146,10 +4169,16 @@
 
   MCInst Inst;
 
-  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
-  // encoder.
-  if (ForcedVEXEncoding == VEXEncoding_VEX3)
+  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
+  // encoder and printer.
+  if (ForcedVEXEncoding == VEXEncoding_VEX)
+Prefixes |= X86::IP_USE_VEX;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
+Prefixes |= X86::IP_USE_VEX2;
+  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
 Prefixes |= X86::IP_USE_VEX3;
+  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
+Prefi

[PATCH] D89105: [X86] Support Intel avxvnni

2020-10-30 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 marked 2 inline comments as done.
LiuChen3 added a comment.

Thanks for all of your review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89105/new/

https://reviews.llvm.org/D89105

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-02 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/test/CodeGen/X86/ms-inline-asm-prefix.c:1
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-widows-msvc 
-target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl 
-fasm-blocks -mllvm -x86-asm-syntax=intel -S -o -  | FileCheck %s -check-prefix 
CHECK
+

epastor wrote:
> epastor wrote:
> > pengfei wrote:
> > > pengfei wrote:
> > > > pengfei wrote:
> > > > > Maybe need `// REQUIRES: x86-registered-target`
> > > > You may need add att check too since you modified the att code.
> > > Should it be avalible only when `-fms-compatibility`
> > The triple is misspelled; it should be `x86_64-pc-windows-msvc` (the "n" in 
> > windows is missing)
> A broader question: As written, this applies to anything in Intel syntax. Is 
> this an Intel syntax feature, or a MASM feature?
Thanks for your review. After checking with the MSVC people, I found that the
prefix without braces is not Intel syntax. Actually, we don't know of any
document that says what the prefix should be. At least, gcc does use "{}" in
Intel syntax, and so does clang. We currently decide to only support parsing the
prefix without braces in MSVC mode.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97259: [X86] Support amx-int8 intrinsic.

2021-02-22 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added subscribers: pengfei, hiraditya.
LiuChen3 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Adding support for intrinsics of TDPBSUD/TDPBUSD/TDPBUUD.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97259

Files:
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/lib/Headers/amxintrin.h
  clang/test/CodeGen/X86/amx_api.c
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/lib/Target/X86/X86ExpandPseudo.cpp
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrAMX.td
  llvm/lib/Target/X86/X86LowerAMXType.cpp
  llvm/lib/Target/X86/X86PreTileConfig.cpp
  llvm/lib/Target/X86/X86RegisterInfo.cpp
  llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Index: llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
===
--- llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -19,6 +19,9 @@
 ; CHECK-NEXT:tileloadd (%rsi,%rdx), %tmm1
 ; CHECK-NEXT:tileloadd (%rsi,%rdx), %tmm2
 ; CHECK-NEXT:tdpbssd %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbsud %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbusd %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbuud %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tilestored %tmm0, (%rdi,%rdx)
 ; CHECK-NEXT:tilerelease
 ; CHECK-NEXT:vzeroupper
@@ -26,8 +29,11 @@
   %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
   %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
   %b = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
-  %d = call x86_amx @llvm.x86.tdpbssd.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
-  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d)
+  %d0 = call x86_amx @llvm.x86.tdpbssd.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
+  %d1 = call x86_amx @llvm.x86.tdpbsud.internal(i16 8, i16 8, i16 8, x86_amx %d0, x86_amx %a, x86_amx %b)
+  %d2 = call x86_amx @llvm.x86.tdpbusd.internal(i16 8, i16 8, i16 8, x86_amx %d1, x86_amx %a, x86_amx %b)
+  %d3 = call x86_amx @llvm.x86.tdpbuud.internal(i16 8, i16 8, i16 8, x86_amx %d2, x86_amx %a, x86_amx %b)
+  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d3)
 
   ret void
 }
@@ -35,4 +41,7 @@
 declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
 declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
 declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbsud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbusd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
===
--- llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -884,6 +884,9 @@
   // We only collect the tile shape that is defined.
   case X86::PTILELOADDV:
   case X86::PTDPBSSDV:
+  case X86::PTDPBSUDV:
+  case X86::PTDPBUSDV:
+  case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
 MachineOperand &MO1 = MI->getOperand(1);
 MachineOperand &MO2 = MI->getOperand(2);
Index: llvm/lib/Target/X86/X86PreTileConfig.cpp
===
--- llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -155,6 +155,9 @@
 llvm_unreachable("Unexpected machine instruction on tile");
   case X86::PTILELOADDV:
   case X86::PTDPBSSDV:
+  case X86::PTDPBSUDV:
+  case X86::PTDPBUSDV:
+  case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
 MachineOperand &MO1 = const_cast(MI.getOperand(1));
 MachineOperand &MO2 = const_cast(MI.getOperand(2));
@@ -249,6 +252,9 @@
   case X86::PTILELOADDV:
   case X86::PTILESTOREDV:
   case X86::PTDPBSSDV:
+  case X86::PTDPBSUDV:
+  case X86::PTDPBUSDV:
+  case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
 return true;
   }
Index: llvm/lib/Target/X86/X86LowerAMXType.cpp
===
--- llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -67,7 +67,10 @@
   }
   // a * b + c
   // The shape depends on which operand.
-  case Intrinsic::x86_tdpbssd_internal: {
+  case Intrinsic::x86_tdpbssd_internal:
+  case Intrinsic::x86_tdpbsud_internal:
+  case Intrinsic::x86_tdpbusd_internal:
+  case Intrinsic::x86_tdpbuud_internal: {
 switch (OpNo) {
 case 3:
   Row = II->getArgOperand(0);
Index: llvm/lib/Target/X86/X86InstrAMX.td
===
--- llvm/lib/Target/X86/X86InstrAMX.td
+++ llvm/lib/Tar

[PATCH] D97259: [X86] Support amx-int8 intrinsic.

2021-02-22 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/include/clang/Basic/BuiltinsX86_64.def:106
 TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, "V256iUsUsUsV256iV256iV256i", 
"n", "amx-int8")
+
+TARGET_BUILTIN(__builtin_ia32_tdpbsud_internal, "V256iUsUsUsV256iV256iV256i", 
"n", "amx-int8")

Forgot to delete the blank line. I will delete these in next patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97259/new/

https://reviews.llvm.org/D97259

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97259: [X86] Support amx-int8 intrinsic.

2021-02-23 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:4625
+
+case Intrinsic::x86_tdpbssd_internal:
+case Intrinsic::x86_tdpbsud_internal:

pengfei wrote:
> Nit: Maybe we can try to move the lowering of the AMX intrinsics to the td 
> file as a follow up.
Good idea. I will work on that.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97259/new/

https://reviews.llvm.org/D97259

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97259: [X86] Support amx-int8 intrinsic.

2021-02-23 Thread LiuChen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf8b9035aae44: [X86] Support amx-int8 intrinsic. (authored by 
LiuChen3).

Changed prior to commit:
  https://reviews.llvm.org/D97259?vs=325687&id=325706#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97259/new/

https://reviews.llvm.org/D97259

Files:
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/lib/Headers/amxintrin.h
  clang/test/CodeGen/X86/amx_api.c
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/lib/Target/X86/X86ExpandPseudo.cpp
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrAMX.td
  llvm/lib/Target/X86/X86LowerAMXType.cpp
  llvm/lib/Target/X86/X86PreTileConfig.cpp
  llvm/lib/Target/X86/X86RegisterInfo.cpp
  llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Index: llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
===
--- llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -19,6 +19,9 @@
 ; CHECK-NEXT:tileloadd (%rsi,%rdx), %tmm1
 ; CHECK-NEXT:tileloadd (%rsi,%rdx), %tmm2
 ; CHECK-NEXT:tdpbssd %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbsud %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbusd %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbuud %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tilestored %tmm0, (%rdi,%rdx)
 ; CHECK-NEXT:tilerelease
 ; CHECK-NEXT:vzeroupper
@@ -26,8 +29,11 @@
   %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
   %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
   %b = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
-  %d = call x86_amx @llvm.x86.tdpbssd.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
-  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d)
+  %d0 = call x86_amx @llvm.x86.tdpbssd.internal(i16 8, i16 8, i16 8, x86_amx %c, x86_amx %a, x86_amx %b)
+  %d1 = call x86_amx @llvm.x86.tdpbsud.internal(i16 8, i16 8, i16 8, x86_amx %d0, x86_amx %a, x86_amx %b)
+  %d2 = call x86_amx @llvm.x86.tdpbusd.internal(i16 8, i16 8, i16 8, x86_amx %d1, x86_amx %a, x86_amx %b)
+  %d3 = call x86_amx @llvm.x86.tdpbuud.internal(i16 8, i16 8, i16 8, x86_amx %d2, x86_amx %a, x86_amx %b)
+  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d3)
 
   ret void
 }
@@ -35,4 +41,7 @@
 declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
 declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
 declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbsud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbusd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
===
--- llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -884,6 +884,9 @@
   // We only collect the tile shape that is defined.
   case X86::PTILELOADDV:
   case X86::PTDPBSSDV:
+  case X86::PTDPBSUDV:
+  case X86::PTDPBUSDV:
+  case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
 MachineOperand &MO1 = MI->getOperand(1);
 MachineOperand &MO2 = MI->getOperand(2);
Index: llvm/lib/Target/X86/X86PreTileConfig.cpp
===
--- llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -155,6 +155,9 @@
 llvm_unreachable("Unexpected machine instruction on tile");
   case X86::PTILELOADDV:
   case X86::PTDPBSSDV:
+  case X86::PTDPBSUDV:
+  case X86::PTDPBUSDV:
+  case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
 MachineOperand &MO1 = const_cast(MI.getOperand(1));
 MachineOperand &MO2 = const_cast(MI.getOperand(2));
@@ -249,6 +252,9 @@
   case X86::PTILELOADDV:
   case X86::PTILESTOREDV:
   case X86::PTDPBSSDV:
+  case X86::PTDPBSUDV:
+  case X86::PTDPBUSDV:
+  case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
 return true;
   }
Index: llvm/lib/Target/X86/X86LowerAMXType.cpp
===
--- llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -67,7 +67,10 @@
   }
   // a * b + c
   // The shape depends on which operand.
-  case Intrinsic::x86_tdpbssd_internal: {
+  case Intrinsic::x86_tdpbssd_internal:
+  case Intrinsic::x86_tdpbsud_internal:
+  case Intrinsic::x86_tdpbusd_internal:
+  case Intrinsic::x86_tdpbuud_internal: {
 switch (OpNo) {
 case 3:
   Row = II->getArgOperand(0);
Index: llvm/lib/Target/X86/X86InstrAMX.td
==

[PATCH] D97358: [X86] Support amx-bf16 intrinsic.

2021-02-23 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added subscribers: pengfei, hiraditya.
LiuChen3 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Adding support for intrinsics of AMX-BF16.
This patch also fixes a bug where AMX-INT8 instructions would be selected with the wrong
predicate.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97358

Files:
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/lib/Headers/amxintrin.h
  clang/test/CodeGen/X86/amx_api.c
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/lib/Target/X86/X86ExpandPseudo.cpp
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrAMX.td
  llvm/lib/Target/X86/X86LowerAMXType.cpp
  llvm/lib/Target/X86/X86PreTileConfig.cpp
  llvm/lib/Target/X86/X86RegisterInfo.cpp
  llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Index: llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
===
--- llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -1,11 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-int8,+amx-bf16 -verify-machineinstrs | FileCheck %s
 
 define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
 ; CHECK-LABEL: test_amx:
 ; CHECK:   # %bb.0:
-; CHECK-NEXT:vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:xorps %xmm0, %xmm0
+; CHECK-NEXT:movups %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:movups %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:movups %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:movups %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:movb $1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:movb $8, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:movw $8, -{{[0-9]+}}(%rsp)
@@ -22,9 +25,9 @@
 ; CHECK-NEXT:tdpbsud %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbusd %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbuud %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbf16ps %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tilestored %tmm0, (%rdi,%rdx)
 ; CHECK-NEXT:tilerelease
-; CHECK-NEXT:vzeroupper
 ; CHECK-NEXT:retq
   %c = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
   %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride)
@@ -33,7 +36,8 @@
   %d1 = call x86_amx @llvm.x86.tdpbsud.internal(i16 8, i16 8, i16 8, x86_amx %d0, x86_amx %a, x86_amx %b)
   %d2 = call x86_amx @llvm.x86.tdpbusd.internal(i16 8, i16 8, i16 8, x86_amx %d1, x86_amx %a, x86_amx %b)
   %d3 = call x86_amx @llvm.x86.tdpbuud.internal(i16 8, i16 8, i16 8, x86_amx %d2, x86_amx %a, x86_amx %b)
-  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d3)
+  %d4 = call x86_amx @llvm.x86.tdpbf16ps.internal(i16 8, i16 8, i16 8, x86_amx %d3, x86_amx %a, x86_amx %b)
+  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d4)
 
   ret void
 }
@@ -44,4 +48,5 @@
 declare x86_amx @llvm.x86.tdpbsud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbusd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbf16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
===
--- llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -888,6 +888,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = MI->getOperand(1);
 MachineOperand &MO2 = MI->getOperand(2);
 ShapeT Shape(&MO1, &MO2, MRI);
Index: llvm/lib/Target/X86/X86PreTileConfig.cpp
===
--- llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -159,6 +159,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = const_cast(MI.getOperand(1));
 MachineOperand &MO2 = const_cast(MI.getOperand(2));
 ShapeT Shape(&MO1, &MO2, MRI);
@@ -256,6 +257,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 return true;
   }
 }
Index: llvm/lib/Target/X86/X86LowerAMXType.cpp
===
--- llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -70,7 +70,8 @@
   case Intrinsic::x86_tdpbssd_internal:
   case Intrinsic::x86_tdpbsud_internal:
   case Intrinsic::x

[PATCH] D97358: [X86] Support amx-bf16 intrinsic.

2021-02-23 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 325988.
LiuChen3 added a comment.

Adding back 'avx512f' to amx-tile-basic.ll


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97358/new/

https://reviews.llvm.org/D97358

Files:
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/lib/Headers/amxintrin.h
  clang/test/CodeGen/X86/amx_api.c
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/lib/Target/X86/X86ExpandPseudo.cpp
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrAMX.td
  llvm/lib/Target/X86/X86LowerAMXType.cpp
  llvm/lib/Target/X86/X86PreTileConfig.cpp
  llvm/lib/Target/X86/X86RegisterInfo.cpp
  llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Index: llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
===
--- llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-int8,+amx-bf16,+avx512f -verify-machineinstrs | FileCheck %s
 
 define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
 ; CHECK-LABEL: test_amx:
@@ -22,6 +22,7 @@
 ; CHECK-NEXT:tdpbsud %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbusd %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbuud %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbf16ps %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tilestored %tmm0, (%rdi,%rdx)
 ; CHECK-NEXT:tilerelease
 ; CHECK-NEXT:vzeroupper
@@ -33,7 +34,8 @@
   %d1 = call x86_amx @llvm.x86.tdpbsud.internal(i16 8, i16 8, i16 8, x86_amx %d0, x86_amx %a, x86_amx %b)
   %d2 = call x86_amx @llvm.x86.tdpbusd.internal(i16 8, i16 8, i16 8, x86_amx %d1, x86_amx %a, x86_amx %b)
   %d3 = call x86_amx @llvm.x86.tdpbuud.internal(i16 8, i16 8, i16 8, x86_amx %d2, x86_amx %a, x86_amx %b)
-  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d3)
+  %d4 = call x86_amx @llvm.x86.tdpbf16ps.internal(i16 8, i16 8, i16 8, x86_amx %d3, x86_amx %a, x86_amx %b)
+  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d4)
 
   ret void
 }
@@ -44,4 +46,5 @@
 declare x86_amx @llvm.x86.tdpbsud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbusd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbf16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
===
--- llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -888,6 +888,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = MI->getOperand(1);
 MachineOperand &MO2 = MI->getOperand(2);
 ShapeT Shape(&MO1, &MO2, MRI);
Index: llvm/lib/Target/X86/X86PreTileConfig.cpp
===
--- llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -159,6 +159,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = const_cast(MI.getOperand(1));
 MachineOperand &MO2 = const_cast(MI.getOperand(2));
 ShapeT Shape(&MO1, &MO2, MRI);
@@ -256,6 +257,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 return true;
   }
 }
Index: llvm/lib/Target/X86/X86LowerAMXType.cpp
===
--- llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -70,7 +70,8 @@
   case Intrinsic::x86_tdpbssd_internal:
   case Intrinsic::x86_tdpbsud_internal:
   case Intrinsic::x86_tdpbusd_internal:
-  case Intrinsic::x86_tdpbuud_internal: {
+  case Intrinsic::x86_tdpbuud_internal:
+  case Intrinsic::x86_tdpbf16ps_internal: {
 switch (OpNo) {
 case 3:
   Row = II->getArgOperand(0);
Index: llvm/lib/Target/X86/X86InstrAMX.td
===
--- llvm/lib/Target/X86/X86InstrAMX.td
+++ llvm/lib/Target/X86/X86InstrAMX.td
@@ -138,6 +138,16 @@
   "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
   []>, VEX_4V, T8XS;
 
+// Pseduo instruction for RA.
+let Constraints = "$src4 = $dst" in
+  def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+ GR16:$src2, GR16:$src3, TILE:$src4,
+ TILE:$src5, TILE:$src6),
+

[PATCH] D97358: [X86] Support amx-bf16 intrinsic.

2021-02-23 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/lib/Headers/amxintrin.h:283
 
+typedef struct __tile1024bf16_str {
+  const unsigned short row;

LuoYuanke wrote:
> pengfei wrote:
> > Is there much value to differentiate the type? We are using the same AMX 
> > type in the builtins. What do you think? @LuoYuanke 
> My first though is that we can reuse __tile1024i for bf16 tile for 2 reasons.
> 1. We don't access the element of the tile. 
> 2. The destination element of amx-int8 is int32 and the destination element 
> of amx-bf16 is float32, the element size is the same.
Does this mean that users need to do an explicit type conversion?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97358/new/

https://reviews.llvm.org/D97358

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97358: [X86] Support amx-bf16 intrinsic.

2021-02-24 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 326002.
LiuChen3 added a comment.

Address Pengfei and Yuanke's comments. We don't need more tile type.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97358/new/

https://reviews.llvm.org/D97358

Files:
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/lib/Headers/amxintrin.h
  clang/test/CodeGen/X86/amx_api.c
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/lib/Target/X86/X86ExpandPseudo.cpp
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrAMX.td
  llvm/lib/Target/X86/X86LowerAMXType.cpp
  llvm/lib/Target/X86/X86PreTileConfig.cpp
  llvm/lib/Target/X86/X86RegisterInfo.cpp
  llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Index: llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
===
--- llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-int8,+amx-bf16,+avx512f -verify-machineinstrs | FileCheck %s
 
 define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
 ; CHECK-LABEL: test_amx:
@@ -22,6 +22,7 @@
 ; CHECK-NEXT:tdpbsud %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbusd %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbuud %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbf16ps %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tilestored %tmm0, (%rdi,%rdx)
 ; CHECK-NEXT:tilerelease
 ; CHECK-NEXT:vzeroupper
@@ -33,7 +34,8 @@
   %d1 = call x86_amx @llvm.x86.tdpbsud.internal(i16 8, i16 8, i16 8, x86_amx %d0, x86_amx %a, x86_amx %b)
   %d2 = call x86_amx @llvm.x86.tdpbusd.internal(i16 8, i16 8, i16 8, x86_amx %d1, x86_amx %a, x86_amx %b)
   %d3 = call x86_amx @llvm.x86.tdpbuud.internal(i16 8, i16 8, i16 8, x86_amx %d2, x86_amx %a, x86_amx %b)
-  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d3)
+  %d4 = call x86_amx @llvm.x86.tdpbf16ps.internal(i16 8, i16 8, i16 8, x86_amx %d3, x86_amx %a, x86_amx %b)
+  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d4)
 
   ret void
 }
@@ -44,4 +46,5 @@
 declare x86_amx @llvm.x86.tdpbsud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbusd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbf16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
===
--- llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -888,6 +888,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = MI->getOperand(1);
 MachineOperand &MO2 = MI->getOperand(2);
 ShapeT Shape(&MO1, &MO2, MRI);
Index: llvm/lib/Target/X86/X86PreTileConfig.cpp
===
--- llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -159,6 +159,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = const_cast(MI.getOperand(1));
 MachineOperand &MO2 = const_cast(MI.getOperand(2));
 ShapeT Shape(&MO1, &MO2, MRI);
@@ -256,6 +257,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 return true;
   }
 }
Index: llvm/lib/Target/X86/X86LowerAMXType.cpp
===
--- llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -70,7 +70,8 @@
   case Intrinsic::x86_tdpbssd_internal:
   case Intrinsic::x86_tdpbsud_internal:
   case Intrinsic::x86_tdpbusd_internal:
-  case Intrinsic::x86_tdpbuud_internal: {
+  case Intrinsic::x86_tdpbuud_internal:
+  case Intrinsic::x86_tdpbf16ps_internal: {
 switch (OpNo) {
 case 3:
   Row = II->getArgOperand(0);
Index: llvm/lib/Target/X86/X86InstrAMX.td
===
--- llvm/lib/Target/X86/X86InstrAMX.td
+++ llvm/lib/Target/X86/X86InstrAMX.td
@@ -138,6 +138,16 @@
   "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
   []>, VEX_4V, T8XS;
 
+// Pseduo instruction for RA.
+let Constraints = "$src4 = $dst" in
+  def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+ GR16:$src2, GR16:$src3, TILE:$src4,
+ TILE

[PATCH] D97358: [X86] Support amx-bf16 intrinsic.

2021-02-24 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

I don't know why the pre-merge checks failed. I can run check-all successfully
locally on RedHat 8. I don't have a Debian machine to reproduce this problem.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97358/new/

https://reviews.llvm.org/D97358

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97358: [X86] Support amx-bf16 intrinsic.

2021-02-24 Thread LiuChen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4bc7c8631ad6: [X86] Support amx-bf16 intrinsic. (authored by 
LiuChen3).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97358/new/

https://reviews.llvm.org/D97358

Files:
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/lib/Headers/amxintrin.h
  clang/test/CodeGen/X86/amx_api.c
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/lib/Target/X86/X86ExpandPseudo.cpp
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrAMX.td
  llvm/lib/Target/X86/X86LowerAMXType.cpp
  llvm/lib/Target/X86/X86PreTileConfig.cpp
  llvm/lib/Target/X86/X86RegisterInfo.cpp
  llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Index: llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
===
--- llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-int8,+amx-bf16,+avx512f -verify-machineinstrs | FileCheck %s
 
 define void @test_amx(i8* %pointer, i8* %base, i64 %stride) {
 ; CHECK-LABEL: test_amx:
@@ -22,6 +22,7 @@
 ; CHECK-NEXT:tdpbsud %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbusd %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tdpbuud %tmm2, %tmm1, %tmm0
+; CHECK-NEXT:tdpbf16ps %tmm2, %tmm1, %tmm0
 ; CHECK-NEXT:tilestored %tmm0, (%rdi,%rdx)
 ; CHECK-NEXT:tilerelease
 ; CHECK-NEXT:vzeroupper
@@ -33,7 +34,8 @@
   %d1 = call x86_amx @llvm.x86.tdpbsud.internal(i16 8, i16 8, i16 8, x86_amx %d0, x86_amx %a, x86_amx %b)
   %d2 = call x86_amx @llvm.x86.tdpbusd.internal(i16 8, i16 8, i16 8, x86_amx %d1, x86_amx %a, x86_amx %b)
   %d3 = call x86_amx @llvm.x86.tdpbuud.internal(i16 8, i16 8, i16 8, x86_amx %d2, x86_amx %a, x86_amx %b)
-  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d3)
+  %d4 = call x86_amx @llvm.x86.tdpbf16ps.internal(i16 8, i16 8, i16 8, x86_amx %d3, x86_amx %a, x86_amx %b)
+  call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %d4)
 
   ret void
 }
@@ -44,4 +46,5 @@
 declare x86_amx @llvm.x86.tdpbsud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbusd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
+declare x86_amx @llvm.x86.tdpbf16ps.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
 declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
Index: llvm/lib/Target/X86/X86RegisterInfo.cpp
===
--- llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -888,6 +888,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = MI->getOperand(1);
 MachineOperand &MO2 = MI->getOperand(2);
 ShapeT Shape(&MO1, &MO2, MRI);
Index: llvm/lib/Target/X86/X86PreTileConfig.cpp
===
--- llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -159,6 +159,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 MachineOperand &MO1 = const_cast(MI.getOperand(1));
 MachineOperand &MO2 = const_cast(MI.getOperand(2));
 ShapeT Shape(&MO1, &MO2, MRI);
@@ -256,6 +257,7 @@
   case X86::PTDPBUSDV:
   case X86::PTDPBUUDV:
   case X86::PTILEZEROV:
+  case X86::PTDPBF16PSV:
 return true;
   }
 }
Index: llvm/lib/Target/X86/X86LowerAMXType.cpp
===
--- llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -70,7 +70,8 @@
   case Intrinsic::x86_tdpbssd_internal:
   case Intrinsic::x86_tdpbsud_internal:
   case Intrinsic::x86_tdpbusd_internal:
-  case Intrinsic::x86_tdpbuud_internal: {
+  case Intrinsic::x86_tdpbuud_internal:
+  case Intrinsic::x86_tdpbf16ps_internal: {
 switch (OpNo) {
 case 3:
   Row = II->getArgOperand(0);
Index: llvm/lib/Target/X86/X86InstrAMX.td
===
--- llvm/lib/Target/X86/X86InstrAMX.td
+++ llvm/lib/Target/X86/X86InstrAMX.td
@@ -138,6 +138,16 @@
   "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
   []>, VEX_4V, T8XS;
 
+// Pseduo instruction for RA.
+let Constraints = "$src4 = $dst" in
+  def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
+ 

[PATCH] D60748: Adds an option "malign-pass-aggregate" to make the alignment of the struct and union parameters compatible with the default gcc

2020-03-23 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 marked an inline comment as done.
LiuChen3 added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1570
+return 4;
+} else if (Align < 16)
+  return MinABIStackAlignInBytes;

LiuChen3 wrote:
> jyknight wrote:
> > If I understood GCC's algorithm correctly, I think this needs to come first?
> You mean it should be ?
> 
> 
> ```
> if (MaxAlignment < 16)
>   retrun 4
> else
>  return std::max(MaxAlignment, Align);
> ```
I found that the test I wrote above was wrong. Sorry for the noise.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60748: Fix i386 struct and union parameter alignment

2020-03-15 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Ping?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60748: Fix i386 struct and union parameter alignment

2020-03-15 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 250483.
LiuChen3 added a comment.
Herald added a subscriber: arichardson.

rebase.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/x86_32-align-linux.c
  clang/test/CodeGen/x86_32-align-linux.cpp
  clang/test/CodeGen/x86_32-arguments-linux.c

Index: clang/test/CodeGen/x86_32-arguments-linux.c
===
--- clang/test/CodeGen/x86_32-arguments-linux.c
+++ clang/test/CodeGen/x86_32-arguments-linux.c
@@ -5,19 +5,19 @@
 // CHECK: i8 signext %a0, %struct.s56_0* byval(%struct.s56_0) align 4 %a1,
 // CHECK: i64 %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4 %0,
 // CHECK: <1 x double> %a4, %struct.s56_2* byval(%struct.s56_2) align 4 %1,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 4 %2,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 4 %3,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval(%struct.s56_5) align 4 %4,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval(%struct.s56_6) align 4 %5)
+// CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 16 %a7,
+// CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 16 %a9,
+// CHECK: <8 x i32> %a10, %struct.s56_5* byval(%struct.s56_5) align 32 %a11,
+// CHECK: <4 x double> %a12, %struct.s56_6* byval(%struct.s56_6) align 32 %a13)
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
 // CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
 // CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
+// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 16 %{{[^ ]*}},
+// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 16 %{{[^ ]*}},
+// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 32 %{{[^ ]*}},
+// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 32 %{{[^ ]*}})
 // CHECK: }
 //
 //  [i386] clang misaligns long double in structures
Index: clang/test/CodeGen/x86_32-align-linux.cpp
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+typedef __attribute__((aligned(16))) int alignedint16;
+typedef __attribute__((aligned(64))) int alignedint64;
+
+class __attribute__((aligned(64))) X1 {
+  class  __attribute__((aligned(32))) {
+   __m128 a1;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(64))) X2 {
+  class  __attribute__((aligned(32))) {
+int a1;
+alignedint16 a2;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(32))) X3 {
+  class __attribute__((aligned(64))) {
+int a1;
+alignedint16 a2;
+  } a;
+ int b;
+};
+
+class __attribute__((aligned(16))) X4 {
+  class  __attribute__((aligned(32))) {
+int a1;
+alignedint64 a2;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(64))) X5 {
+  int x;
+};
+
+class __attribute__((aligned(64))) X6 {
+  int x;
+  alignedint64 y;
+};
+
+extern void foo(int, ...);
+
+class X1 x1;
+class X2 x2;
+class X3 x3;
+class X4 x4;
+class X5 x5;
+class X6 x6;
+
+// CHECK-LABEL: define void @_Z4testv()
+// CHECK: entry:
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X1* byval(%class.X1) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X2* byval(%class.X2) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X3* byval(%class.X3) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X4* byval(%class.X4) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X5* byval(%class.X5) align 4
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X6* byval(%class.X6) align 64
+
+void test(void)
+{
+  foo(1, x1);
+  foo(1, x2);
+  foo(1, x3);
+  foo(1, x4);
+  foo(1, x5);
+  foo(1, x6);
+}
Index: clang/test/CodeGen/x86_32-align-linux.c
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+typedef union {
+  int d[4];
+   __m128 m;
+} M128;
+
+typedef __attribute__((aligned(16))) int alignedint16;
+typedef __attribute__((aligned(64))) int alignedint64;
+
+st

[PATCH] D60748: Fix i386 struct and union parameter alignment

2020-03-16 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D60748#1924794 , @rjmccall wrote:

> Oh, I see you just updated your patch months ago without ever mentioning that 
> it was ready for review.
>
> It sounds to me like GCC retroactively added a switch specifying which 
> version of the ABI to follow on this point, somewhat confusingly called 
> `-malign-data`.  That's probably the right move here for us, too, especially 
> since FreeBSD says they'd like to use it.  That also means the condition of 
> when to use your new logic will have to change; basically, we need a 
> CodeGenOption for this that will default to the old ABI, and the driver will 
> pass down a different default on Linux.


Thanks for the review.
 `-malign-data` is another topic. As I said above, at least
`-malign-data` will not affect the calling convention of structs and unions. I
agree with you that we should add an option to control this new logic. I'll work on
it.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60748: Adds an option "malign-pass-aggregate" to make the alignment of the struct and union parameters compatible with the default gcc

2020-03-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 251290.
LiuChen3 retitled this revision from " Fix i386 struct and union parameter 
alignment" to "Adds an option "malign-pass-aggregate" to make the alignment of 
the struct and union parameters compatible with the default gcc".
LiuChen3 edited the summary of this revision.
LiuChen3 added a comment.

Add an option "malign-pass-aggregate" to be compatible with GCC's default passing of
structs and unions.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGen/x86_32-align-linux.c
  clang/test/CodeGen/x86_32-align-linux.cpp

Index: clang/test/CodeGen/x86_32-align-linux.cpp
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -malign-pass-aggregate -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+typedef __attribute__((aligned(16))) int alignedint16;
+typedef __attribute__((aligned(64))) int alignedint64;
+
+class __attribute__((aligned(64))) X1 {
+  class  __attribute__((aligned(32))) {
+   __m128 a1;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(64))) X2 {
+  class  __attribute__((aligned(32))) {
+int a1;
+alignedint16 a2;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(32))) X3 {
+  class __attribute__((aligned(64))) {
+int a1;
+alignedint16 a2;
+  } a;
+ int b;
+};
+
+class __attribute__((aligned(16))) X4 {
+  class  __attribute__((aligned(32))) {
+int a1;
+alignedint64 a2;
+  } a;
+  int b;
+};
+
+class __attribute__((aligned(64))) X5 {
+  int x;
+};
+
+class __attribute__((aligned(64))) X6 {
+  int x;
+  alignedint64 y;
+};
+
+extern void foo(int, ...);
+
+class X1 x1;
+class X2 x2;
+class X3 x3;
+class X4 x4;
+class X5 x5;
+class X6 x6;
+
+// CHECK-LABEL: define void @_Z4testv()
+// CHECK: entry:
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X1* byval(%class.X1) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X2* byval(%class.X2) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X3* byval(%class.X3) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X4* byval(%class.X4) align 64
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X5* byval(%class.X5) align 4
+// CHECK: call void (i32, ...) @_Z3fooiz(i32 1, %class.X6* byval(%class.X6) align 64
+
+void test(void)
+{
+  foo(1, x1);
+  foo(1, x2);
+  foo(1, x3);
+  foo(1, x4);
+  foo(1, x5);
+  foo(1, x6);
+}
Index: clang/test/CodeGen/x86_32-align-linux.c
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -malign-pass-aggregate -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include 
+
+typedef union {
+  int d[4];
+   __m128 m;
+} M128;
+
+typedef __attribute__((aligned(16))) int alignedint16;
+typedef __attribute__((aligned(64))) int alignedint64;
+
+struct __attribute__((aligned(64))) X1 {
+ struct  __attribute__((aligned(32))) {
+  int a1;
+ } a;
+ int b;
+};
+
+
+struct __attribute__((aligned(64))) X2 {
+ struct  __attribute__((aligned(32))) {
+  int a1;
+  alignedint16 a2;
+ } a;
+ int b;
+};
+
+struct __attribute__((aligned(32))) X3 {
+ struct __attribute__((aligned(64))) {
+  int a1;
+  alignedint16 a2;
+ } a;
+ int b;
+};
+
+struct __attribute__((aligned(16))) X4 {
+ struct  __attribute__((aligned(32))) {
+  int a1;
+  alignedint64 a2;
+ } a;
+ int b;
+};
+
+struct __attribute__((aligned(64))) X5 {
+  int x;
+};
+
+struct __attribute__((aligned(64))) X6 {
+ int x;
+ alignedint64 y;
+};
+
+union U1 {
+ struct __attribute__((aligned(32))) {
+  int i;
+  __m128 m;
+ };
+ int b;
+};
+
+extern void foo(int, ...);
+
+M128 a;
+struct X1 x1;
+struct X2 x2;
+struct X3 x3;
+struct X4 x4;
+struct X5 x5;
+struct X6 x6;
+union  U1 u1;
+
+// CHECK-LABEL: define void @test
+// CHECK: entry:
+// CHECK: call void (i32, ...) @foo(i32 1, %union.M128* byval(%union.M128) align 16
+// CHECK: call void (i32, ...) @foo(i32 1, <4 x float>
+// CHECK: call void (i32, ...) @foo(i32 1, %struct.X1* byval(%struct.X1) align 4
+// CHECK: call void (i32, ...) @foo(i32 1, %struct.X2* byval(%struct.X2) align 64
+// CHECK: call void (i32, ...) @foo(i32 1, %struct.X3* byval(%struct.X3) align 64
+// CHECK: call void (i32, ...) @foo(i32 1, %struct.X4* byval(%struct.X4) align 64
+// CHECK: call void (i32, ...) @foo(i32 1, %struct.X5* byval(%struct.X5) align 4
+// CHECK: call void (i32, ...) @foo(i32 1, %struct.X6* byval(%struct.X6) align 64
+// CHECK: call void (i32, ...) @foo(i32 1, %union.U1* byval(%union.U1) align 32
+void test(void)
+{
+  foo(1, a);
+  foo(1, a.m);
+  foo(1, x1);
+  fo

[PATCH] D60748: Adds an option "malign-pass-aggregate" to make the alignment of the struct and union parameters compatible with the default gcc

2020-03-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 marked 3 inline comments as done.
LiuChen3 added a comment.

In D60748#1931440 , @jyknight wrote:

> Since the ABI this is trying to match is not documented literally anywhere, I 
> think we need to have some confidence that what this implements is actually 
> the same as what GCC does. While I wrote up what I think the algorithm is, 
> without some sort of script to allow testing it against a bunch of examples, 
> I wouldn't say I'm confident of its correctness.
>
> I'm not sure if you can reverse-engineer what the alignment must have been 
> from the assembly output, or from some debug flags. Or if maybe doing 
> something silly like modifying the source to insert a printf would be the 
> best method to test this.


I think at least the initial patch is correct.




Comment at: clang/include/clang/Basic/LangOptions.def:353
 
+VALUE_LANGOPT(AlignPassingAggregate, 1, 0, "Compatible with gcc default 
passing struct and union (x86 only).")
+

rnk wrote:
> If only codegen needs to know, a CodeGenOption would be better.
The backend does not need this option information.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1556
+
+  for (RecordDecl::field_iterator i = RD->field_begin(), e = 
RD->field_end();
+   i != e; ++i) {

rnk wrote:
> Any time you crack open a record to look at the fields, the code is probably 
> wrong the first time you write it. :( In this case, I suspect you are not 
> looking at base classes. Consider:
> ```
> struct A {
>   MyAlignedType Field;
> };
> struct B : A {};
> void passbyval(B o);
> ```
I'm not sure if I understand what you mean.



```
typedef __attribute__((aligned(16))) int alignedint16;
typedef __attribute__((aligned(64))) int alignedint64;
struct __attribute__((aligned(64))) X2 {
  struct  __attribute__((aligned(32))) {
int a1;
alignedint16 a2;
  } a;
  int b;
};
struct B : X2{};
void test(B b)
{
  std::cout << b.a.a2 << std::endl;
}
```
This can pass.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1542-1544
+// i386 System V ABI 2.1: Structures and unions assume the alignment of 
their
+// most strictly aligned component.
+//

jyknight wrote:
> This comment isn't useful. While it may be what the System V ABI document 
> says, that's clearly incorreect, and is  not what the code is or should be 
> doing. Please document what is actually implemented, instead.
Sorry I forget to change it.



Comment at: clang/test/CodeGen/x86_32-align-linux.cpp:9
+
+class __attribute__((aligned(64))) X1 {
+  class  __attribute__((aligned(32))) {

jyknight wrote:
> Confused me that this was a different X1 than in the test-case above. I'm not 
> sure why the tests need to be duplicated here in a .cpp file in the first 
> place?
Sorry that I don't know much about front-end tests. I thought class, struct and 
union all need to be tested.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60748: Adds an option "malign-pass-aggregate" to make the alignment of the struct and union parameters compatible with the default gcc

2020-03-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

> I think at least the initial patch is correct.

I re-read your comment above, please ignore this sentence. Sorry for the noise.
My question now is that since we cannot guarantee that we are doing the right 
thing, is this patch necessary?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60748: Adds an option "malign-pass-aggregate" to make the alignment of the struct and union parameters compatible with the default gcc

2020-03-20 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 marked 3 inline comments as done.
LiuChen3 added inline comments.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1559
+  if (const auto *AT = QT->getAsArrayTypeUnsafe())
+TempAlignment = getContext().getTypeAlign(AT->getElementType()) / 
8;
+  else // recursively to get each type's alignment

jyknight wrote:
> Also needs to call getTypeStackAlignInBytes?
I think this is enough.

```
struct __attribute__((aligned(16))) X6 {
 int x;
 struct X1 x1[5];
// alignedint64 y;
};

void test(int a, struct X6 x6)
{
  printf("%u\n", __alignof__(x6));
}
```
This will output 64.



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1567
+  if (MaxAlignment >= 16)
+return std::max(MaxAlignment, Align);
+  else // return 4 when all the type alignments less than 16 bytes

jyknight wrote:
> I think this is wrong and that it should only return Align. The computation 
> of the alignment of the elements is only to see if their alignment is >= 16.
> 
> If the alignment of the elements' types is >= 16, but the alignment of the 
> structure is less than the alignment of one of its elements (e.g. due to 
> `__attribute__ packed`), we should return the alignment of the structure.
I wrote a test; I don't know if it matches your meaning.

```
struct __attribute__((aligned(4))) X6 {
 int x;
 alignedint64 y;
};

void test(int a, struct X6 x6)
{
  printf("%u\n", __alignof__(x6));
}
```
The output of gcc is 64.

If we use packed attribute:


```
struct __attribute__((packed)) X6 {
 int x;
 alignedint64 y;
};
```

Both gcc and clang with this patch output "1". And I found that the packed 
struct is not processed by this function.
So I think it should return the MaxAlignment .



Comment at: clang/lib/CodeGen/TargetInfo.cpp:1570
+return 4;
+} else if (Align < 16)
+  return MinABIStackAlignInBytes;

jyknight wrote:
> If I understood GCC's algorithm correctly, I think this needs to come first?
You mean it should be ?


```
if (MaxAlignment < 16)
  return 4
else
 return std::max(MaxAlignment, Align);
```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89105: [X86] Support Intel avxvnni

2022-01-24 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/lib/Headers/cpuid.h:199
 /* Features in %eax for leaf 7 sub-leaf 1 */
+#define bit_AVXVNNI   0x0008
 #define bit_AVX512BF160x0020

craig.topper wrote:
> RKSimon wrote:
> > @liuchen3 https://github.com/llvm/llvm-project/issues/53392 reports this 
> > should be 0x0010?
> Maybe we should start writing these as 1 << 4. This is not the first shifted 
> by one issue we've had in this file.
It's my fault. I have a fix here: https://reviews.llvm.org/D118103 .


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89105/new/

https://reviews.llvm.org/D89105

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D118103: [X86]Fix the wrong value of bit_AVXVNNI

2022-01-24 Thread LiuChen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf6984b299afc: Fix the wrong value of bit_AVXVNNI (authored 
by LiuChen3).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118103/new/

https://reviews.llvm.org/D118103

Files:
  clang/lib/Headers/cpuid.h


Index: clang/lib/Headers/cpuid.h
===
--- clang/lib/Headers/cpuid.h
+++ clang/lib/Headers/cpuid.h
@@ -200,7 +200,7 @@
 #define bit_AMXINT8   0x0200
 
 /* Features in %eax for leaf 7 sub-leaf 1 */
-#define bit_AVXVNNI   0x0008
+#define bit_AVXVNNI   0x0010
 #define bit_AVX512BF160x0020
 #define bit_HRESET0x0040
 


Index: clang/lib/Headers/cpuid.h
===
--- clang/lib/Headers/cpuid.h
+++ clang/lib/Headers/cpuid.h
@@ -200,7 +200,7 @@
 #define bit_AMXINT8   0x0200
 
 /* Features in %eax for leaf 7 sub-leaf 1 */
-#define bit_AVXVNNI   0x0008
+#define bit_AVXVNNI   0x0010
 #define bit_AVX512BF160x0020
 #define bit_HRESET0x0040
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-03-29 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added subscribers: jansvoboda11, dang, pengfei, hiraditya.
LiuChen3 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

With AVX the performance for aligned vector move and unaligned vector move on 
X86
are the same if the address is aligned. In this case we prefer to use unaligned
move because it can avoid some run time exceptions.
"-fuse-unaligned-vector-move" and "-fno-use-unaligned-vector-move" are added to
enable this preference. This transform is disabled as default.
Doing the transform in assembly because we want to avoid doing any change of IR
and flags.

This patch is a replacement of D88396 .


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D99565

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/test/Driver/x86-unaligned-vector-move.c
  llvm/lib/Target/X86/X86MCInstLower.cpp
  llvm/test/CodeGen/X86/avx-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll

Index: llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
@@ -0,0 +1,747 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -mattr=avx512vl -x86-enable-unaligned-vector-move | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -mattr=avx512vl -x86-enable-unaligned-vector-move | FileCheck %s -check-prefix=X86
+
+define <8 x i32> @test_256_1(i8 * %addr) {
+; CHECK-LABEL: test_256_1:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_1:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_1:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
+  ret <8 x i32>%res
+}
+
+define <8 x i32> @test_256_2(i8 * %addr) {
+; CHECK-LABEL: test_256_2:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0 # AlignMOV convert to UnAlignMOV
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_2:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0 # AlignMOV convert to UnAlignMOV
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_2:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0 # AlignMOV convert to UnAlignMOV
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
+  ret <8 x i32>%res
+}
+
+define void @test_256_3(i8 * %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_256_3:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi) # AlignMOV convert to UnAlignMOV
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_3:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi) # AlignMOV convert to UnAlignMOV
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_3:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax) # AlignMOV convert to UnAlignMOV
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
+  ret void
+}
+
+define void @test_256_4(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_4:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_4:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_4:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
+  ret void
+}
+
+define void @test_256_5(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_5:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi) # AlignMOV convert to UnAlignMOV
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_5:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi) # AlignMOV convert to UnAlignMOV
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_5:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax) # AlignMOV convert to UnAlignMOV
+; X86-NEXT:vzeroupp

[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-03-30 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/include/clang/Driver/Options.td:1642
+Group, Flags<[CoreOption, HelpHidden]>,
+HelpText<"Enable transforming aligned vector move instruction to "
+ "unaligned vector move.">;

craig.topper wrote:
> As far the user is concerned this isn’t a transform. From their perspective 
> it’s always use unaligned move instructions.
How about "Always emit unaligned move instructions." ? Do you have any 
suggestion here?



Comment at: llvm/lib/Target/X86/X86MCInstLower.cpp:2708
+  // enabled.
+  if (EnableX86UnalignedVecMove) {
+for (const auto Pair : AlignedMovToUnalignedMovTable) {

Forgot to check the SSE level. I will add in next patch.



Comment at: llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll:6
+define <8 x i32> @test_256_1(i8 * %addr) {
+; CHECK-LABEL: test_256_1:
+; CHECK:   # %bb.0:

craig.topper wrote:
> CHECK isn’t a valid prefix for this file
Thanks for the reminder. I forgot to delete these. I will remove them in the 
next patch.



Comment at: llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll:21
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
+  ret <8 x i32>%res

craig.topper wrote:
> What are the tests with align 1 intended to show?
This is to distinguish unaligned moves that were converted from aligned moves 
from the original unaligned moves. See the difference between line 12 and line 28.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99565/new/

https://reviews.llvm.org/D99565

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-04-12 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D99565#2678073 , @craig.topper 
wrote:

> I think I wouldn't mind if we just didn't emit aligned loads/store 
> instructions for AVX/AVX512 from isel and other places in the compiler in the 
> first place. As noted, if the load gets folded the alignment check doesn't 
> happen. That would reduce the size of the isel tables and remove branches, 
> reducing complexity of the compiler. Adding a new step and a command line to 
> undo the earlier decision increases complexity.
>
> The counter argument to that is that the alignment check has found bugs in 
> the vectorizer on more than one occasion that I know of.

Can I understand that if we implement it in isel, you will no longer oppose 
this patch?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99565/new/

https://reviews.llvm.org/D99565

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 337057.
LiuChen3 added a comment.

1. Rebase;
2. Emit unaligned move in ISEL;
3. Only do the conversion on AVX machine.

I am still working on fast-isel.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99565/new/

https://reviews.llvm.org/D99565

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/test/Driver/x86-unaligned-vector-move.c
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
  llvm/test/CodeGen/X86/avx-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll

Index: llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
@@ -0,0 +1,747 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -mattr=avx512vl -x86-enable-unaligned-vector-move | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -mattr=avx512vl -x86-enable-unaligned-vector-move | FileCheck %s -check-prefix=X86
+
+define <8 x i32> @test_256_1(i8 * %addr) {
+; CHECK-LABEL: test_256_1:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_1:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_1:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
+  ret <8 x i32>%res
+}
+
+define <8 x i32> @test_256_2(i8 * %addr) {
+; CHECK-LABEL: test_256_2:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_2:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_2:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
+  ret <8 x i32>%res
+}
+
+define void @test_256_3(i8 * %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_256_3:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_3:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_3:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
+  ret void
+}
+
+define void @test_256_4(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_4:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_4:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_4:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
+  ret void
+}
+
+define void @test_256_5(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_5:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_5:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_5:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 32
+  ret void
+}
+
+define  <4 x i64> @test_256_6(i8 * %addr) {
+; CHECK-LABEL: test_256_6:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_6:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_6:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
+  ret <4 x i64>%res
+}
+
+define void @test_256_7(i8 * %addr, <4 x i64> %data) {
+; 

[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D99565#2682809 , @lebedev.ri wrote:

> I'm still uncomfortable with changing current status quo, even though i 
> obviously don't get to cast the final vote here.
>
> One should not use aligned loads in hope that they will cause an exception to 
> detect address misalignment.
> That's UBSan's job. `-fsanitize=undefined`/`-fsanitize=aligment` *should* 
> catch it.
> If it does not do so in your particular case, please file a bug, i would like 
> to take a look.
>
> Likewise, i don't think one should do overaligned loads and hope that they 
> will just work.
> UB is UB. The code will still be miscompiled, but you've just hidden your 
> warning.
>
> Likewise, even if unaligned loads can be always used, i would personally find 
> it pretty surprising
> to suddenly see unaligned loads instead of aligned ones.
> Also, isn't that only possible/so when AVX is available?
> Also, doesn't that cause compiler lock-in?
> What happens without AVX? Do so anyway at the cost of performance?
> Or back to exceptions?
>
> Should this process in any form other than the UBSan changes,
> i would like to first see a RFC on llvm-dev.
> Sorry about being uneasy about this. :S

We are happy to hear your voice. We will discuss this on llvm-dev later after 
confirming Craigs's opinion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99565/new/

https://reviews.llvm.org/D99565

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D99565#2678073 , @craig.topper 
wrote:

> I think I wouldn't mind if we just didn't emit aligned loads/store 
> instructions for AVX/AVX512 from isel and other places in the compiler in the 
> first place. As noted, if the load gets folded the alignment check doesn't 
> happen. That would reduce the size of the isel tables and remove branches, 
> reducing complexity of the compiler. Adding a new step and a command line to 
> undo the earlier decision increases complexity.
>
> The counter argument to that is that the alignment check has found bugs in 
> the vectorizer on more than one occasion that I know of.

Hi, @craig.topper. I'm not sure if I understand what you mean correctly. Do you 
mean we can remove the alignload/alignstore pattern match so that we can reduce 
the size of the isel tables? But this means that there is no option to control 
this behavior.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99565/new/

https://reviews.llvm.org/D99565

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 337064.
LiuChen3 added a comment.

Address Craig's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99565/new/

https://reviews.llvm.org/D99565

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/test/Driver/x86-unaligned-vector-move.c
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
  llvm/test/CodeGen/X86/avx-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll

Index: llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
@@ -0,0 +1,747 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -mattr=avx512vl -x86-enable-unaligned-vector-move | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -mattr=avx512vl -x86-enable-unaligned-vector-move | FileCheck %s -check-prefix=X86
+
+define <8 x i32> @test_256_1(i8 * %addr) {
+; CHECK-LABEL: test_256_1:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_1:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_1:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
+  ret <8 x i32>%res
+}
+
+define <8 x i32> @test_256_2(i8 * %addr) {
+; CHECK-LABEL: test_256_2:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_2:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_2:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
+  ret <8 x i32>%res
+}
+
+define void @test_256_3(i8 * %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_256_3:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_3:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_3:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
+  ret void
+}
+
+define void @test_256_4(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_4:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_4:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_4:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
+  ret void
+}
+
+define void @test_256_5(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_5:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_5:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_5:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 32
+  ret void
+}
+
+define  <4 x i64> @test_256_6(i8 * %addr) {
+; CHECK-LABEL: test_256_6:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_6:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_6:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
+  ret <4 x i64>%res
+}
+
+define void @test_256_7(i8 * %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_256_7:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; C

[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added a subscriber: pengfei.
LiuChen3 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Support for _mm512_i32logather_pd, _mm512_mask_i32logather_pd,
_mm512_i32logather_epi64, _mm512_mask_i32logather_epi64, _mm512_i32loscatter_pd,
_mm512_mask_i32loscatter_pd.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,39 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,129 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VPGATHERDQ  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
+  _mm512_i32gather_epi64

[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D100368#2685189 , @RKSimon wrote:

> Add _mm512_i32loscatter_epi64  and _mm512_mask_i32loscatter_epi64 for 
> completeness?

It seems we don't support KNCNI in LLVM. Am I right?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 337301.
LiuChen3 added a comment.

Address Simon's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,39 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,130 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VPGATHERDQ  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
+  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in t

[PATCH] D99565: [X86] Support replacing aligned vector moves with unaligned moves when avx is enabled.

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 337333.
LiuChen3 added a comment.

Address Craig's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99565/new/

https://reviews.llvm.org/D99565

Files:
  clang/include/clang/Driver/Options.td
  clang/test/Driver/x86-target-features.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/avx-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512-unaligned-load-store.ll
  llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll

Index: llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/avx512vl-unaligned-load-store.ll
@@ -0,0 +1,747 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+use-unaligned-vector-move | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512vl,+use-unaligned-vector-move | FileCheck %s -check-prefix=X86
+
+define <8 x i32> @test_256_1(i8 * %addr) {
+; CHECK-LABEL: test_256_1:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_1:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_1:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
+  ret <8 x i32>%res
+}
+
+define <8 x i32> @test_256_2(i8 * %addr) {
+; CHECK-LABEL: test_256_2:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_2:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_2:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
+  ret <8 x i32>%res
+}
+
+define void @test_256_3(i8 * %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_256_3:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_3:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_3:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
+  ret void
+}
+
+define void @test_256_4(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_4:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_4:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_4:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
+  ret void
+}
+
+define void @test_256_5(i8 * %addr, <8 x i32> %data) {
+; CHECK-LABEL: test_256_5:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_5:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups %ymm0, (%rdi)
+; X64-NEXT:vzeroupper
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_5:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups %ymm0, (%eax)
+; X86-NEXT:vzeroupper
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 32
+  ret void
+}
+
+define  <4 x i64> @test_256_6(i8 * %addr) {
+; CHECK-LABEL: test_256_6:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups (%rdi), %ymm0
+; CHECK-NEXT:retq
+; X64-LABEL: test_256_6:
+; X64:   # %bb.0:
+; X64-NEXT:vmovups (%rdi), %ymm0
+; X64-NEXT:retq
+;
+; X86-LABEL: test_256_6:
+; X86:   # %bb.0:
+; X86-NEXT:movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:vmovups (%eax), %ymm0
+; X86-NEXT:retl
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
+  ret <4 x i64>%res
+}
+
+define void @test_256_7(i8 * %addr, <4 x i64> %data) {
+; CHECK-LABEL: test_256_7:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmovups %ymm0, (%rdi)
+; CHECK-NEXT:vzeroupper
+; CHECK-NEXT:retq

[PATCH] D78564: [i386] Modify the alignment of __m128/__m256/__m512 vector type according i386 abi.

2021-04-14 Thread LiuChen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1c4108ab661d: [i386] Modify the alignment of 
__m128/__m256/__m512 vector type according i386… (authored by LiuChen3).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78564/new/

https://reviews.llvm.org/D78564

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/x86_32-align-linux.c

Index: clang/test/CodeGen/x86_32-align-linux.c
===
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o - %s | FileCheck %s
+
+#include 
+
+// CHECK-LABEL: define dso_local void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define dso_local void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 31
+// CHECK-NEXT:  %2 = and i32 %1, -32
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+  __m256 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m256);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define dso_local void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 63
+// CHECK-NEXT:  %2 = and i32 %1, -64
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define dso_local void @testPastArguments
+// CHECK: call void (i32, ...) @testm128(i32 1, <4 x float> %0)
+// CHECK: call void (i32, ...) @testm256(i32 1, <8 x float> %1)
+// CHECK: call void (i32, ...) @testm512(i32 1, <16 x float> %2)
+void testPastArguments(void) {
+  __m128 a;
+  __m256 b;
+  __m512 c;
+  testm128(1, a);
+  testm256(1, b);
+  testm512(1, c);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1105,6 +1105,7 @@
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
+  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
 
   static bool isRegisterSize(unsigned Size) {
@@ -1167,9 +1168,9 @@
 unsigned NumRegisterParameters, bool SoftFloatABI)
 : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
   IsRetSmallStructInRegABI(RetSmallStructInRegABI),
-  IsWin32StructABI(Win32StructABI),
-  IsSoftFloatABI(SoftFloatABI),
+  IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
   IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+  IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
   DefaultNumRegisterParameters(NumRegisterParameters) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef scalars,
@@ -1594,6 +1595,14 @@
   if (Align <= MinABIStackAlignInBytes)
 return 0; // Use default alignment.
 
+  if (IsLinuxABI) {
+// Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
+// want to spend any effort dealing with the ramifications of ABI breaks.
+//
+// If the vector type is __m128/__m256/__m512, return the default alignment.
+if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
+  return Align;
+  }
   // On non-Darwin, the stack type alignment is always 4.
   if (!IsDarwinVectorABI) {
 // Set explicit alignment, since we may need to realign the top.
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D78564: [i386] Modify the alignment of __m128/__m256/__m512 vector type according i386 abi.

2021-04-14 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

Thanks for your review. Hope this patch won't cause too many ABI issues in the 
future.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D78564/new/

https://reviews.llvm.org/D78564

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 338443.
LiuChen3 added a comment.

1. Rebase.
2. Adding _mm512_i32loscatter_epi64 and _mm512_mask_i32loscatter_epi64.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,51 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_i32loscatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,169 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corres

[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 338445.
LiuChen3 added a comment.

Fix format issue.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,51 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_i32loscatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,169 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VPGATHERDQ  instructions.
+///
+/// \operation
+///

  1   2   >