[PATCH] D58343: Enablement for AMD znver2 architecture - skeleton patch

Ganesh Gopalasubramanian via Phabricator via cfe-commits Tue, 19 Feb 2019 08:59:26 -0800

GGanesh updated this revision to Diff 187386.

Repository:
  rL LLVM


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58343/new/

https://reviews.llvm.org/D58343

Files:
  include/llvm/Support/X86TargetParser.def
  lib/Support/Host.cpp
  lib/Target/X86/X86.td
  test/CodeGen/X86/cpus-amd.ll
  test/CodeGen/X86/lzcnt-zext-cmp.ll
  test/CodeGen/X86/slow-unaligned-mem.ll
  test/CodeGen/X86/x86-64-double-shifts-var.ll

Index: test/CodeGen/X86/x86-64-double-shifts-var.ll
===================================================================
--- test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -13,8 +13,9 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 
-; Verify that for the X86_64 processors that are known to have poor latency 
+; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
 ; instructions.
 
@@ -25,7 +26,7 @@
 
 define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
 entry:
-; CHECK-NOT: shld 
+; CHECK-NOT: shld
   %sh_prom = zext i32 %c to i64
   %shl = shl i64 %a, %sh_prom
   %sub = sub nsw i32 64, %c
Index: test/CodeGen/X86/slow-unaligned-mem.ll
===================================================================
--- test/CodeGen/X86/slow-unaligned-mem.ll
+++ test/CodeGen/X86/slow-unaligned-mem.ll
@@ -47,6 +47,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2        2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
Index: test/CodeGen/X86/lzcnt-zext-cmp.ll
===================================================================
--- test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 
 ; Test one 32-bit input, output is 32-bit, no transformations expected.
 define i32 @test_zext_cmp0(i32 %a) {
Index: test/CodeGen/X86/cpus-amd.ll
===================================================================
--- test/CodeGen/X86/cpus-amd.ll
+++ test/CodeGen/X86/cpus-amd.ll
@@ -26,6 +26,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -1144,15 +1144,14 @@
   FeatureMacroFusion
 ]>;
 
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
+// AMD Zen Processors common ISAs
+def ZNFeatures : ProcessorFeatures<[], [
   FeatureADX,
   FeatureAES,
   FeatureAVX2,
   FeatureBMI,
   FeatureBMI2,
   FeatureCLFLUSHOPT,
-  FeatureCLZERO,
   FeatureCMOV,
   Feature64Bit,
   FeatureCMPXCHG16B,
@@ -1184,6 +1183,21 @@
   FeatureXSAVEOPT,
   FeatureXSAVES]>;
 
+class Znver1Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+  FeatureCLZERO
+]>;
+def : Znver1Proc<"znver1">;
+
+class Znver2Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+  FeatureCLZERO,
+  FeatureCLWB,
+  FeatureRDPID,
+  FeatureWBNOINVD
+]>;
+def : Znver2Proc<"znver2">;
+
 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
Index: lib/Support/Host.cpp
===================================================================
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -916,7 +916,14 @@
     break; // "btver2"
   case 23:
     *Type = X86::AMDFAM17H;
-    *Subtype = X86::AMDFAM17H_ZNVER1;
+    if (Model >= 0x30 && Model <= 0x3f) {
+      *Subtype = X86::AMDFAM17H_ZNVER2;
+      break; // "znver2"; 30h-3fh: Zen2
+    }
+    if (Model <= 0x0f) {
+      *Subtype = X86::AMDFAM17H_ZNVER1;
+      break; // "znver1"; 00h-0Fh: Zen1
+    }
     break;
   default:
     break; // "generic"
Index: include/llvm/Support/X86TargetParser.def
===================================================================
--- include/llvm/Support/X86TargetParser.def
+++ include/llvm/Support/X86TargetParser.def
@@ -90,6 +90,7 @@
 X86_CPU_SUBTYPE_COMPAT("bdver3",         AMDFAM15H_BDVER3,            "bdver3")
 X86_CPU_SUBTYPE_COMPAT("bdver4",         AMDFAM15H_BDVER4,            "bdver4")
 X86_CPU_SUBTYPE_COMPAT("znver1",         AMDFAM17H_ZNVER1,            "znver1")
+X86_CPU_SUBTYPE_COMPAT("znver2",         AMDFAM17H_ZNVER2,            "znver2")
 X86_CPU_SUBTYPE_COMPAT("ivybridge",      INTEL_COREI7_IVYBRIDGE,      "ivybridge")
 X86_CPU_SUBTYPE_COMPAT("haswell",        INTEL_COREI7_HASWELL,        "haswell")
 X86_CPU_SUBTYPE_COMPAT("broadwell",      INTEL_COREI7_BROADWELL,      "broadwell")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D58343: Enablement for AMD znver2 architecture - skeleton patch

Reply via email to