Hi again
On Wednesday 11 December 2013, Uros Bizjak wrote:
> Hello!
> 
> > PR gcc/59422
> > 
> > This patch extends the supported targets for function multiversioning to
> > also include Haswell, Silvermont, and the most recent AMD models. It
> > also prioritizes AVX2 versions over AMD-specific pre-AVX2 versions.
> 
> Please add a ChangeLog entry and attach the complete patch. Please
> also state how you tested the patch, as outlined in the instructions
> [1].
> 
> [1] http://gcc.gnu.org/contribute.html
> 
Here is the updated patch, with better CPU model detection and ChangeLog entries added.

The patch has been tested with the attached test.cpp. Verified that it doesn't 
build before the patch, and that it builds after, and verified it selects 
correct versions at runtime based on either CPU model or supported ISA (tested 
on 3 machines: SandyBridge, IvyBridge and Phenom II).

Btw, I couldn't find anything that corresponds to gcc's btver2 arch. Is that 
an old term for what has become the Jaguar architecture?

Allan
Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 205984)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,9 @@
+2013-12-14  Allan Sandfeld Jensen <sandf...@kde.org>
+
+        PR gcc/59422
+        * config/i386/i386.c: Extend function multiversioning
+        to better support recent Intel and AMD models.
+        
 2013-12-14  Marek Polacek  <pola...@redhat.com>
 
 	PR sanitizer/59503
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 205984)
+++ gcc/config/i386/i386.c	(working copy)
@@ -29962,9 +29962,14 @@
     P_PROC_SSE4_2,
     P_POPCNT,
     P_AVX,
+    P_PROC_AVX,
+    P_FMA4,
+    P_XOP,
+    P_PROC_XOP,
+    P_FMA,    
+    P_PROC_FMA,
     P_AVX2,
-    P_FMA,
-    P_PROC_FMA
+    P_PROC_AVX2
   };
 
  enum feature_priority priority = P_ZERO;
@@ -29983,11 +29988,15 @@
       {"sse", P_SSE},
       {"sse2", P_SSE2},
       {"sse3", P_SSE3},
+      {"sse4a", P_SSE4_a},
       {"ssse3", P_SSSE3},
       {"sse4.1", P_SSE4_1},
       {"sse4.2", P_SSE4_2},
       {"popcnt", P_POPCNT},
       {"avx", P_AVX},
+      {"fma4", P_FMA4},
+      {"xop", P_XOP},
+      {"fma", P_FMA},
       {"avx2", P_AVX2}
     };
 
@@ -30041,25 +30050,49 @@
 	      break;
             case PROCESSOR_COREI7_AVX:
               arg_str = "corei7-avx";
-              priority = P_PROC_SSE4_2;
+              priority = P_PROC_AVX;
               break;
+            case PROCESSOR_HASWELL:
+              arg_str = "core-avx2";
+              priority = P_PROC_AVX2;
+              break;
 	    case PROCESSOR_ATOM:
 	      arg_str = "atom";
 	      priority = P_PROC_SSSE3;
 	      break;
+            case PROCESSOR_SLM:
+              arg_str = "slm";
+              priority = P_PROC_SSE4_2;
+              break;
 	    case PROCESSOR_AMDFAM10:
 	      arg_str = "amdfam10h";
 	      priority = P_PROC_SSE4_a;
 	      break;
+            case PROCESSOR_BTVER1:
+              arg_str = "btver1";
+              priority = P_PROC_SSE4_a;
+              break;
+            case PROCESSOR_BTVER2:
+              arg_str = "btver2";
+              priority = P_PROC_SSE4_2;
+              break;
 	    case PROCESSOR_BDVER1:
 	      arg_str = "bdver1";
-	      priority = P_PROC_FMA;
+	      priority = P_PROC_XOP;
 	      break;
 	    case PROCESSOR_BDVER2:
 	      arg_str = "bdver2";
 	      priority = P_PROC_FMA;
 	      break;
-	    }  
+            case PROCESSOR_BDVER3:
+              arg_str = "bdver3";
+              priority = P_PROC_FMA;
+              break;
+            case PROCESSOR_BDVER4:
+              arg_str = "bdver4";
+              priority = P_PROC_AVX2;
+              break;
+            }  
 	}    
     
       cl_target_option_restore (&global_options, &cur_target);
@@ -30919,9 +30952,13 @@
     F_SSE2,
     F_SSE3,
     F_SSSE3,
+    F_SSE4_a,
     F_SSE4_1,
     F_SSE4_2,
     F_AVX,
+    F_FMA4,
+    F_XOP,
+    F_FMA,
     F_AVX2,
     F_MAX
   };
@@ -30938,15 +30975,20 @@
     M_INTEL_CORE2,
     M_INTEL_COREI7,
     M_AMDFAM10H,
+    M_AMDFAM14H,
     M_AMDFAM15H,
     M_INTEL_SLM,
     M_CPU_SUBTYPE_START,
     M_INTEL_COREI7_NEHALEM,
     M_INTEL_COREI7_WESTMERE,
     M_INTEL_COREI7_SANDYBRIDGE,
+    M_INTEL_COREI7_IVYBRIDGE,
+    M_INTEL_COREI7_HASWELL,
     M_AMDFAM10H_BARCELONA,
     M_AMDFAM10H_SHANGHAI,
     M_AMDFAM10H_ISTANBUL,
+    M_AMDFAM14H_BTVER1,
+    M_AMDFAM14H_BTVER2,
     M_AMDFAM15H_BDVER1,
     M_AMDFAM15H_BDVER2,
     M_AMDFAM15H_BDVER3,
@@ -30968,11 +31010,16 @@
       {"corei7", M_INTEL_COREI7},
       {"nehalem", M_INTEL_COREI7_NEHALEM},
       {"westmere", M_INTEL_COREI7_WESTMERE},
-      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
+      {"corei7-avx", M_INTEL_COREI7_SANDYBRIDGE},
+      {"core-avx-i", M_INTEL_COREI7_IVYBRIDGE},
+      {"core-avx2", M_INTEL_COREI7_HASWELL},
       {"amdfam10h", M_AMDFAM10H},
       {"barcelona", M_AMDFAM10H_BARCELONA},
       {"shanghai", M_AMDFAM10H_SHANGHAI},
       {"istanbul", M_AMDFAM10H_ISTANBUL},
+      {"amdfam14h", M_AMDFAM14H},
+      {"btver1", M_AMDFAM14H_BTVER1},
+      {"btver2", M_AMDFAM14H_BTVER2},
       {"amdfam15h", M_AMDFAM15H},
       {"bdver1", M_AMDFAM15H_BDVER1},
       {"bdver2", M_AMDFAM15H_BDVER2},
@@ -30994,9 +31041,13 @@
       {"sse2",   F_SSE2},
       {"sse3",   F_SSE3},
       {"ssse3",  F_SSSE3},
+      {"sse4a",  F_SSE4_a},
       {"sse4.1", F_SSE4_1},
       {"sse4.2", F_SSE4_2},
       {"avx",    F_AVX},
+      {"fma4",   F_FMA4},
+      {"xop",    F_XOP},
+      {"fma",    F_FMA},
       {"avx2",   F_AVX2}
     };
 
Index: libgcc/ChangeLog
===================================================================
--- libgcc/ChangeLog	(revision 205984)
+++ libgcc/ChangeLog	(working copy)
@@ -1,3 +1,9 @@
+2013-12-14  Allan Sandfeld Jensen <sandf...@kde.org>
+
+        PR gcc/59422
+        * config/i386/cpuinfo.c: Detect sse4a, fma4, xop and fma
+        ISAs and recent Intel and AMD models.
+        
 2013-12-12  Zhenqiang Chen  <zhenqiang.c...@arm.com>
 
 	* config.host (arm*-*-uclinux*): Move t-arm before t-bpabi.
Index: libgcc/config/i386/cpuinfo.c
===================================================================
--- libgcc/config/i386/cpuinfo.c	(revision 205984)
+++ libgcc/config/i386/cpuinfo.c	(working copy)
@@ -60,6 +60,7 @@
   INTEL_CORE2,
   INTEL_COREI7,
   AMDFAM10H,
+  AMDFAM14H,
   AMDFAM15H,
   INTEL_SLM,
   CPU_TYPE_MAX
@@ -70,11 +71,17 @@
   INTEL_COREI7_NEHALEM = 1,
   INTEL_COREI7_WESTMERE,
   INTEL_COREI7_SANDYBRIDGE,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_COREI7_HASWELL,
   AMDFAM10H_BARCELONA,
   AMDFAM10H_SHANGHAI,
   AMDFAM10H_ISTANBUL,
+  AMDFAM14H_BTVER1,
+  AMDFAM14H_BTVER2,
   AMDFAM15H_BDVER1,
   AMDFAM15H_BDVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
   CPU_SUBTYPE_MAX
 };
 
@@ -89,9 +96,13 @@
   FEATURE_SSE2,
   FEATURE_SSE3,
   FEATURE_SSSE3,
+  FEATURE_SSE4_a,
   FEATURE_SSE4_1,
   FEATURE_SSE4_2,
   FEATURE_AVX,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
   FEATURE_AVX2
 };
 
@@ -113,36 +124,43 @@
     {
     /* AMD Family 10h.  */
     case 0x10:
+      __cpu_model.__cpu_type = AMDFAM10H;
       switch (model)
 	{
 	case 0x2:
 	  /* Barcelona.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_BARCELONA;
 	  break;
 	case 0x4:
 	  /* Shanghai.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_SHANGHAI;
 	  break;
 	case 0x8:
 	  /* Istanbul.  */
-	  __cpu_model.__cpu_type = AMDFAM10H;
 	  __cpu_model.__cpu_subtype = AMDFAM10H_ISTANBUL;
 	  break;
 	default:
 	  break;
 	}
       break;
-    /* AMD Family 15h.  */
+    /* AMD Family 14h "Bobcat". */
+    case 0x14:
+      __cpu_model.__cpu_type = AMDFAM14H;
+      if ( model <= 0xf)
+        __cpu_model.__cpu_subtype = AMDFAM14H_BTVER1;
+      break;
+    /* AMD Family 15h "Bulldozer".  */
     case 0x15:
       __cpu_model.__cpu_type = AMDFAM15H;
       /* Bulldozer version 1.  */
       if ( model <= 0xf)
 	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER1;
-      /* Bulldozer version 2.  */
-      if (model >= 0x10 && model <= 0x1f)
-	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;
+      /* Bulldozer version 2 "Piledriver" */
+      if (model >= 0x10 && model <= 0x2f)
+	__cpu_model.__cpu_subtype = AMDFAM15H_BDVER2;      
+      /* Bulldozer version 3 "Steamroller"  */
+      if (model >= 0x30 && model <= 0x4f)
+        __cpu_model.__cpu_subtype = AMDFAM15H_BDVER3;
       break;
     default:
       break;
@@ -196,6 +214,20 @@
 	      __cpu_model.__cpu_type = INTEL_COREI7;
 	      __cpu_model.__cpu_subtype = INTEL_COREI7_SANDYBRIDGE;
 	      break;
+            case 0x3a:
+            case 0x3e:
+              /* Ivy Bridge.  */
+              __cpu_model.__cpu_type = INTEL_COREI7;
+              __cpu_model.__cpu_subtype = INTEL_COREI7_IVYBRIDGE;
+              break;
+            case 0x3c:
+            case 0x3f:
+            case 0x45:
+            case 0x46:
+              /* Haswell.  */
+              __cpu_model.__cpu_type = INTEL_COREI7;
+              __cpu_model.__cpu_subtype = INTEL_COREI7_HASWELL;
+              break;
 	    case 0x17:
 	    case 0x1d:
 	      /* Penryn.  */
@@ -242,6 +272,8 @@
     features |= (1 << FEATURE_SSE4_2);
   if (ecx & bit_AVX)
     features |= (1 << FEATURE_AVX);
+  if (ecx & bit_FMA)
+    features |= (1 << FEATURE_FMA);
 
   /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
   if (max_cpuid_level >= 7)
@@ -252,6 +284,23 @@
 	features |= (1 << FEATURE_AVX2);
     }
 
+  unsigned int ext_level;
+  unsigned int eax, ebx;
+  /* Check cpuid level of extended features.  */
+  __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+  if (ext_level > 0x80000000)
+    {
+      __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+      if (ecx & bit_SSE4a)
+        features |= (1 << FEATURE_SSE4_a);
+      if (ecx & bit_FMA4)
+        features |= (1 << FEATURE_FMA4);
+      if (ecx & bit_XOP)
+        features |= (1 << FEATURE_XOP);
+    }
+    
   __cpu_model.__cpu_features[0] = features;
 }
 
#include <stdio.h>

#define TEST_ARCH

// Version of test_target built for the "default" target; it only reports
// which version was executed.
__attribute__((target("default")))
void test_target()
{
    fputs("default version\n", stdout);
}

// Version of test_target built for the "sse2" target; it only reports
// which version was executed.
__attribute__((target("sse2")))
void test_target()
{
    fputs("sse2 version\n", stdout);
}

// Version of test_target built for the "sse4a" target; it only reports
// which version was executed.
__attribute__((target("sse4a")))
void test_target()
{
    fputs("sse4a version\n", stdout);
}

// Version of test_target built for the "sse4.1" target; it only reports
// which version was executed.
__attribute__((target("sse4.1")))
void test_target()
{
    fputs("sse4.1 version\n", stdout);
}

// Version of test_target built for the "avx" target; it only reports
// which version was executed.
__attribute__((target("avx")))
void test_target()
{
    fputs("avx version\n", stdout);
}

// Version of test_target built for the "xop" target; it only reports
// which version was executed.
__attribute__((target("xop")))
void test_target()
{
    fputs("xop version\n", stdout);
}

#ifdef TEST_ARCH
// Version of test_target built for "arch=amdfam10"; it only reports
// which version was executed.
__attribute__((target("arch=amdfam10")))
void test_target()
{
    fputs("amdfam10 version\n", stdout);
}

// Version of test_target built for "arch=btver2"; it only reports
// which version was executed.
__attribute__((target("arch=btver2")))
void test_target()
{
    fputs("btver2 version\n", stdout);
}

// Version of test_target built for "arch=slm"; the message it prints
// names the target "silvermont".
__attribute__((target("arch=slm")))
void test_target()
{
    fputs("silvermont version\n", stdout);
}

// Version of test_target built for "arch=corei7-avx"; it only reports
// which version was executed.
__attribute__((target("arch=corei7-avx")))
void test_target()
{
    fputs("corei7-avx version\n", stdout);
}

// Version of test_target built for "arch=core-avx2"; it only reports
// which version was executed.
__attribute__((target("arch=core-avx2")))
void test_target()
{
    fputs("core-avx2 version\n", stdout);
}

#endif

// Entry point: makes a single call to test_target(), so the line printed
// shows which of the versions defined in this file was run.
int main()
{
    test_target();
    return 0;
}

Reply via email to