Support for getting CPU type and feature information at run-time.

The following patch adds support for querying the platform at run-time: the
CPU type and the instruction-set features it supports. The multi-versioning
framework will use the new builtins to dispatch to the right function version.
Please refer to http://gcc.gnu.org/ml/gcc/2011-08/msg00298.html for details on
function multi-versioning usability.

        * tree-pass.h (pass_tree_fold_builtin_target): New pass.
        * builtins.def (BUILT_IN_TARGET_SUPPORTS_CMOV): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_MMX): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_POPCOUNT): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_SSE): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_SSE2): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_SSE3): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_SSSE3): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_SSE4_1): New builtin.
        (BUILT_IN_TARGET_SUPPORTS_SSE4_2): New builtin.
        (BUILT_IN_TARGET_IS_AMD): New builtin.
        (BUILT_IN_TARGET_IS_INTEL): New builtin.
        (BUILT_IN_TARGET_IS_COREI7_NEHALEM): New builtin.
        (BUILT_IN_TARGET_IS_COREI7_WESTMERE): New builtin.
        (BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE): New builtin.
        (BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA): New builtin.
        (BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI): New builtin.
        (BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL): New builtin.
        * mversn-dispatch.c (do_fold_builtin_target): New function.
        (gate_fold_builtin_target): New function.
        (pass_tree_fold_builtin_target): New pass.
        * timevar.def (TV_FOLD_BUILTIN_TARGET): New var.
        * passes.c (init_optimization_passes): Add new pass to pass list.
        * config/i386/i386.c (build_struct_with_one_bit_fields): New function.
        (make_var_decl): New function.
        (get_field_from_struct): New function.
        (make_constructor_to_get_target_type): New function.
        (fold_builtin_target): New function.
        (ix86_fold_builtin): New function.
        (TARGET_FOLD_BUILTIN): New macro.

        * gcc.dg/builtin_target.c: New test.
        
        * config/i386/i386-cpuinfo.c: New file.
        * config/i386/t-cpuinfo: New file.
        * config.host: Add t-cpuinfo to link i386-cpuinfo.o with libgcc

Index: libgcc/config.host
===================================================================
--- libgcc/config.host  (revision 177767)
+++ libgcc/config.host  (working copy)
@@ -609,7 +609,7 @@ case ${host} in
 i[34567]86-*-linux* | x86_64-*-linux* | \
   i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | \
   i[34567]86-*-gnu*)
-       tmake_file="${tmake_file} t-tls"
+       tmake_file="${tmake_file} t-tls i386/t-cpuinfo"
        if test "$libgcc_cv_cfi" = "yes"; then
                tmake_file="${tmake_file} t-stack i386/t-stack-i386"
        fi
Index: libgcc/config/i386/t-cpuinfo
===================================================================
--- libgcc/config/i386/t-cpuinfo        (revision 0)
+++ libgcc/config/i386/t-cpuinfo        (revision 0)
@@ -0,0 +1,2 @@
+# Add the run-time CPU type/feature detection code to libgcc.
+LIB2ADD += $(srcdir)/config/i386/i386-cpuinfo.c
Index: libgcc/config/i386/i386-cpuinfo.c
===================================================================
--- libgcc/config/i386/i386-cpuinfo.c   (revision 0)
+++ libgcc/config/i386/i386-cpuinfo.c   (revision 0)
@@ -0,0 +1,275 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ * Contributed by Sriraman Tallam <tmsri...@google.com>.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This code is adapted from gcc/config/i386/driver-i386.c. The CPUID
+ * instruction is used to figure out the cpu type and supported features.
+ * GCC runs __cpu_indicator_init from a constructor which sets the members
+ * of __cpu_model and __cpu_features.
+ */
+
+#include <string.h>
+
+#ifdef __GNUC__
+#include "cpuid.h"
+
+enum processor_type
+{
+  PROCESSOR_PENTIUM = 0,
+  PROCESSOR_CORE2,
+  PROCESSOR_COREI7_NEHALEM,
+  PROCESSOR_COREI7_WESTMERE,
+  PROCESSOR_COREI7_SANDYBRIDGE,
+  PROCESSOR_INTEL_GENERIC,
+  PROCESSOR_AMDFAM10_BARCELONA,
+  PROCESSOR_AMDFAM10_SHANGHAI,
+  PROCESSOR_AMDFAM10_ISTANBUL,
+  PROCESSOR_AMDFAM10_GENERIC,
+  PROCESSOR_AMD_GENERIC,
+  PROCESSOR_GENERIC,
+  PROCESSOR_max
+};
+
+enum vendor_signatures
+{
+  SIG_INTEL =  0x756e6547 /* Genu */,
+  SIG_AMD =    0x68747541 /* Auth */
+};
+
+
+/* Features supported. */
+
+struct __processor_features
+{
+  unsigned int __cpu_cmov : 1;
+  unsigned int __cpu_mmx : 1;
+  unsigned int __cpu_popcnt : 1;
+  unsigned int __cpu_sse : 1;
+  unsigned int __cpu_sse2 : 1;
+  unsigned int __cpu_sse3 : 1;
+  unsigned int __cpu_ssse3 : 1;
+  unsigned int __cpu_sse4_1 : 1;
+  unsigned int __cpu_sse4_2 : 1;
+};
+
+/* Flags exported. */
+
+struct __processor_model
+{
+  unsigned int __cpu_is_amd : 1;
+  unsigned int __cpu_is_intel : 1;
+  unsigned int __cpu_is_corei7_nehalem : 1;
+  unsigned int __cpu_is_corei7_westmere : 1;
+  unsigned int __cpu_is_corei7_sandybridge : 1;
+  unsigned int __cpu_is_amdfam10_barcelona : 1;
+  unsigned int __cpu_is_amdfam10_shanghai : 1;
+  unsigned int __cpu_is_amdfam10_istanbul : 1;
+};
+
+enum processor_type __cpu_type = PROCESSOR_GENERIC;
+struct __processor_features __cpu_features;
+struct __processor_model __cpu_model;
+
+static void
+get_amd_cpu (unsigned int family, unsigned int model)
+{
+  switch (family)
+    {
+    case 0x10:
+      switch (model)
+       {
+       case 0x2:
+         __cpu_type = PROCESSOR_AMDFAM10_BARCELONA;
+         __cpu_model.__cpu_is_amdfam10_barcelona = 1;
+         break;
+       case 0x4:
+         __cpu_type = PROCESSOR_AMDFAM10_SHANGHAI;
+         __cpu_model.__cpu_is_amdfam10_shanghai = 1;
+         break;
+       case 0x8:
+         __cpu_type = PROCESSOR_AMDFAM10_ISTANBUL;
+         __cpu_model.__cpu_is_amdfam10_istanbul = 1;
+         break;
+       default:
+         __cpu_type = PROCESSOR_AMDFAM10_GENERIC;
+         break;
+       }
+      break;
+    default:
+      __cpu_type = PROCESSOR_AMD_GENERIC;
+    }
+}
+
+static void
+get_intel_cpu (unsigned int family, unsigned int model, unsigned int brand_id)
+{
+  /* Parse family and model only if brand ID is 0. */
+  if (brand_id == 0)
+    {
+      switch (family)
+       {
+       case 0x5:
+         __cpu_type = PROCESSOR_PENTIUM;
+         break;
+       case 0x6:
+         switch (model)
+           {
+           case 0x1a:
+           case 0x1e:
+           case 0x1f:
+           case 0x2e:
+             /* Nehalem.  */
+             __cpu_type = PROCESSOR_COREI7_NEHALEM;
+             __cpu_model.__cpu_is_corei7_nehalem = 1;
+             break;
+           case 0x25:
+           case 0x2c:
+           case 0x2f:
+             /* Westmere.  */
+             __cpu_type = PROCESSOR_COREI7_WESTMERE;
+             __cpu_model.__cpu_is_corei7_westmere = 1;
+             break;
+           case 0x2a:
+             /* Sandy Bridge.  */
+             __cpu_type = PROCESSOR_COREI7_SANDYBRIDGE;
+             __cpu_model.__cpu_is_corei7_sandybridge = 1;
+             break;
+           case 0x17:
+           case 0x1d:
+             /* Penryn.  */
+           case 0x0f:
+             /* Merom.  */
+             __cpu_type = PROCESSOR_CORE2;
+             break;
+           default:
+             __cpu_type = PROCESSOR_INTEL_GENERIC;
+             break;
+           }
+         break;
+       default:
+         /* We have no idea.  */
+         __cpu_type = PROCESSOR_INTEL_GENERIC;
+         break;
+       }
+    }
+}                      
+
+static void
+get_available_features (unsigned int ecx, unsigned int edx)
+{
+  __cpu_features.__cpu_cmov = (edx & bit_CMOV) ? 1 : 0;
+  __cpu_features.__cpu_mmx = (edx & bit_MMX) ? 1 : 0;
+  __cpu_features.__cpu_sse = (edx & bit_SSE) ? 1 : 0;
+  __cpu_features.__cpu_sse2 = (edx & bit_SSE2) ? 1 : 0;
+  __cpu_features.__cpu_popcnt = (ecx & bit_POPCNT) ? 1 : 0;
+  __cpu_features.__cpu_sse3 = (ecx & bit_SSE3) ? 1 : 0;
+  __cpu_features.__cpu_ssse3 = (ecx & bit_SSSE3) ? 1 : 0;
+  __cpu_features.__cpu_sse4_1 = (ecx & bit_SSE4_1) ? 1 : 0;
+  __cpu_features.__cpu_sse4_2 = (ecx & bit_SSE4_2) ? 1 : 0;
+}
+
+/* A noinline function calling __get_cpuid. Having many calls to
+   cpuid in one function in 32-bit mode causes GCC to complain:
+   "can’t find a register in class ‘CLOBBERED_REGS’".  This is
+   related to PR rtl-optimization 44174. */
+
+static int __attribute__ ((noinline))
+__get_cpuid_output (unsigned int __level,
+                   unsigned int *__eax, unsigned int *__ebx,
+                   unsigned int *__ecx, unsigned int *__edx)
+{
+  return __get_cpuid (__level, __eax, __ebx, __ecx, __edx);
+}
+
+/* Run CPUID leaves 0 and 1 and record the vendor, CPU model and feature
+   bits in __cpu_type, __cpu_model and __cpu_features.  */
+
+void
+__cpu_indicator_init(void)
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  int max_level = 5;
+  unsigned int vendor;
+  unsigned int model, family, brand_id;
+
+  memset (&__cpu_features, 0, sizeof (struct __processor_features));
+  memset (&__cpu_model, 0, sizeof (struct __processor_model));
+
+  /* Assume cpuid insn present. Run in level 0 to get vendor id. */
+  if (!__get_cpuid_output (0, &eax, &ebx, &ecx, &edx))
+    return;
+
+  vendor = ebx;
+  max_level = eax;
+
+  if (max_level < 1)
+    return;
+
+  if (!__get_cpuid_output (1, &eax, &ebx, &ecx, &edx))
+    return;
+
+  model = (eax >> 4) & 0x0f;
+  family = (eax >> 8) & 0x0f;
+  brand_id = ebx & 0xff;
+
+  /* Fold in extended family/model; AMD family 0x10 is 0x0f + ext. family 1. */
+  if (vendor == SIG_INTEL || vendor == SIG_AMD)
+    {
+      unsigned int extended_model, extended_family;
+
+      extended_model = (eax >> 12) & 0xf0;
+      extended_family = (eax >> 20) & 0xff;
+      if (family == 0x0f)
+       {
+         family += extended_family;
+         model += extended_model;
+       }
+      else if (family == 0x06 && vendor == SIG_INTEL)
+       model += extended_model;
+    }
+
+  /* Find CPU model. */
+
+  if (vendor == SIG_AMD)
+    {
+      __cpu_model.__cpu_is_amd = 1;
+      get_amd_cpu (family, model);
+    }
+  else if (vendor == SIG_INTEL)
+    {
+      __cpu_model.__cpu_is_intel = 1;
+      get_intel_cpu (family, model, brand_id);
+    }
+
+  /* Find available features. */
+  get_available_features (ecx, edx);
+}
+
+#else
+
+void
+__cpu_indicator_init(void)
+{
+}
+
+#endif /* __GNUC__ */
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h     (revision 177767)
+++ gcc/tree-pass.h     (working copy)
@@ -449,6 +449,7 @@ extern struct gimple_opt_pass pass_split_functions
 extern struct gimple_opt_pass pass_feedback_split_functions;
 extern struct gimple_opt_pass pass_threadsafe_analyze;
 extern struct gimple_opt_pass pass_tree_convert_builtin_dispatch;
+extern struct gimple_opt_pass pass_tree_fold_builtin_target;
 
 /* IPA Passes */
 extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
Index: gcc/testsuite/gcc.dg/builtin_target.c
===================================================================
--- gcc/testsuite/gcc.dg/builtin_target.c       (revision 0)
+++ gcc/testsuite/gcc.dg/builtin_target.c       (revision 0)
@@ -0,0 +1,49 @@
+/* This test checks if the __builtin_target_* calls are recognized. */
+
+/* { dg-do run } */
+
+int
+fn1 ()
+{
+  if (__builtin_target_supports_cmov () < 0)
+    return -1;
+  if (__builtin_target_supports_mmx () < 0)
+    return -1;
+  if (__builtin_target_supports_popcount () < 0)
+    return -1;
+  if (__builtin_target_supports_sse () < 0)
+    return -1;
+  if (__builtin_target_supports_sse2 () < 0)
+    return -1;
+  if (__builtin_target_supports_sse3 () < 0)
+    return -1;
+  if (__builtin_target_supports_ssse3 () < 0)
+    return -1;
+  if (__builtin_target_supports_sse4_1 () < 0)
+    return -1;
+  if (__builtin_target_supports_sse4_2 () < 0)
+    return -1;
+  if (__builtin_target_is_amd () < 0)
+    return -1;
+  if (__builtin_target_is_intel () < 0)
+    return -1;
+  if (__builtin_target_is_corei7_nehalem () < 0)
+    return -1;
+  if (__builtin_target_is_corei7_westmere () < 0)
+    return -1;
+  if (__builtin_target_is_corei7_sandybridge () < 0)
+    return -1;
+  if (__builtin_target_is_amdfam10_barcelona () < 0)
+    return -1;
+  if (__builtin_target_is_amdfam10_shanghai () < 0)
+    return -1;
+  if (__builtin_target_is_amdfam10_istanbul () < 0)
+    return -1;
+
+  return 0;
+}
+
+int main ()
+{
+  return fn1 ();
+}
Index: gcc/builtins.def
===================================================================
--- gcc/builtins.def    (revision 177767)
+++ gcc/builtins.def    (working copy)
@@ -763,6 +763,25 @@ DEF_BUILTIN (BUILT_IN_EMUTLS_REGISTER_COMMON,
 /* Multiversioning builtin dispatch hook. */
 DEF_GCC_BUILTIN (BUILT_IN_DISPATCH, "dispatch", BT_FN_INT_PTR_FN_INT_PTR_PTR_VAR, ATTR_NULL)
 
+/* Builtins to determine target type and features at run-time. */
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_CMOV, "target_supports_cmov", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_MMX, "target_supports_mmx", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_POPCOUNT, "target_supports_popcount", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE, "target_supports_sse", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE2, "target_supports_sse2", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE3, "target_supports_sse3", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSSE3, "target_supports_ssse3", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE4_1, "target_supports_sse4_1", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_SUPPORTS_SSE4_2, "target_supports_sse4_2", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMD, "target_is_amd", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_INTEL, "target_is_intel", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_NEHALEM, "target_is_corei7_nehalem", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_WESTMERE, "target_is_corei7_westmere", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE, "target_is_corei7_sandybridge", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA, "target_is_amdfam10_barcelona", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI, "target_is_amdfam10_shanghai", BT_FN_INT, ATTR_NULL)
+DEF_GCC_BUILTIN (BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL, "target_is_amdfam10_istanbul", BT_FN_INT, ATTR_NULL)
+
 /* Exception support.  */
 DEF_BUILTIN_STUB (BUILT_IN_UNWIND_RESUME, "__builtin_unwind_resume")
 DEF_BUILTIN_STUB (BUILT_IN_CXA_END_CLEANUP, "__builtin_cxa_end_cleanup")
Index: gcc/mversn-dispatch.c
===================================================================
--- gcc/mversn-dispatch.c       (revision 177767)
+++ gcc/mversn-dispatch.c       (working copy)
@@ -135,6 +135,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "vecprim.h"
 #include "gimple-pretty-print.h"
+#include "target.h"
 
 typedef struct cgraph_node* NODEPTR;
 DEF_VEC_P (NODEPTR);
@@ -1764,3 +1765,103 @@ struct gimple_opt_pass pass_tree_convert_builtin_d
   TODO_update_ssa | TODO_verify_ssa
  }
 };
+
+/* Replace each __builtin_target_* call by the value the target folds it to. */
+
+static unsigned int
+do_fold_builtin_target (void)
+{
+  basic_block bb;
+  gimple_stmt_iterator gsi;
+
+  /* Go through each stmt looking for __builtin_target_* calls */
+  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (current_function_decl))
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+        {
+         gimple stmt = gsi_stmt (gsi);
+         gimple assign_stmt;
+          tree call_decl;
+         tree lhs_retval;
+         tree folded_val;
+
+         tree ssa_var, tmp_var;
+         gimple init_stmt;
+
+          if (!is_gimple_call (stmt))
+            continue;
+
+          call_decl = gimple_call_fndecl (stmt);
+
+         /* Check if it is a __builtin_target_* call. */
+
+         if (call_decl == NULL
+             || DECL_NAME (call_decl) == NULL_TREE
+             || DECL_BUILT_IN_CLASS (call_decl) != BUILT_IN_NORMAL
+             || strstr (IDENTIFIER_POINTER (DECL_NAME (call_decl)),
+                         "__builtin_target") == NULL)
+            continue;
+
+         /* If the lhs is NULL there is no need to fold the call. */
+         lhs_retval = gimple_call_lhs(stmt);
+         if (lhs_retval == NULL)
+           continue;
+
+         /* Call the target hook to fold the builtin */
+          folded_val = targetm.fold_builtin(call_decl, 0, NULL, false);
+
+         /* If the target does not support the builtin then fold it to zero. */
+         if (folded_val == NULL_TREE)
+           folded_val = build_zero_cst (unsigned_type_node);
+
+         /* Type cast unsigned value to integer */
+         tmp_var = create_tmp_var (unsigned_type_node, NULL);
+         init_stmt = gimple_build_assign (tmp_var, folded_val);
+         ssa_var = make_ssa_name (tmp_var, init_stmt);
+         gimple_assign_set_lhs (init_stmt, ssa_var);
+         mark_symbols_for_renaming (init_stmt);
+
+         assign_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs_retval, ssa_var, 0);
+         mark_symbols_for_renaming(assign_stmt);
+
+         gsi_insert_after_without_update (&gsi, assign_stmt, GSI_SAME_STMT);
+         gsi_insert_after_without_update (&gsi, init_stmt, GSI_SAME_STMT);
+         /* Delete the original call. */
+         gsi_remove(&gsi, true);
+       }
+    }
+
+  return 0;
+}
+
+static bool
+gate_fold_builtin_target (void)
+{
+  return true;
+}
+
+/* Pass to fold __builtin_target_* functions */
+
+struct gimple_opt_pass pass_tree_fold_builtin_target =
+{
+ {
+  GIMPLE_PASS,
+  "fold_builtin_target",               /* name */
+  gate_fold_builtin_target,            /* gate */
+  do_fold_builtin_target,              /* execute */
+  NULL,                                        /* sub */
+  NULL,                                        /* next */
+  0,                                   /* static_pass_number */
+  TV_FOLD_BUILTIN_TARGET,              /* tv_id */
+  PROP_cfg,                            /* properties_required */
+  PROP_cfg,                            /* properties_provided */
+  0,                                   /* properties_destroyed */
+  0,                                   /* todo_flags_start */
+  TODO_dump_func |                     /* todo_flags_finish */
+  TODO_cleanup_cfg |
+  TODO_update_ssa |
+  TODO_verify_ssa
+ }
+};
+
+
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def     (revision 177767)
+++ gcc/timevar.def     (working copy)
@@ -124,6 +124,7 @@ DEFTIMEVAR (TV_PARSE_INMETH          , "parser inl
 DEFTIMEVAR (TV_TEMPLATE_INST         , "template instantiation")
 DEFTIMEVAR (TV_INLINE_HEURISTICS     , "inline heuristics")
 DEFTIMEVAR (TV_MVERSN_DISPATCH       , "multiversion dispatch")
+DEFTIMEVAR (TV_FOLD_BUILTIN_TARGET   , "fold __builtin_target calls")
 DEFTIMEVAR (TV_INTEGRATION           , "integration")
 DEFTIMEVAR (TV_TREE_GIMPLIFY        , "tree gimplify")
 DEFTIMEVAR (TV_TREE_EH              , "tree eh")
Index: gcc/passes.c
===================================================================
--- gcc/passes.c        (revision 177767)
+++ gcc/passes.c        (working copy)
@@ -1249,6 +1249,8 @@ init_optimization_passes (void)
     {
       struct opt_pass **p = &pass_ipa_multiversion_dispatch.pass.sub;
       NEXT_PASS (pass_tree_convert_builtin_dispatch);
+      /* Fold calls to __builtin_target_*. */
+      NEXT_PASS (pass_tree_fold_builtin_target);
       /* Rebuilding cgraph edges is necessary as the above passes change
          the call graph.  Otherwise, future optimizations use the old
         call graph and make wrong decisions sometimes.*/
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c      (revision 177767)
+++ gcc/config/i386/i386.c      (working copy)
@@ -58,6 +58,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "sched-int.h"
 #include "sbitmap.h"
 #include "fibheap.h"
+#include "tree-flow.h"
+#include "tree-pass.h"
 
 enum upper_128bits_state
 {
@@ -7867,6 +7869,338 @@ ix86_build_builtin_va_list (void)
   return ret;
 }
 
+/* Returns a struct type with name NAME and number of fields equal to
+   NUM_FIELDS.  Each field is a unsigned int bit field of length 1 bit. */
+
+static tree
+build_struct_with_one_bit_fields (int num_fields, const char *name)
+{
+  int i;
+  char field_name [10];
+  tree field = NULL_TREE, field_chain = NULL_TREE;
+  tree type = make_node (RECORD_TYPE);
+
+  strcpy (field_name, "k_field");
+
+  for (i = 0; i < num_fields; i++)
+    {
+      /* Name the fields, 0_field, 1_field, ... */
+      field_name [0] = '0' + i;
+      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+                         get_identifier (field_name), unsigned_type_node);
+      DECL_BIT_FIELD (field) = 1;
+      DECL_SIZE (field) = bitsize_one_node;
+      if (field_chain != NULL_TREE)
+       DECL_CHAIN (field) = field_chain;
+      field_chain = field;
+    }
+  finish_builtin_struct (type, name, field_chain, NULL_TREE);
+  return type;
+}
+
+/* Returns a VAR_DECL of type TYPE and name NAME. */
+
+static tree
+make_var_decl (tree type, const char *name)
+{
+  tree new_decl;
+  struct varpool_node *vnode;
+
+  new_decl = build_decl (UNKNOWN_LOCATION,
+                        VAR_DECL,
+                        get_identifier(name),
+                        type);
+
+  DECL_EXTERNAL (new_decl) = 1;
+  TREE_STATIC (new_decl) = 1;
+  TREE_PUBLIC (new_decl) = 1;
+  DECL_INITIAL (new_decl) = 0;
+  DECL_ARTIFICIAL (new_decl) = 0;
+  DECL_PRESERVE_P (new_decl) = 1;
+
+  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
+  assemble_variable (new_decl, 0, 0, 0);
+
+  vnode = varpool_node (new_decl);
+  gcc_assert (vnode != NULL);
+  /* Set finalized to 1, otherwise it asserts in function "write_symbol" in
+     lto-streamer-out.c. */
+  vnode->finalized = 1;
+
+  return new_decl;
+}
+
+/* Walks the field chain of STRUCT_TYPE and returns field number FIELD_NUM
+   (zero-based).  Asserts that STRUCT_TYPE has at least FIELD_NUM + 1 fields. */
+
+static tree
+get_field_from_struct (tree struct_type, int field_num)
+{
+  int i;
+  tree field = TYPE_FIELDS (struct_type);
+
+  for (i = 0; field != NULL_TREE && i < field_num; i++)
+    field = DECL_CHAIN (field);
+
+  /* Also catches a chain that ends exactly at FIELD_NUM.  */
+  gcc_assert (field != NULL_TREE);
+  return field;
+}
+
+/* Creates a static constructor named NAME that calls __cpu_indicator_init (),
+   defined in libgcc/config/i386/i386-cpuinfo.c, which runs cpuid to figure
+   out the type of the target.  Returns the decl of the constructor. */
+
+static tree
+make_constructor_to_get_target_type (const char *name)
+{
+  tree decl, type, t;
+  gimple_seq seq;
+  basic_block new_bb;
+  tree old_current_function_decl;
+
+  tree __cpu_indicator_int_decl;
+  gimple constructor_body;
+
+
+  type = build_function_type_list (void_type_node, NULL_TREE);
+
+  /* Make a call stmt to __cpu_indicator_init */
+  __cpu_indicator_int_decl = build_fn_decl ("__cpu_indicator_init", type);
+  constructor_body = gimple_build_call (__cpu_indicator_int_decl, 0);
+  DECL_EXTERNAL (__cpu_indicator_int_decl) = 1;
+
+  decl = build_fn_decl (name, type);
+
+  DECL_NAME (decl) = get_identifier (name);
+  SET_DECL_ASSEMBLER_NAME (decl, DECL_NAME (decl));
+  gcc_assert (cgraph_node (decl) != NULL);
+
+  TREE_USED (decl) = 1;
+  DECL_ARTIFICIAL (decl) = 1;
+  DECL_IGNORED_P (decl) = 0;
+  TREE_PUBLIC (decl) = 0;
+  DECL_UNINLINABLE (decl) = 1;
+  DECL_EXTERNAL (decl) = 0;
+  DECL_CONTEXT (decl) = NULL_TREE;
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  DECL_STATIC_CONSTRUCTOR (decl) = 1;
+  TREE_READONLY (decl) = 0;
+  DECL_PURE_P (decl) = 0;
+
+  /* This is a comdat. */ 
+  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+
+  /* Build result decl and add to function_decl. */
+  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
+  DECL_ARTIFICIAL (t) = 1;
+  DECL_IGNORED_P (t) = 1;
+  DECL_RESULT (decl) = t;
+
+  gimplify_function_tree (decl);
+
+  /* Build CFG for this function. */
+
+  old_current_function_decl = current_function_decl;
+  push_cfun (DECL_STRUCT_FUNCTION (decl));
+  current_function_decl = decl;
+  init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
+  cfun->curr_properties |=
+    (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
+     PROP_ssa);
+  new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
+  make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
+
+  /* XXX: Not sure if the edge commented below is necessary.  If I add this
+     edge, it fails in gimple_verify_flow_info in tree-cfg.c in condition :
+     " if (e->flags & EDGE_FALLTHRU)"
+     during -fprofile-generate.
+     Otherwise, it is fine.  Deleting this edge does not break anything.
+     Commenting this so that it is clear I am intentionally not doing this.*/
+  /* make_edge (new_bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU); */
+
+  seq = gimple_seq_alloc_with_stmt (constructor_body);
+
+  set_bb_seq (new_bb, seq);
+  gimple_set_bb (constructor_body, new_bb);
+
+  /* Set the lexical block of the constructor body.  The inliner fails
+     otherwise. */
+  gimple_set_block (constructor_body, DECL_INITIAL (decl));
+
+  /* This call is very important if this pass runs when the IR is in
+     SSA form.  It breaks things in strange ways otherwise. */
+  init_tree_ssa (DECL_STRUCT_FUNCTION (decl));
+  /* add_referenced_var (version_selector_var); */
+
+  cgraph_add_new_function (decl, true);
+  cgraph_call_function_insertion_hooks (cgraph_node (decl));
+  cgraph_mark_needed_node (cgraph_node (decl));
+
+  pop_cfun ();
+  current_function_decl = old_current_function_decl;
+  return decl;
+}
+
+/* Folds FNDECL, a __builtin_target_* decl, into a COMPONENT_REF of the matching
+   bit-field of __cpu_features or __cpu_model (libgcc i386-cpuinfo.c).  */
+
+static tree
+fold_builtin_target (tree fndecl)
+{
+  /* This is the order of bit-fields in __processor_features in
+     i386-cpuinfo.c */
+  enum processor_features
+  {
+    F_CMOV = 0,
+    F_MMX,
+    F_POPCNT,
+    F_SSE,
+    F_SSE2,
+    F_SSE3,
+    F_SSSE3,
+    F_SSE4_1,
+    F_SSE4_2,
+    F_MAX
+  };
+
+  /* This is the order of bit-fields in __processor_model in
+     i386-cpuinfo.c */
+  enum processor_model
+  {
+    M_AMD = 0,
+    M_INTEL,
+    M_COREI7_NEHALEM,
+    M_COREI7_WESTMERE,
+    M_COREI7_SANDYBRIDGE,
+    M_AMDFAM10_BARCELONA,
+    M_AMDFAM10_SHANGHAI,
+    M_AMDFAM10_ISTANBUL,
+    M_MAX
+  };
+
+  static tree __processor_features_type = NULL_TREE;
+  static tree __cpu_features_var = NULL_TREE;
+  static tree __processor_model_type = NULL_TREE;
+  static tree __cpu_model_var = NULL_TREE;
+  static tree ctor_decl = NULL_TREE;
+  static tree field;
+  static tree which_struct;
+
+  /* Make a call to __cpu_indicator_init in a constructor.
+     Function __cpu_indicator_init is defined in i386-cpuinfo.c. */
+  if (ctor_decl == NULL_TREE)
+   ctor_decl = make_constructor_to_get_target_type
+               ("__cpu_indicator_init_ctor");
+
+  if (__processor_features_type == NULL_TREE)
+    __processor_features_type = build_struct_with_one_bit_fields (F_MAX,
+                                 "__processor_features");
+
+  if (__processor_model_type == NULL_TREE)
+    __processor_model_type = build_struct_with_one_bit_fields (M_MAX,
+                                 "__processor_model");
+
+  if (__cpu_features_var == NULL_TREE)
+    __cpu_features_var = make_var_decl (__processor_features_type,
+                                       "__cpu_features");
+
+  if (__cpu_model_var == NULL_TREE)
+    __cpu_model_var = make_var_decl (__processor_model_type,
+                                    "__cpu_model");
+
+  /* Look at fndecl code to identify the field requested. */
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    case BUILT_IN_TARGET_SUPPORTS_CMOV:
+      field = get_field_from_struct (__processor_features_type, F_CMOV);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_MMX:
+      field = get_field_from_struct (__processor_features_type, F_MMX);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_POPCOUNT:
+      field = get_field_from_struct (__processor_features_type, F_POPCNT);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE:
+      field = get_field_from_struct (__processor_features_type, F_SSE);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE2:
+      field = get_field_from_struct (__processor_features_type, F_SSE2);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE3:
+      field = get_field_from_struct (__processor_features_type, F_SSE3);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSSE3:
+      field = get_field_from_struct (__processor_features_type, F_SSSE3);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE4_1:
+      field = get_field_from_struct (__processor_features_type, F_SSE4_1);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_SUPPORTS_SSE4_2:
+      field = get_field_from_struct (__processor_features_type, F_SSE4_2);
+      which_struct = __cpu_features_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMD:
+      field = get_field_from_struct (__processor_model_type, M_AMD);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_INTEL:
+      field = get_field_from_struct (__processor_model_type, M_INTEL);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_COREI7_NEHALEM:
+      field = get_field_from_struct (__processor_model_type, M_COREI7_NEHALEM);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_COREI7_WESTMERE:
+      field = get_field_from_struct (__processor_model_type, M_COREI7_WESTMERE);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_COREI7_SANDYBRIDGE:
+      field = get_field_from_struct (__processor_model_type, M_COREI7_SANDYBRIDGE);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMDFAM10_BARCELONA:
+      field = get_field_from_struct (__processor_model_type, M_AMDFAM10_BARCELONA);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMDFAM10_SHANGHAI:
+      field = get_field_from_struct (__processor_model_type, M_AMDFAM10_SHANGHAI);
+      which_struct = __cpu_model_var;
+      break;
+    case BUILT_IN_TARGET_IS_AMDFAM10_ISTANBUL:
+      field = get_field_from_struct (__processor_model_type, M_AMDFAM10_ISTANBUL);
+      which_struct = __cpu_model_var;
+      break;
+    default:
+      return NULL_TREE;
+    }
+
+  return build3 (COMPONENT_REF, TREE_TYPE (field), which_struct, field, NULL_TREE);
+}
+
+/* Folds __builtin_target_* builtins. */
+
+static tree
+ix86_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
+                   tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED)
+{
+  const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
+      && strstr(decl_name, "__builtin_target") != NULL)
+    return fold_builtin_target (fndecl);
+
+  return NULL_TREE;
+}
+
 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
 
 static void
@@ -35097,6 +35431,9 @@ ix86_autovectorize_vector_sizes (void)
 #undef TARGET_BUILD_BUILTIN_VA_LIST
 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
 
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN ix86_fold_builtin
+
 #undef TARGET_ENUM_VA_LIST_P
 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
 

--
This patch is available for review at http://codereview.appspot.com/4893046

Reply via email to