Hello!

Newer AMD processors do not enable 3dNOW anymore. However, prefetchw
depends on TARGET_3DNOW, so prefetchw is no longer generated for them.
The following patch is a 4.7 version of the mainline patch [1].

2012-09-12  Uros Bizjak  <ubiz...@gmail.com>

        * config/i386/i386.h (x86_prefetchw): New global variable.
        (TARGET_PREFETCHW): New macro.
        * config/i386/i386.c (PTA_PREFETCHW): Ditto.
        (processor_alias_table): Add PTA_PREFETCHW to
        bdver1, bdver2 and btver1.
        (ix86_option_override_internal): Set x86_prefetchw for
        PTA_PREFETCHW targets.
        * config/i386/i386.md (prefetch): Expand to prefetchw
        for TARGET_PREFETCHW.
        (*prefetch_3dnow_<mode>): Also enable for TARGET_PREFETCHW.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}. It will be committed to the 4.7 branch after [1] is committed
to mainline.

[1] http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00670.html

Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 191227)
+++ config/i386/i386.c  (working copy)
@@ -2428,9 +2428,12 @@ enum processor_type ix86_tune;
 /* Which instruction set architecture to use.  */
 enum processor_type ix86_arch;
 
-/* true if sse prefetch instruction is not NOOP.  */
+/* True if processor has SSE prefetch instruction.  */
 int x86_prefetch_sse;
 
+/* True if processor has prefetchw instruction.  */
+int x86_prefetchw;
+ 
 /* -mstackrealign option */
 static const char ix86_force_align_arg_pointer_string[]
   = "force_align_arg_pointer";
@@ -2931,6 +2934,8 @@ ix86_option_override_internal (bool main_args_p)
 #define PTA_XOP                        (HOST_WIDE_INT_1 << 29)
 #define PTA_AVX2               (HOST_WIDE_INT_1 << 30)
 #define PTA_BMI2               (HOST_WIDE_INT_1 << 31)
+#define PTA_PREFETCHW          (HOST_WIDE_INT_1 << 32)
+
 /* if this reaches 64, need to widen struct pta flags below */
 
   static struct pta
@@ -3038,19 +3043,19 @@ ix86_option_override_internal (bool main_args_p)
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
       {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
-       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-       | PTA_XOP | PTA_LWP},
+       PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+       | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+       | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+       | PTA_FMA4 | PTA_XOP | PTA_LWP},
       {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
-       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-       | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
+       PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+       | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+       | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+       | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
        | PTA_FMA},
       {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
-        PTA_64BIT | PTA_MMX |  PTA_SSE  | PTA_SSE2 | PTA_SSE3
-        | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
+       PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+       | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
       {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
        0 /* flags are only used for -march switch.  */ },
       {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
@@ -3358,6 +3363,8 @@ ix86_option_override_internal (bool main_args_p)
          ix86_isa_flags |= OPTION_MASK_ISA_F16C;
        if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
          x86_prefetch_sse = true;
+       if (processor_alias_table[i].flags & PTA_PREFETCHW)
+         x86_prefetchw = true;
 
        break;
       }
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h  (revision 191226)
+++ config/i386/i386.h  (working copy)
@@ -450,9 +450,11 @@ extern unsigned char ix86_arch_features[X86_ARCH_L
 #define TARGET_FISTTP          (TARGET_SSE3 && TARGET_80387)
 
 extern int x86_prefetch_sse;
-
 #define TARGET_PREFETCH_SSE    x86_prefetch_sse
 
+extern int x86_prefetchw;
+#define TARGET_PREFETCHW       x86_prefetchw
+
 #define ASSEMBLER_DIALECT      (ix86_asm_dialect)
 
 #define TARGET_SSE_MATH                ((ix86_fpmath & FPMATH_SSE) != 0)
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 191227)
+++ config/i386/i386.md (working copy)
@@ -17671,12 +17671,14 @@
   gcc_assert (locality >= 0 && locality <= 3);
   gcc_assert (GET_MODE (operands[0]) == Pmode
              || GET_MODE (operands[0]) == VOIDmode);
+  if (TARGET_PREFETCHW && rw)
+    operands[2] = GEN_INT (3);
 
   /* Use 3dNOW prefetch in case we are asking for write prefetch not
      supported by SSE counterpart or the SSE prefetch is not available
      (K6 machines).  Otherwise use SSE prefetch as it allows specifying
      of locality.  */
-  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+  else if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
     operands[2] = GEN_INT (3);
   else
     operands[1] = const0_rtx;
@@ -17707,7 +17709,7 @@
   [(prefetch (match_operand:P 0 "address_operand" "p")
             (match_operand:SI 1 "const_int_operand" "n")
             (const_int 3))]
-  "TARGET_3DNOW"
+  "TARGET_3DNOW || TARGET_PREFETCHW"
 {
   if (INTVAL (operands[1]) == 0)
     return "prefetch\t%a0";

Reply via email to