Hello! The attached patch adds the missing PTA_PRFCHW and PTA_FXSR flags to the x86 processor alias table. The PRFCHW CPUID flag is shared with the 3dNow! prefetch flag, so some additional logic is needed to avoid generating SSE prefetches for non-SSE 3dNow! targets, while still generating the full set of 3dNow! prefetches on 3dNow! targets.
2013-05-15 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.c (ix86_option_override_internal): Update processor_alias_table for missing PTA_PRFCHW and PTA_FXSR flags. Add PTA_POPCNT to corei7 entry and remove PTA_SSE from athlon-4 entry. Do not enable SSE prefetch on non-SSE 3dNow! targets. Enable TARGET_PRFCHW for TARGET_3DNOW targets. * config/i386/i386.md (prefetch): Enable for TARGET_PRFCHW instead of TARGET_3DNOW. (*prefetch_3dnow): Enable for TARGET_PRFCHW only. The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32} and was committed to mainline SVN. The patch will be backported to the 4.8 branch in a couple of days. Uros.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 198933) +++ config/i386/i386.c (working copy) @@ -2892,9 +2892,10 @@ ix86_option_override_internal (bool main_args_p) {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX}, {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX}, - {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, - {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, - {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE}, + {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, + {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, + {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, + PTA_MMX | PTA_SSE | PTA_FXSR}, {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR}, @@ -2917,8 +2918,8 @@ ix86_option_override_internal (bool main_args_p) PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR}, {"corei7", PROCESSOR_COREI7, CPU_COREI7, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 - | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_FXSR}, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 + | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT | PTA_FXSR}, {"corei7-avx", PROCESSOR_COREI7, CPU_COREI7, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX @@ -2940,49 +2941,49 @@ ix86_option_override_internal (bool main_args_p) PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR}, {"geode", PROCESSOR_GEODE, CPU_GEODE, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, - {"k6-2", PROCESSOR_K6, 
CPU_K6, PTA_MMX | PTA_3DNOW}, - {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, + {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, + {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW}, {"athlon", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW}, {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR}, {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, - PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, + PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR}, {"x86-64", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF}, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR}, {"k8", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, + | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, {"k8-sse3", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, + | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, {"opteron", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, + | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, {"opteron-sse3", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, + | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, {"athlon64", PROCESSOR_K8, CPU_K8, PTA_64BIT | 
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, + | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, {"athlon64-sse3", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, + | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, {"athlon-fx", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_NO_SAHF}, + | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR}, {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 + | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, - PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, + PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2 + | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR}, {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 @@ -3592,14 +3593,18 @@ ix86_option_override_internal (bool main_args_p) ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit; /* Enable SSE prefetch. */ - if (TARGET_SSE || TARGET_PRFCHW) + if (TARGET_SSE || (TARGET_PRFCHW && !TARGET_3DNOW)) x86_prefetch_sse = true; - /* Turn on popcnt instruction for -msse4.2 or -mabm. */ + /* Enable prefetch{,w} instructions for -m3dnow. */ + if (TARGET_3DNOW) + ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW & ~ix86_isa_flags_explicit; + + /* Enable popcnt instruction for -msse4.2 or -mabm. */ if (TARGET_SSE4_2 || TARGET_ABM) ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit; - /* Turn on lzcnt instruction for -mabm. */ + /* Enable lzcnt instruction for -mabm. 
*/ if (TARGET_ABM) ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit; Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 198933) +++ config/i386/i386.md (working copy) @@ -17041,21 +17041,18 @@ [(prefetch (match_operand 0 "address_operand") (match_operand:SI 1 "const_int_operand") (match_operand:SI 2 "const_int_operand"))] - "TARGET_PREFETCH_SSE || TARGET_3DNOW" + "TARGET_PREFETCH_SSE || TARGET_PRFCHW" { - int rw = INTVAL (operands[1]); + bool write = INTVAL (operands[1]) != 0; int locality = INTVAL (operands[2]); - gcc_assert (rw == 0 || rw == 1); gcc_assert (IN_RANGE (locality, 0, 3)); - if (TARGET_PRFCHW && rw) - operands[2] = GEN_INT (3); /* Use 3dNOW prefetch in case we are asking for write prefetch not supported by SSE counterpart or the SSE prefetch is not available (K6 machines). Otherwise use SSE prefetch as it allows specifying of locality. */ - else if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) + if (TARGET_PRFCHW && (write || !TARGET_PREFETCH_SSE)) operands[2] = GEN_INT (3); else operands[1] = const0_rtx; @@ -17086,7 +17083,7 @@ [(prefetch (match_operand 0 "address_operand" "p") (match_operand:SI 1 "const_int_operand" "n") (const_int 3))] - "TARGET_3DNOW || TARGET_PRFCHW" + "TARGET_PRFCHW" { if (INTVAL (operands[1]) == 0) return "prefetch\t%a0";