Hi! Working virtually out of Pago Pago.
Is there a reason why we don't prefer 32-byte integer vector modes even for AVX? If a vectorized loop needs some operation that is only supported by AVX2, we would retry whenever we see such a statement, so I think what this costs us is mainly a small amount of time during vectorization analysis.

Below are examples of functions that can be vectorized with this patch using 32-byte vectors; with the patch, the runtime of the testcase decreased from ~ 0m2.876s to ~ 0m2.258s.

#define N 1024
int a[N], b[N];
float c[N];

__attribute__((noinline, noclone)) void
f1 ()
{
  int i;
  for (i = 0; i < N; i++)
    a[i] = b[i];
}

__attribute__((noinline, noclone)) void
f2 ()
{
  a[0] = b[0];
  a[1] = b[1];
  a[2] = b[2];
  a[3] = b[3];
  a[4] = b[4];
  a[5] = b[5];
  a[6] = b[6];
  a[7] = b[7];
}

__attribute__((noinline, noclone)) void
f3 ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i];
}

__attribute__((noinline, noclone)) void
f4 ()
{
  int i;
  for (i = 0; i < N; i++)
    a[i] = 19;
}

int
main ()
{
  int i;
  for (i = 0; i < N; i++)
    b[i] = i;
  for (i = 0; i < 10000000; i++)
    {
      f1 ();
      f2 ();
      f3 ();
      f4 ();
    }
  return 0;
}

2011-11-08  Jakub Jelinek  <ja...@redhat.com>

	* config/i386/i386.c (ix86_preferred_simd_mode): Even for TARGET_AVX
	if not TARGET_PREFER_AVX128 return 32-byte SI/DI vectors.

--- gcc/config/i386/i386.c.jj 2011-11-07 08:32:09.000000000 -1100
+++ gcc/config/i386/i386.c 2011-11-07 19:19:57.000000000 -1100
@@ -37881,9 +37881,9 @@ ix86_preferred_simd_mode (enum machine_m
     case HImode:
       return TARGET_AVX2 ? V16HImode : V8HImode;
     case SImode:
-      return TARGET_AVX2 ? V8SImode : V4SImode;
+      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
     case DImode:
-      return TARGET_AVX2 ? V4DImode : V2DImode;
+      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
 
     case SFmode:
       if (TARGET_AVX && !TARGET_PREFER_AVX128)

	Jakub