Hi, Currently STV pass may require a stack realignment if any transformation occurs to enable SSE registers spill/fill. It appears it's invalid to increase stack alignment requirements at this point. Thus we have to either assume we need stack to be aligned if are going to run STV pass or disable STV if stack is not properly aligned. I suppose we shouldn't ignore explicitly requested stack alignment not beeing sure we really optimize anything (and STV is not an optimization frequiently applied). So I think we may disable TARGET_STV for such cases as Jakub suggested. This patch was bootstrapped and regtested on x86_64-pc-linux-gnu. OK for trunk?
Thanks, Ilya -- gcc/ 2016-01-27 Jakub Jelinek <ja...@redhat.com> Ilya Enkovich <enkovich....@gmail.com> PR target/69454 * config/i386/i386.c (convert_scalars_to_vector): Remove stack alignment fixes. (ix86_option_override_internal): Disable TARGET_STV if stack is not properly aligned. gcc/testsuite/ 2016-01-27 Ilya Enkovich <enkovich....@gmail.com> PR target/69454 * gcc.target/i386/pr69454-1.c: New test. * gcc.target/i386/pr69454-2.c: New test. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 34b57a4..9fb8db8 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3588,16 +3588,6 @@ convert_scalars_to_vector () bitmap_obstack_release (NULL); df_process_deferred_rescans (); - /* Conversion means we may have 128bit register spills/fills - which require aligned stack. */ - if (converted_insns) - { - if (crtl->stack_alignment_needed < 128) - crtl->stack_alignment_needed = 128; - if (crtl->stack_alignment_estimated < 128) - crtl->stack_alignment_estimated = 128; - } - return 0; } @@ -5453,6 +5443,11 @@ ix86_option_override_internal (bool main_args_p, opts->x_target_flags |= MASK_VZEROUPPER; if (!(opts_set->x_target_flags & MASK_STV)) opts->x_target_flags |= MASK_STV; + /* Disable STV if -mpreferred-stack-boundary={2,3} - the needed + stack realignment will be extra cost the pass doesn't take into + account and the pass can't realign the stack. */ + if (ix86_preferred_stack_boundary < 64) + opts->x_target_flags &= ~MASK_STV; if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; diff --git a/gcc/testsuite/gcc.target/i386/pr69454-1.c b/gcc/testsuite/gcc.target/i386/pr69454-1.c new file mode 100644 index 0000000..12ecfd3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr69454-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2 -msse2 -mno-accumulate-outgoing-args -mpreferred-stack-boundary=2" } */ + +typedef struct { long long w64[2]; } V128; +extern V128* fn2(void); +long long a; +V128 b; +void fn1() { + V128 *c = fn2(); + c->w64[0] = a ^ b.w64[0]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr69454-2.c b/gcc/testsuite/gcc.target/i386/pr69454-2.c new file mode 100644 index 0000000..28bab93 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr69454-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2 -mpreferred-stack-boundary=2" } */ + +extern void fn2 (); +long long a, b; + +void fn1 () +{ + long long c = a; + a = b ^ a; + fn2 (); + a = c; +}