On Thu, Jun 25, 2015 at 05:05:22AM +0100, Jeff Law wrote: > On 06/23/2015 09:42 AM, James Greenhalgh wrote: > > > > On Tue, Jun 23, 2015 at 09:52:01AM +0100, Jakub Jelinek wrote: > >> On Tue, Jun 23, 2015 at 09:18:52AM +0100, James Greenhalgh wrote: > >>> This patch fixes the issue by always calling get_move_ratio in the SRA > >>> code, ensuring that an up-to-date value is used. > >>> > >>> Unfortunately, this means we have to use 0 as a sentinel value for > >>> the parameter - indicating no user override of the feature - and > >>> therefore cannot use it to disable scalarization. However, there > >>> are other ways to disable scalarazation (-fno-tree-sra) so this is not > >>> a great loss. > >> > >> You can handle even that. > >> > > > > <snip> > > > >> enum compiler_param param > >> = optimize_function_for_size_p (cfun) > >> ? PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE > >> : PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED; > >> unsigned max_scalarization_size = PARAM_VALUE (param) * BITS_PER_UNIT; > >> if (!max_scalarization_size && > >> !global_options_set.x_param_values[param]) > >> > >> Then it will handle explicit --param sra-max-scalarization-size-Os*=0 > >> differently from implicit 0. > > > > Ah hah! OK, I've respun the patch removing this extra justification in > > the documentation and reshuffling the logic a little. > > > >> OT, shouldn't max_scalarization_size be at least unsigned HOST_WIDE_INT, > >> so that it doesn't overflow for larger values (0x40000000 etc.)? > >> Probably need some cast in the multiplication to avoid UB in the compiler. > > > > I've increased the size of max_scalarization_size to a UHWI in this spin. > > > > Bootstrapped and tested on AArch64 and x86-64 with no issues and checked > > to see the PR is fixed. > > > > OK for trunk, and gcc-5 in a few days? > > > > Thanks, > > James > > > > --- > > gcc/ > > > > 2015-06-23 James Greenhalgh <james.greenha...@arm.com> > > > > PR tree-optimization/66119 > > * toplev.c (process_options): Don't set up default values for > > the sra_max_scalarization_size_{speed,size} parameters. > > * tree-sra (analyze_all_variable_accesses): If no values > > have been set for the sra_max_scalarization_size_{speed,size} > > parameters, call get_move_ratio to get target defaults. > Any testcase for this change? > > OK with a testcase. > > jeff
Thanks Jeff, I'm not really up to scratch on the gotchas for the dg directives targetting avx on x86, does the testcase in this patch (taken from the PR) look OK? Cheers, James
diff --git a/gcc/testsuite/g++.dg/pr66119.C b/gcc/testsuite/g++.dg/pr66119.C new file mode 100644 index 0000000..9979e91 --- /dev/null +++ b/gcc/testsuite/g++.dg/pr66119.C @@ -0,0 +1,69 @@ +/* PR66119 - MOVE_RATIO is not constant in a compiler run, so Scalar + Reduction of Aggregates must ask the back-end more than once what + the value of MOVE_RATIO now is. */ + +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-std=c++11 -O3 -mavx -fdump-tree-sra -march=slm" { target avx_runtime } } */ + +#include <immintrin.h> + +class MyAVX +{ + __m256d data; +public: + MyAVX () = default; + MyAVX (const MyAVX &) = default; + MyAVX (__m256d _data) : data(_data) { ; } + + MyAVX & operator= (const MyAVX &) = default; + + operator __m256d () const { return data; } + MyAVX operator+ (MyAVX s2) { return data+s2.data; } +}; + +template <typename T> class AVX_trait { ; }; + +template <> class AVX_trait<double> { +public: + typedef __m256d TSIMD; +}; + + +template <typename T> +class MyTSIMD +{ + typename AVX_trait<T>::TSIMD data; + +public: + MyTSIMD () = default; + MyTSIMD (const MyTSIMD &) = default; + // MyTSIMD (const MyTSIMD & s2) : data(s2.data) { ; } + MyTSIMD (typename AVX_trait<T>::TSIMD _data) : data(_data) { ; } + + operator typename AVX_trait<T>::TSIMD() const { return data; } + MyTSIMD operator+ (MyTSIMD s2) { return data+s2.data; } +}; + +// using MyVec = MyAVX; +using MyVec = MyTSIMD<double>; + +class Vec2 +{ + MyVec a, b; +public: + Vec2 (MyVec aa, MyVec ab) : a(aa), b(ab) { ; } + Vec2 operator+ (Vec2 v2) { return Vec2(a+v2.a, b+v2.b); } +}; + +inline __attribute__ ((__always_inline__)) +Vec2 ComputeSomething (Vec2 a, Vec2 b) +{ + return a+b; +} + +Vec2 TestFunction (Vec2 a, Vec2 b) +{ + return ComputeSomething (a,b); +} + +/* { dg-final { scan-tree-dump "Created a replacement for b" "sra" } } */ diff --git a/gcc/toplev.c b/gcc/toplev.c index 2f43a89..902bfc7 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1301,20 +1301,6 @@ process_options (void) so we can correctly initialize debug output. */ no_backend = lang_hooks.post_options (&main_input_filename); - /* Set default values for parameters relation to the Scalar Reduction - of Aggregates passes (SRA and IP-SRA). We must do this here, rather - than in opts.c:default_options_optimization as historically these - tuning heuristics have been based on MOVE_RATIO, which on some - targets requires other symbols from the backend. */ - maybe_set_param_value - (PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED, - get_move_ratio (true) * UNITS_PER_WORD, - global_options.x_param_values, global_options_set.x_param_values); - maybe_set_param_value - (PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE, - get_move_ratio (false) * UNITS_PER_WORD, - global_options.x_param_values, global_options_set.x_param_values); - /* Some machines may reject certain combinations of options. */ targetm.target_option.override (); diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 8e34244..5f573f6 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -2549,11 +2549,20 @@ analyze_all_variable_accesses (void) bitmap tmp = BITMAP_ALLOC (NULL); bitmap_iterator bi; unsigned i; - unsigned max_scalarization_size - = (optimize_function_for_size_p (cfun) - ? PARAM_VALUE (PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE) - : PARAM_VALUE (PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED)) - * BITS_PER_UNIT; + bool optimize_speed_p = !optimize_function_for_size_p (cfun); + + enum compiler_param param = optimize_speed_p + ? PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED + : PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE; + + /* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>, + fall back to a target default. */ + unsigned HOST_WIDE_INT max_scalarization_size + = global_options_set.x_param_values[param] + ? PARAM_VALUE (param) + : get_move_ratio (optimize_speed_p) * UNITS_PER_WORD; + + max_scalarization_size *= BITS_PER_UNIT; EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi) if (bitmap_bit_p (should_scalarize_away_bitmap, i)