Dear Michael, Thanks for updating the patch. I am afraid that my attention to gfortran is somewhat limited at present. However, I see that Dominique has verified your patch and that all is well.
The resulting speed up for nf.f90 is rather remarkable. What specific feature of the Fortran leads to the drop from 30s to 15s? Cheers Paul On Mon, Apr 11, 2011 at 6:04 PM, Michael Matz <m...@suse.de> wrote: > On Sat, 9 Apr 2011, Paul Richard Thomas wrote: > >> I find that both nf.f90 and capacita.f90 segfault in runtime for any >> stack size. > > Try this patch. I've verified that capacita and nf work with it and > -march=native -ffast-math -funroll-loops -fstack-arrays -O3 . In fact all > of polyhedron works for me on these flags. (I've set a ulimit -s of > 512MB, but I don't know if such a large amount is required). > > > Ciao, > Michael. > > * trans-array.c (toplevel): Include gimple.h. > (gfc_trans_allocate_array_storage): Check flag_stack_arrays, > properly expand variable length arrays. > (gfc_trans_auto_array_allocation): If flag_stack_arrays create > variable length decls and associate them with their scope. > * gfortran.h (gfc_option_t): Add flag_stack_arrays member. > * options.c (gfc_init_options): Handle -fstack_arrays option. > * lang.opt (fstack-arrays): Add option. > * invoke.texi (Code Gen Options): Document it. > * Make-lang.in (trans-array.o): Depend on GIMPLE_H. > > Index: trans-array.c > =================================================================== > *** trans-array.c (revision 172206) > --- trans-array.c (working copy) > *************** along with GCC; see the file COPYING3. > *** 81,86 **** > --- 81,87 ---- > #include "system.h" > #include "coretypes.h" > #include "tree.h" > + #include "gimple.h" > #include "diagnostic-core.h" /* For internal_error/fatal_error. */ > #include "flags.h" > #include "gfortran.h" > *************** gfc_trans_allocate_array_storage (stmtbl > *** 630,647 **** > { > /* Allocate the temporary. */ > onstack = !dynamic && initial == NULL_TREE > ! && gfc_can_put_var_on_stack (size); > > if (onstack) > { > /* Make a temporary variable to hold the data. 
*/ > tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE (nelem), > nelem, gfc_index_one_node); > tmp = build_range_type (gfc_array_index_type, gfc_index_zero_node, > tmp); > tmp = build_array_type (gfc_get_element_type (TREE_TYPE (desc)), > tmp); > tmp = gfc_create_var (tmp, "A"); > tmp = gfc_build_addr_expr (NULL_TREE, tmp); > gfc_conv_descriptor_data_set (pre, desc, tmp); > } > --- 631,654 ---- > { > /* Allocate the temporary. */ > onstack = !dynamic && initial == NULL_TREE > ! && (gfc_option.flag_stack_arrays > ! || gfc_can_put_var_on_stack (size)); > > if (onstack) > { > /* Make a temporary variable to hold the data. */ > tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE (nelem), > nelem, gfc_index_one_node); > + tmp = gfc_evaluate_now (tmp, pre); > tmp = build_range_type (gfc_array_index_type, gfc_index_zero_node, > tmp); > tmp = build_array_type (gfc_get_element_type (TREE_TYPE (desc)), > tmp); > tmp = gfc_create_var (tmp, "A"); > + gfc_add_expr_to_block (pre, > + fold_build1_loc (input_location, > + DECL_EXPR, TREE_TYPE (tmp), > + tmp)); > tmp = gfc_build_addr_expr (NULL_TREE, tmp); > gfc_conv_descriptor_data_set (pre, desc, tmp); > } > *************** gfc_trans_auto_array_allocation (tree de > *** 4744,4749 **** > --- 4751,4758 ---- > tree tmp; > tree size; > tree offset; > + tree space; > + tree inittree; > bool onstack; > > gcc_assert (!(sym->attr.pointer || sym->attr.allocatable)); > *************** gfc_trans_auto_array_allocation (tree de > *** 4800,4814 **** > return; > } > > ! /* The size is the number of elements in the array, so multiply by the > ! size of an element to get the total size. */ > ! tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type)); > ! size = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, > ! size, fold_convert (gfc_array_index_type, tmp)); > > ! /* Allocate memory to hold the data. */ > ! tmp = gfc_call_malloc (&init, TREE_TYPE (decl), size); > ! 
gfc_add_modify (&init, decl, tmp); > > /* Set offset of the array. */ > if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) > --- 4809,4838 ---- > return; > } > > ! if (gfc_option.flag_stack_arrays) > ! { > ! gcc_assert (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE); > ! space = build_decl (sym->declared_at.lb->location, > ! VAR_DECL, create_tmp_var_name ("A"), > ! TREE_TYPE (TREE_TYPE (decl))); > ! gfc_trans_vla_type_sizes (sym, &init); > ! } > ! else > ! { > ! /* The size is the number of elements in the array, so multiply by the > ! size of an element to get the total size. */ > ! tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type)); > ! size = fold_build2_loc (input_location, MULT_EXPR, > gfc_array_index_type, > ! size, fold_convert (gfc_array_index_type, tmp)); > > ! /* Allocate memory to hold the data. */ > ! tmp = gfc_call_malloc (&init, TREE_TYPE (decl), size); > ! gfc_add_modify (&init, decl, tmp); > ! > ! /* Free the temporary. */ > ! tmp = gfc_call_free (convert (pvoid_type_node, decl)); > ! space = NULL_TREE; > ! } > > /* Set offset of the array. */ > if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) > *************** gfc_trans_auto_array_allocation (tree de > *** 4817,4826 **** > /* Automatic arrays should not have initializers. */ > gcc_assert (!sym->value); > > ! /* Free the temporary. */ > ! tmp = gfc_call_free (convert (pvoid_type_node, decl)); > > ! gfc_add_init_cleanup (block, gfc_finish_block (&init), tmp); > } > > > --- 4841,4866 ---- > /* Automatic arrays should not have initializers. */ > gcc_assert (!sym->value); > > ! inittree = gfc_finish_block (&init); > ! > ! if (space) > ! { > ! tree addr; > ! pushdecl (space); > > ! /* Don't create new scope, emit the DECL_EXPR in exactly the scope > ! where also space is located. */ > ! gfc_init_block (&init); > ! tmp = fold_build1_loc (input_location, DECL_EXPR, > ! TREE_TYPE (space), space); > ! gfc_add_expr_to_block (&init, tmp); > ! 
addr = fold_build1_loc (sym->declared_at.lb->location, > ! ADDR_EXPR, TREE_TYPE (decl), space); > ! gfc_add_modify (&init, decl, addr); > ! gfc_add_init_cleanup (block, gfc_finish_block (&init), NULL_TREE); > ! tmp = NULL_TREE; > ! } > ! gfc_add_init_cleanup (block, inittree, tmp); > } > > > Index: Make-lang.in > =================================================================== > *** Make-lang.in (revision 172206) > --- Make-lang.in (working copy) > *************** fortran/trans-stmt.o: $(GFORTRAN_TRANS_D > *** 353,359 **** > fortran/trans-openmp.o: $(GFORTRAN_TRANS_DEPS) > fortran/trans-io.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-io.h \ > fortran/ioparm.def > ! fortran/trans-array.o: $(GFORTRAN_TRANS_DEPS) > fortran/trans-intrinsic.o: $(GFORTRAN_TRANS_DEPS) fortran/mathbuiltins.def \ > gt-fortran-trans-intrinsic.h > fortran/dependency.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h > --- 353,359 ---- > fortran/trans-openmp.o: $(GFORTRAN_TRANS_DEPS) > fortran/trans-io.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-io.h \ > fortran/ioparm.def > ! 
fortran/trans-array.o: $(GFORTRAN_TRANS_DEPS) $(GIMPLE_H) > fortran/trans-intrinsic.o: $(GFORTRAN_TRANS_DEPS) fortran/mathbuiltins.def \ > gt-fortran-trans-intrinsic.h > fortran/dependency.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h > Index: gfortran.h > =================================================================== > *** gfortran.h (revision 172206) > --- gfortran.h (working copy) > *************** typedef struct > *** 2220,2225 **** > --- 2220,2226 ---- > int flag_d_lines; > int gfc_flag_openmp; > int flag_sign_zero; > + int flag_stack_arrays; > int flag_module_private; > int flag_recursive; > int flag_init_local_zero; > Index: lang.opt > =================================================================== > *** lang.opt (revision 172206) > --- lang.opt (working copy) > *************** fmax-stack-var-size= > *** 454,459 **** > --- 454,463 ---- > Fortran RejectNegative Joined UInteger > -fmax-stack-var-size=<n> Size in bytes of the largest array that will > be put on the stack > > + fstack-arrays > + Fortran > + Put all local arrays on stack. > + > fmodule-private > Fortran > Set default accessibility of module entities to PRIVATE. > Index: invoke.texi > =================================================================== > *** invoke.texi (revision 172206) > --- invoke.texi (working copy) > *************** and warnings}. > *** 167,172 **** > --- 167,173 ---- > -fbounds-check -fcheck-array-temporaries -fmax-array-constructor =@var{n} > @gol > -fcheck=@var{<all|array-temps|bounds|do|mem|pointer|recursion>} @gol > -fcoarray=@var{<none|single|lib>} -fmax-stack-var-size=@var{n} @gol > + -fstack-arrays @gol > -fpack-derived -frepack-arrays -fshort-enums -fexternal-blas @gol > -fblas-matmul-limit=@var{n} -frecursive -finit-local-zero @gol > -finit-integer=@var{n} -finit-real=@var{<zero|inf|-inf|nan|snan>} @gol > *************** Future versions of GNU Fortran may impro > *** 1361,1366 **** > --- 1362,1374 ---- > > The default value for @var{n} is 32768. 
> > + @item -fstack-arrays > + @opindex @code{fstack-arrays} > + Adding this option will make the fortran compiler put all local arrays, > + even those of unknown size onto stack memory. If your program uses very > + large local arrays it's possible that you'll have to extend your runtime > + limits for stack memory on some operating systems. > + > @item -fpack-derived > @opindex @code{fpack-derived} > @cindex structure packing > Index: options.c > =================================================================== > *** options.c (revision 172206) > --- options.c (working copy) > *************** gfc_init_options (unsigned int decoded_o > *** 123,128 **** > --- 123,129 ---- > > /* Default value of flag_max_stack_var_size is set in gfc_post_options. */ > gfc_option.flag_max_stack_var_size = -2; > + gfc_option.flag_stack_arrays = 0; > > gfc_option.flag_range_check = 1; > gfc_option.flag_pack_derived = 0; > *************** gfc_handle_option (size_t scode, const c > *** 783,788 **** > --- 784,793 ---- > gfc_option.flag_max_stack_var_size = value; > break; > > + case OPT_fstack_arrays: > + gfc_option.flag_stack_arrays = value; > + break; > + > case OPT_fmodule_private: > gfc_option.flag_module_private = value; > break; > -- The knack of flying is learning how to throw yourself at the ground and miss. --Hitchhikers Guide to the Galaxy