Dear Michael,

Thanks for updating the patch.  I am afraid that my attention to
gfortran is somewhat limited at present.  However, I see that
Dominique has verified your patch and that all is well.

The resulting speed up for nf.f90 is rather remarkable.  What specific
feature of the Fortran code leads to the drop from 30s to 15s?

Cheers

Paul

On Mon, Apr 11, 2011 at 6:04 PM, Michael Matz <m...@suse.de> wrote:
> On Sat, 9 Apr 2011, Paul Richard Thomas wrote:
>
>> I find that both nf.f90 and capacita.f90 segfault in runtime for any
>> stack size.
>
> Try this patch.  I've verified that capacita and nf work with it and
> -march=native -ffast-math -funroll-loops -fstack-arrays -O3 .  In fact all
> of polyhedron works for me on these flags.  (I've set a ulimit -s of
> 512MB, but I don't know if such a large amount is required).
>
>
> Ciao,
> Michael.
>
>        * trans-array.c (toplevel): Include gimple.h.
>        (gfc_trans_allocate_array_storage): Check flag_stack_arrays,
>        properly expand variable length arrays.
>        (gfc_trans_auto_array_allocation): If flag_stack_arrays create
>        variable length decls and associate them with their scope.
>        * gfortran.h (gfc_option_t): Add flag_stack_arrays member.
>        * options.c (gfc_init_options): Handle -fstack-arrays option.
>        * lang.opt (fstack-arrays): Add option.
>        * invoke.texi (Code Gen Options): Document it.
>        * Make-lang.in (trans-array.o): Depend on GIMPLE_H.
>
> Index: trans-array.c
> ===================================================================
> *** trans-array.c       (revision 172206)
> --- trans-array.c       (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 81,86 ****
> --- 81,87 ----
>  #include "system.h"
>  #include "coretypes.h"
>  #include "tree.h"
> + #include "gimple.h"
>  #include "diagnostic-core.h"  /* For internal_error/fatal_error.  */
>  #include "flags.h"
>  #include "gfortran.h"
> *************** gfc_trans_allocate_array_storage (stmtbl
> *** 630,647 ****
>      {
>        /* Allocate the temporary.  */
>        onstack = !dynamic && initial == NULL_TREE
> !                        && gfc_can_put_var_on_stack (size);
>
>        if (onstack)
>        {
>          /* Make a temporary variable to hold the data.  */
>          tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE (nelem),
>                                 nelem, gfc_index_one_node);
>          tmp = build_range_type (gfc_array_index_type, gfc_index_zero_node,
>                                  tmp);
>          tmp = build_array_type (gfc_get_element_type (TREE_TYPE (desc)),
>                                  tmp);
>          tmp = gfc_create_var (tmp, "A");
>          tmp = gfc_build_addr_expr (NULL_TREE, tmp);
>          gfc_conv_descriptor_data_set (pre, desc, tmp);
>        }
> --- 631,654 ----
>      {
>        /* Allocate the temporary.  */
>        onstack = !dynamic && initial == NULL_TREE
> !                        && (gfc_option.flag_stack_arrays
> !                            || gfc_can_put_var_on_stack (size));
>
>        if (onstack)
>        {
>          /* Make a temporary variable to hold the data.  */
>          tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE (nelem),
>                                 nelem, gfc_index_one_node);
> +         tmp = gfc_evaluate_now (tmp, pre);
>          tmp = build_range_type (gfc_array_index_type, gfc_index_zero_node,
>                                  tmp);
>          tmp = build_array_type (gfc_get_element_type (TREE_TYPE (desc)),
>                                  tmp);
>          tmp = gfc_create_var (tmp, "A");
> +         gfc_add_expr_to_block (pre,
> +                                fold_build1_loc (input_location,
> +                                                 DECL_EXPR, TREE_TYPE (tmp),
> +                                                 tmp));
>          tmp = gfc_build_addr_expr (NULL_TREE, tmp);
>          gfc_conv_descriptor_data_set (pre, desc, tmp);
>        }
> *************** gfc_trans_auto_array_allocation (tree de
> *** 4744,4749 ****
> --- 4751,4758 ----
>    tree tmp;
>    tree size;
>    tree offset;
> +   tree space;
> +   tree inittree;
>    bool onstack;
>
>    gcc_assert (!(sym->attr.pointer || sym->attr.allocatable));
> *************** gfc_trans_auto_array_allocation (tree de
> *** 4800,4814 ****
>        return;
>      }
>
> !   /* The size is the number of elements in the array, so multiply by the
> !      size of an element to get the total size.  */
> !   tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
> !   size = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
> !                         size, fold_convert (gfc_array_index_type, tmp));
>
> !   /* Allocate memory to hold the data.  */
> !   tmp = gfc_call_malloc (&init, TREE_TYPE (decl), size);
> !   gfc_add_modify (&init, decl, tmp);
>
>    /* Set offset of the array.  */
>    if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
> --- 4809,4838 ----
>        return;
>      }
>
> !   if (gfc_option.flag_stack_arrays)
> !     {
> !       gcc_assert (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE);
> !       space = build_decl (sym->declared_at.lb->location,
> !                         VAR_DECL, create_tmp_var_name ("A"),
> !                         TREE_TYPE (TREE_TYPE (decl)));
> !       gfc_trans_vla_type_sizes (sym, &init);
> !     }
> !   else
> !     {
> !       /* The size is the number of elements in the array, so multiply by the
> !        size of an element to get the total size.  */
> !       tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type));
> !       size = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
> !                             size, fold_convert (gfc_array_index_type, tmp));
>
> !       /* Allocate memory to hold the data.  */
> !       tmp = gfc_call_malloc (&init, TREE_TYPE (decl), size);
> !       gfc_add_modify (&init, decl, tmp);
> !
> !       /* Free the temporary.  */
> !       tmp = gfc_call_free (convert (pvoid_type_node, decl));
> !       space = NULL_TREE;
> !     }
>
>    /* Set offset of the array.  */
>    if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
> *************** gfc_trans_auto_array_allocation (tree de
> *** 4817,4826 ****
>    /* Automatic arrays should not have initializers.  */
>    gcc_assert (!sym->value);
>
> !   /* Free the temporary.  */
> !   tmp = gfc_call_free (convert (pvoid_type_node, decl));
>
> !   gfc_add_init_cleanup (block, gfc_finish_block (&init), tmp);
>  }
>
>
> --- 4841,4866 ----
>    /* Automatic arrays should not have initializers.  */
>    gcc_assert (!sym->value);
>
> !   inittree = gfc_finish_block (&init);
> !
> !   if (space)
> !     {
> !       tree addr;
> !       pushdecl (space);
>
> !       /* Don't create new scope, emit the DECL_EXPR in exactly the scope
> !          where also space is located.  */
> !       gfc_init_block (&init);
> !       tmp = fold_build1_loc (input_location, DECL_EXPR,
> !                            TREE_TYPE (space), space);
> !       gfc_add_expr_to_block (&init, tmp);
> !       addr = fold_build1_loc (sym->declared_at.lb->location,
> !                             ADDR_EXPR, TREE_TYPE (decl), space);
> !       gfc_add_modify (&init, decl, addr);
> !       gfc_add_init_cleanup (block, gfc_finish_block (&init), NULL_TREE);
> !       tmp = NULL_TREE;
> !     }
> !   gfc_add_init_cleanup (block, inittree, tmp);
>  }
>
>
> Index: Make-lang.in
> ===================================================================
> *** Make-lang.in        (revision 172206)
> --- Make-lang.in        (working copy)
> *************** fortran/trans-stmt.o: $(GFORTRAN_TRANS_D
> *** 353,359 ****
>  fortran/trans-openmp.o: $(GFORTRAN_TRANS_DEPS)
>  fortran/trans-io.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-io.h \
>    fortran/ioparm.def
> ! fortran/trans-array.o: $(GFORTRAN_TRANS_DEPS)
>  fortran/trans-intrinsic.o: $(GFORTRAN_TRANS_DEPS) fortran/mathbuiltins.def \
>    gt-fortran-trans-intrinsic.h
>  fortran/dependency.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h
> --- 353,359 ----
>  fortran/trans-openmp.o: $(GFORTRAN_TRANS_DEPS)
>  fortran/trans-io.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-io.h \
>    fortran/ioparm.def
> ! fortran/trans-array.o: $(GFORTRAN_TRANS_DEPS) $(GIMPLE_H)
>  fortran/trans-intrinsic.o: $(GFORTRAN_TRANS_DEPS) fortran/mathbuiltins.def \
>    gt-fortran-trans-intrinsic.h
>  fortran/dependency.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h
> Index: gfortran.h
> ===================================================================
> *** gfortran.h  (revision 172206)
> --- gfortran.h  (working copy)
> *************** typedef struct
> *** 2220,2225 ****
> --- 2220,2226 ----
>    int flag_d_lines;
>    int gfc_flag_openmp;
>    int flag_sign_zero;
> +   int flag_stack_arrays;
>    int flag_module_private;
>    int flag_recursive;
>    int flag_init_local_zero;
> Index: lang.opt
> ===================================================================
> *** lang.opt    (revision 172206)
> --- lang.opt    (working copy)
> *************** fmax-stack-var-size=
> *** 454,459 ****
> --- 454,463 ----
>  Fortran RejectNegative Joined UInteger
>  -fmax-stack-var-size=<n>      Size in bytes of the largest array that will be put on the stack
>
> + fstack-arrays
> + Fortran
> + Put all local arrays on stack.
> +
>  fmodule-private
>  Fortran
>  Set default accessibility of module entities to PRIVATE.
> Index: invoke.texi
> ===================================================================
> *** invoke.texi (revision 172206)
> --- invoke.texi (working copy)
> *************** and warnings}.
> *** 167,172 ****
> --- 167,173 ----
>  -fbounds-check -fcheck-array-temporaries  -fmax-array-constructor =@var{n} @gol
>  -fcheck=@var{<all|array-temps|bounds|do|mem|pointer|recursion>} @gol
>  -fcoarray=@var{<none|single|lib>} -fmax-stack-var-size=@var{n} @gol
> + -fstack-arrays @gol
>  -fpack-derived  -frepack-arrays  -fshort-enums  -fexternal-blas @gol
>  -fblas-matmul-limit=@var{n} -frecursive -finit-local-zero @gol
>  -finit-integer=@var{n} -finit-real=@var{<zero|inf|-inf|nan|snan>} @gol
> *************** Future versions of GNU Fortran may impro
> *** 1361,1366 ****
> --- 1362,1374 ----
>
>  The default value for @var{n} is 32768.
>
> + @item -fstack-arrays
> + @opindex @code{fstack-arrays}
> + Adding this option will make the Fortran compiler put all local arrays,
> + even those of unknown size, onto stack memory.  If your program uses very
> + large local arrays, it's possible that you'll have to extend your runtime
> + limits for stack memory on some operating systems.
> +
>  @item -fpack-derived
>  @opindex @code{fpack-derived}
>  @cindex structure packing
> Index: options.c
> ===================================================================
> *** options.c   (revision 172206)
> --- options.c   (working copy)
> *************** gfc_init_options (unsigned int decoded_o
> *** 123,128 ****
> --- 123,129 ----
>
>    /* Default value of flag_max_stack_var_size is set in gfc_post_options.  */
>    gfc_option.flag_max_stack_var_size = -2;
> +   gfc_option.flag_stack_arrays = 0;
>
>    gfc_option.flag_range_check = 1;
>    gfc_option.flag_pack_derived = 0;
> *************** gfc_handle_option (size_t scode, const c
> *** 783,788 ****
> --- 784,793 ----
>        gfc_option.flag_max_stack_var_size = value;
>        break;
>
> +     case OPT_fstack_arrays:
> +       gfc_option.flag_stack_arrays = value;
> +       break;
> +
>      case OPT_fmodule_private:
>        gfc_option.flag_module_private = value;
>        break;
>



-- 
The knack of flying is learning how to throw yourself at the ground and miss.
       --The Hitchhiker's Guide to the Galaxy

Reply via email to