Dear all,
Any idea why? I've looked around in the code to see how they parse
the data dependence tree, but I don't see a difference.
Interesting.
So what statements *are* in the list of data dependences, if not these?
OK, apparently it's more a problem of optimization levels: at -O3 the
compiler does not seem to generate the calls, but at -O1 it does.
Here is the test case :
#include <stdio.h>
int fct(int *t);
/*
 * Driver for the load-instrumentation test case: prints one element of a
 * small table, then the value fct() computes over that same table.
 */
int main()
{
    /* Give the table defined contents: it was previously read while
       uninitialized, which is undefined behavior and makes the printed
       values meaningless. */
    int tab[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

    printf("Hello World %d\n", tab[5]);
    printf("Sum is : %d\n", fct(tab));
    return 0;
}
/*
 * Walk the indices 9 down to 0 and, for every index i whose element is
 * strictly smaller than its mirror element t[9-i], add both elements to
 * the running sum.
 *
 * t must point to at least 10 readable ints.
 * Returns the accumulated sum (0 when no pair qualifies).
 */
int fct(int *t)
{
    int i = 9;
    /* Start from zero: accumulating into an uninitialized variable made
       the result indeterminate. */
    int res = 0;

    while (i >= 0) {
        if (t[i] < t[9 - i])
            res += t[i] + t[9 - i];
        i--;
    }
    return res;
}
These are the compilation options:
-O3 -ftree-load-inst
Any ideas ?
And here is the patch :
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi (revision 116373)
+++ doc/invoke.texi (working copy)
@@ -342,7 +342,7 @@
-fsplit-ivs-in-unroller -funswitch-loops @gol
-fvariable-expansion-in-unroller @gol
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
--ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
+-ftree-loop-linear -ftree-load-inst -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
-ftree-dominator-opts -ftree-dse -ftree-copyrename -ftree-sink @gol
-ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
-ftree-vect-loop-version -ftree-salias -fipa-pta -fweb @gol
@@ -5120,6 +5120,10 @@
Perform linear loop transformations on tree. This flag can improve cache
performance and allow further loop optimizations to take place.
+@item -ftree-load-inst
+Perform instrumentation of load on trees. This flag inserts a call to a profiling
+function before the loads of a program.
+
@item -ftree-loop-im
Perform loop invariant motion on trees. This pass moves only
invariants that
would be hard to handle at RTL level (function calls, operations
that expand to
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 116373)
+++ tree-pass.h (working copy)
@@ -251,6 +251,7 @@
extern struct tree_opt_pass pass_record_bounds;
extern struct tree_opt_pass pass_if_conversion;
extern struct tree_opt_pass pass_vectorize;
+extern struct tree_opt_pass pass_load_inst;
extern struct tree_opt_pass pass_complete_unroll;
extern struct tree_opt_pass pass_loop_prefetch;
extern struct tree_opt_pass pass_iv_optimize;
Index: tree-load-inst.c
===================================================================
--- tree-load-inst.c (revision 0)
+++ tree-load-inst.c (revision 0)
@@ -0,0 +1,139 @@
+#include <stdlib.h>
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "flags.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "langhooks.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "output.h"
+#include "expr.h"
+#include "function.h"
+#include "diagnostic.h"
+#include "bitmap.h"
+#include "pointer-set.h"
+#include "tree-flow.h"
+#include "tree-gimple.h"
+#include "tree-inline.h"
+#include "varray.h"
+#include "timevar.h"
+#include "hashtab.h"
+#include "tree-dump.h"
+#include "tree-pass.h"
+#include "toplev.h"
+#include "target.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "lambda.h"
+#include "coverage.h"
+
+extern struct loops *current_loops;
+static void tree_handle_loop (struct loops *loops);
+
+
+static unsigned int tree_load_inst (void)
+{
+ fprintf (stderr, "My load instrumentation start %p\n",current_loops);
+
+ if (current_loops == NULL)
+ {
+ fprintf (stderr,"No loop\n");
+ return 0;
+ }
+
+ tree_handle_loop (current_loops);
+ return 0;
+}
+
+void tree_handle_loop (struct loops *loops)
+{
+ unsigned int i;
+ unsigned int j;
+ static int compteur_interne = 0;
+
+ for (i = 1; i<loops->num; i++)
+ {
+ struct loop *loop_nest = loops->parray[i];
+
+ //If no loop_nest
+ if (!loop_nest)
+ continue;
+
+ VEC (ddr_p, heap) *dependence_relations;
+ VEC (data_reference_p, heap) *datarefs;
+
+ datarefs = VEC_alloc (data_reference_p, heap, 10);
+ dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
+ compute_data_dependences_for_loop (loop_nest, true, &datarefs,
+ &dependence_relations);
+
+ tree call_type = build_function_type_list (void_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree call_fn = build_fn_decl ("__MarkovMainEntry",call_type);
+
+ data_reference_p a;
+ for (j = 0; VEC_iterate (data_reference_p, datarefs, j, a); j++)
+ {
+ tree stmt = DR_STMT (a);
+
+ /* On fait nos test pour voir si c'est un load */
+ if (DR_IS_READ (a))
+ {
+ printf ("Have a load : %d\n", compteur_interne);
+ tree compteur = build_int_cst (integer_type_node, compteur_interne);
+ compteur_interne++;
+
+ /* Generation de l'instruction pour l'appel */
+ tree args = tree_cons(NULL_TREE,//build_tree_list (NULL_TREE, compteur);
+ compteur,
+ NULL_TREE);
+
+ /* Je suppose que args est correctement mis en place */
+ tree call = build_function_call_expr (call_fn, args);
+
+ mark_new_vars_to_rename(call);
+ block_stmt_iterator bsi;
+ bsi = bsi_for_stmt (stmt);
+ bsi_insert_before (&bsi, call, BSI_SAME_STMT);
+
+ }
+ }
+ VEC_free (data_reference_p, heap, datarefs);
+ VEC_free (ddr_p, heap, dependence_relations);
+ }
+
+ debug_loop_ir ();
+ fprintf (stderr,"My load instrumentation stop\n");
+}
+
+static bool gate_tree_load_inst (void)
+{
+ return flag_tree_load_inst;
+}
+
+
+struct tree_opt_pass pass_load_inst =
+ {
+ "loadinst", /* name */
+ gate_tree_load_inst, /* gate */
+ tree_load_inst, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_LOAD_INST, /* tv_id */
+ PROP_cfg|PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa|TODO_verify_stmts|TODO_verify_loops, /* todo_flags_finish */
+ 0 /* letter */
+ };
+
Index: timevar.def
===================================================================
--- timevar.def (revision 116373)
+++ timevar.def (working copy)
@@ -108,6 +108,7 @@
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
+DEFTIMEVAR (TV_TREE_LOAD_INST , "tree load instrumentation")
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
Index: profile.c
===================================================================
--- profile.c (revision 116373)
+++ profile.c (working copy)
@@ -1054,6 +1054,7 @@
profile_status = PROFILE_READ;
coverage_end_function ();
}
+
/* Union find algorithm implementation for the basic blocks using
aux fields. */
Index: common.opt
===================================================================
--- common.opt (revision 116373)
+++ common.opt (working copy)
@@ -965,6 +965,10 @@
Common Report Var(flag_tree_loop_linear)
Enable linear loop transforms on trees
+ftree-load-inst
+Common Report Var(flag_tree_load_inst)
+Enable load instrumentation on trees
+
ftree-loop-ivcanon
Common Report Var(flag_tree_loop_ivcanon) Init(1)
Create canonical induction variables in loops
Index: tree-profile.c
===================================================================
--- tree-profile.c (revision 116373)
+++ tree-profile.c (working copy)
@@ -175,7 +175,7 @@
tree_cons (NULL_TREE, val,
NULL_TREE));
call = build_function_call_expr (tree_pow2_profiler_fn, args);
- bsi_insert_before (&bsi, call, BSI_SAME_STMT);
+ bsi_insert_before(&bsi, call, BSI_SAME_STMT);
}
/* Output instructions as GIMPLE trees for code to find the most
common value.
Index: tree-flow.h
===================================================================
--- tree-flow.h (revision 116373)
+++ tree-flow.h (working copy)
@@ -949,6 +949,9 @@
/* In tree-ssa-structalias.c */
bool find_what_p_points_to (tree);
+/* In tree-load-inst.c */
+extern void load_inst(struct loops *);
+
/* In tree-ssa-live.c */
extern void remove_unused_locals (void);
Index: Makefile.in
===================================================================
--- Makefile.in (revision 116373)
+++ Makefile.in (working copy)
@@ -988,6 +988,7 @@
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
tree-ssa-math-opts.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o
\
+ tree-load-inst.o \
alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o
\
cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o
\
cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o
\
@@ -2104,6 +2105,11 @@
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
$(TARGET_H) tree-chrec.h
+tree-load-inst.o: tree-load-inst.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
+ $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
+ tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
+ $(TARGET_H) tree-chrec.h $(VARRAY_H)
tree-stdarg.o: tree-stdarg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_H) $(FUNCTION_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) tree-pass.h \
tree-stdarg.h $(TARGET_H) langhooks.h
Index: passes.c
===================================================================
--- passes.c (revision 116373)
+++ passes.c (working copy)
@@ -607,6 +607,7 @@
pass_may_alias. */
NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_loop_prefetch);
+ NEXT_PASS (pass_load_inst);
NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_tree_loop_done);
*p = NULL;
----- Fin du message transféré -----
-----------------------------------------------------
Degskalle There is no point in arguing with an idiot, they will just
drag you down to their level and beat you with experience
Référence: http://www.bash.org/?latest
-----------------------------------------------------