Dear all,

Any idea why? I've looked around in the code to see how they parse
the data dependence tree, but I don't see a difference.

Interesting.
So what statements *are* in the list of data dependences, if not these?

OK, apparently it's more a problem of optimization level: at -O3 the
compiler does not seem to generate the calls, but at -O1 it does.

Here is the test case :

#include <stdio.h>

int fct(int *t);

/* Driver for the load-instrumentation test case: fills a 10-element table,
   prints one of its elements, then prints the value computed by fct().  */
int main()
{
     int tab[10];
     int i;

     /* Give every element a defined value before it is read.  The table was
        previously printed and passed to fct() while still uninitialized,
        which is undefined behavior and lets the optimizer do anything.  */
     for (i = 0; i < 10; i++)
         tab[i] = i;

     printf("Hello World %d\n",tab[5]);

     printf("Sum is : %d\n",fct(tab));

     return 0;
}

/* Walk the table T from index 9 down to 0 and, for every index i where
   t[i] < t[9-i], add t[i] + t[9-i] to the result.

   T must point to at least 10 readable ints.
   Returns the accumulated sum, or 0 when no index satisfies the test.  */
int fct(int *t)
{
     int i;
     /* The accumulator must start at 0: it is only ever updated with +=,
        so leaving it uninitialized made the return value indeterminate.  */
     int res = 0;

     for (i = 9; i >= 0; i--) {
         if (t[i] < t[9-i])
             res += t[i] + t[9-i];
     }

     return res;
}


These are the compilation options:

-O3 -ftree-load-inst


Any ideas ?

And here is the patch :

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi     (revision 116373)
+++ doc/invoke.texi     (working copy)
@@ -342,7 +342,7 @@
  -fsplit-ivs-in-unroller -funswitch-loops @gol
  -fvariable-expansion-in-unroller @gol
  -ftree-pre  -ftree-ccp  -ftree-dce -ftree-loop-optimize @gol
--ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
+-ftree-loop-linear -ftree-load-inst -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
  -ftree-dominator-opts -ftree-dse -ftree-copyrename -ftree-sink @gol
  -ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
  -ftree-vect-loop-version -ftree-salias -fipa-pta -fweb @gol
@@ -5120,6 +5120,10 @@
  Perform linear loop transformations on tree.  This flag can improve cache
  performance and allow further loop optimizations to take place.

+@item -ftree-load-inst
+Perform instrumentation of load on trees. This flag inserts a call to a profiling
+function before the loads of a program.
+
  @item -ftree-loop-im
Perform loop invariant motion on trees. This pass moves only invariants that
  would be hard to handle at RTL level (function calls, operations that expand to
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 116373)
+++ tree-pass.h (working copy)
@@ -251,6 +251,7 @@
  extern struct tree_opt_pass pass_record_bounds;
  extern struct tree_opt_pass pass_if_conversion;
  extern struct tree_opt_pass pass_vectorize;
+extern struct tree_opt_pass pass_load_inst;
  extern struct tree_opt_pass pass_complete_unroll;
  extern struct tree_opt_pass pass_loop_prefetch;
  extern struct tree_opt_pass pass_iv_optimize;
Index: tree-load-inst.c
===================================================================
--- tree-load-inst.c    (revision 0)
+++ tree-load-inst.c    (revision 0)
@@ -0,0 +1,139 @@
+#include <stdlib.h>
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "flags.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "langhooks.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "output.h"
+#include "expr.h"
+#include "function.h"
+#include "diagnostic.h"
+#include "bitmap.h"
+#include "pointer-set.h"
+#include "tree-flow.h"
+#include "tree-gimple.h"
+#include "tree-inline.h"
+#include "varray.h"
+#include "timevar.h"
+#include "hashtab.h"
+#include "tree-dump.h"
+#include "tree-pass.h"
+#include "toplev.h"
+#include "target.h"
+#include "cfgloop.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "lambda.h"
+#include "coverage.h"
+
+extern struct loops *current_loops;
+static void tree_handle_loop (struct loops *loops);
+
+
+static unsigned int tree_load_inst (void)
+{
+  fprintf (stderr, "My load instrumentation start %p\n",current_loops);
+
+  if (current_loops == NULL)
+    {
+      fprintf (stderr,"No loop\n");
+      return 0;
+    }
+
+  tree_handle_loop (current_loops);
+  return 0;
+}
+
+void tree_handle_loop (struct loops *loops)
+{
+  unsigned int i;
+  unsigned int j;
+  static int compteur_interne = 0;
+
+  for (i = 1; i<loops->num; i++)
+    {
+      struct loop *loop_nest = loops->parray[i];
+
+      //If no loop_nest
+      if (!loop_nest)
+        continue;
+
+      VEC (ddr_p, heap) *dependence_relations;
+      VEC (data_reference_p, heap) *datarefs;
+
+      datarefs = VEC_alloc (data_reference_p, heap, 10);
+      dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
+      compute_data_dependences_for_loop (loop_nest, true, &datarefs,
+                                         &dependence_relations);
+
+      tree call_type = build_function_type_list (void_type_node,
+                                                 integer_type_node,
+                                                 NULL_TREE);
+      tree call_fn = build_fn_decl ("__MarkovMainEntry",call_type);
+
+      data_reference_p a;
+      for (j = 0; VEC_iterate (data_reference_p, datarefs, j, a); j++)
+        {
+          tree stmt = DR_STMT (a);
+
+          /* Check whether this data reference is a load */
+          if (DR_IS_READ (a))
+            {
+              printf ("Have a load : %d\n", compteur_interne);
+              tree compteur = build_int_cst (integer_type_node, compteur_interne);
+              compteur_interne++;
+
+              /* Build the statement for the call */
+              tree args = tree_cons(NULL_TREE,//build_tree_list (NULL_TREE, compteur);
+                                    compteur,
+                                    NULL_TREE);
+
+              /* I assume args has been set up correctly */
+              tree call = build_function_call_expr (call_fn, args);
+
+              mark_new_vars_to_rename(call);
+              block_stmt_iterator bsi;
+              bsi = bsi_for_stmt (stmt);
+              bsi_insert_before (&bsi, call, BSI_SAME_STMT);
+
+            }
+        }
+      VEC_free (data_reference_p, heap, datarefs);
+      VEC_free (ddr_p, heap, dependence_relations);
+    }
+
+  debug_loop_ir ();
+  fprintf (stderr,"My load instrumentation stop\n");
+}
+
+static bool gate_tree_load_inst (void)
+{
+  return flag_tree_load_inst;
+}
+
+
+struct tree_opt_pass pass_load_inst =
+  {
+    "loadinst",                           /* name */
+    gate_tree_load_inst,                  /* gate */
+    tree_load_inst,                       /* execute */
+    NULL,                                 /* sub */
+    NULL,                                 /* next */
+    0,                                    /* static_pass_number */
+    TV_TREE_LOAD_INST,                    /* tv_id */
+    PROP_cfg|PROP_ssa,                    /* properties_required */
+    0,                                    /* properties_provided */
+    0,                                    /* properties_destroyed */
+    0,                                    /* todo_flags_start */
+    TODO_update_ssa|TODO_verify_stmts|TODO_verify_loops,    /* todo_flags_finish */
+    0                                        /* letter */
+  };
+
Index: timevar.def
===================================================================
--- timevar.def (revision 116373)
+++ timevar.def (working copy)
@@ -108,6 +108,7 @@
  DEFTIMEVAR (TV_COMPLETE_UNROLL       , "complete unrolling")
  DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree vectorization")
  DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
+DEFTIMEVAR (TV_TREE_LOAD_INST        , "tree load instrumentation")
  DEFTIMEVAR (TV_TREE_PREFETCH       , "tree prefetching")
  DEFTIMEVAR (TV_TREE_LOOP_IVOPTS            , "tree iv optimization")
  DEFTIMEVAR (TV_TREE_LOOP_INIT      , "tree loop init")
Index: profile.c
===================================================================
--- profile.c   (revision 116373)
+++ profile.c   (working copy)
@@ -1054,6 +1054,7 @@
      profile_status = PROFILE_READ;
    coverage_end_function ();
  }
+
  
  /* Union find algorithm implementation for the basic blocks using
     aux fields.  */
Index: common.opt
===================================================================
--- common.opt  (revision 116373)
+++ common.opt  (working copy)
@@ -965,6 +965,10 @@
  Common Report Var(flag_tree_loop_linear)
  Enable linear loop transforms on trees

+ftree-load-inst
+Common Report Var(flag_tree_load_inst)
+Enable load instrumentation on trees
+
  ftree-loop-ivcanon
  Common Report Var(flag_tree_loop_ivcanon) Init(1)
  Create canonical induction variables in loops
Index: tree-profile.c
===================================================================
--- tree-profile.c      (revision 116373)
+++ tree-profile.c      (working copy)
@@ -175,7 +175,7 @@
                    tree_cons (NULL_TREE, val,
                               NULL_TREE));
    call = build_function_call_expr (tree_pow2_profiler_fn, args);
-  bsi_insert_before (&bsi, call, BSI_SAME_STMT);
+  bsi_insert_before(&bsi, call, BSI_SAME_STMT);
  }

  /* Output instructions as GIMPLE trees for code to find the most common value.
Index: tree-flow.h
===================================================================
--- tree-flow.h (revision 116373)
+++ tree-flow.h (working copy)
@@ -949,6 +949,9 @@
  /* In tree-ssa-structalias.c */
  bool find_what_p_points_to (tree);

+/* In tree-load-inst.c */
+extern void load_inst(struct loops *);
+
  /* In tree-ssa-live.c */
  extern void remove_unused_locals (void);

Index: Makefile.in
===================================================================
--- Makefile.in (revision 116373)
+++ Makefile.in (working copy)
@@ -988,6 +988,7 @@
   tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o         \
   tree-ssa-math-opts.o                                                    \
   tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o          \
+ tree-load-inst.o \
   alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o          \
   cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o           \
   cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o  \
@@ -2104,6 +2105,11 @@
     $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
     tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
     $(TARGET_H) tree-chrec.h
+tree-load-inst.o: tree-load-inst.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
+   $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
+   tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
+   $(TARGET_H) tree-chrec.h $(VARRAY_H)
  tree-stdarg.o: tree-stdarg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
     $(TREE_H) $(FUNCTION_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) tree-pass.h \
     tree-stdarg.h $(TARGET_H) langhooks.h
Index: passes.c
===================================================================
--- passes.c    (revision 116373)
+++ passes.c    (working copy)
@@ -607,6 +607,7 @@
       pass_may_alias.  */
    NEXT_PASS (pass_complete_unroll);
    NEXT_PASS (pass_loop_prefetch);
+  NEXT_PASS (pass_load_inst);
    NEXT_PASS (pass_iv_optimize);
    NEXT_PASS (pass_tree_loop_done);
    *p = NULL;





----- Fin du message transféré -----


-----------------------------------------------------
‹Degskalle› There is no point in arguing with an idiot, they will just
drag you down to their level and beat you with experience

Référence: http://www.bash.org/?latest
-----------------------------------------------------


Reply via email to