Jakub,

please consider the attached patch, which adds pass_lim after fre1 (a simplification of my oacc kernels patch series).

With this patch applied, the included testcase lim-before-stdarg.c fails.

The first sign of trouble is in lim-before-stdarg.c.088t.stdarg (attached):
...
gen_rtvec: va_list escapes 0, needs to save 0 GPR units and 0 FPR units.
...

Because of the 'needs to save 0 GPR units' result, no prologue code is generated at expand time to dump the register-passed varargs onto the stack.

However, the varargs are still read from the stack and are therefore uninitialized, which valgrind observes:
...
==6254== Conditional jump or move depends on uninitialised value(s)
==6254==    at 0x4005AB: gen_rtvec (in a.out)
==6254==    by 0x400411: main (in a.out)
...
and as a result the test executable aborts.

As far as I understand, pass_stdarg recognizes a va_arg item by looking for 'ap[0].field' references (in our example, p.gp_offset) of the form 'ap[0].field = temp' and 'temp = ap[0].field'.

With -fno-tree-loop-im, we find both read and write references in the loop:
...
  <bb 5>:
  # i_28 = PHI <i_23(8), 0(4)>
  _12 = p.gp_offset;                    <<<
  if (_12 > 47)
    goto <bb 7>;
  else
    goto <bb 6>;

  <bb 6>:
  _13 = p.reg_save_area;
  _14 = (sizetype) _12;
  addr.0_15 = _13 + _14;
  _16 = _12 + 8;
  p.gp_offset = _16;                    <<<
  goto <bb 8>;

  <bb 7>:
  _18 = p.overflow_arg_area;
  _19 = _18 + 8;
  p.overflow_arg_area = _19;

  <bb 8>:
  # addr.0_3 = PHI <addr.0_15(6), _18(7)>
  _21 = MEM[(void * * {ref-all})addr.0_3];
  rt_val_11->elem[i_28] = _21;
  i_23 = i_28 + 1;
  if (n_9(D) > i_23)
    goto <bb 5>;
  else
    goto <bb 9>;
...

But with -ftree-loop-im, that's no longer the case. We just find one reference, before the loop, a read:
...
  <bb 2>:
  __builtin_va_start (&p, 0);
  if (n_8(D) == 0)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  __builtin_va_end (&p);
  goto <bb 10>;

  <bb 4>:
  rt_val_12 = rtvec_alloc (n_8(D));
  p_gp_offset_lsm.4_31 = p.gp_offset;                           <<<
  _15 = p.reg_save_area;
  p_overflow_arg_area_lsm.6_33 = p.overflow_arg_area;
  if (n_8(D) > 0)
    goto <bb 5>;
  else
    goto <bb 9>;
...

pass_stdarg recognizes the reference as a read in va_list_counter_struct_op, and calls va_list_counter_op. But since it's a read that is only executed once, there's no effect on cfun->va_list_gpr_size:
...
va_list_counter_op (si=0x7fffffffd7f0, ap=0x7ffff6963540, var=0x7ffff696b948, gpr_p=true, write_p=false)
    at src/gcc/tree-stdarg.c:323
323       if (si->compute_sizes < 0)
(gdb) n
325           si->compute_sizes = 0;
(gdb)
326           if (si->va_start_count == 1
(gdb)
327               && reachable_at_most_once (si->bb, si->va_start_bb))
(gdb)
326           if (si->va_start_count == 1
(gdb)
328             si->compute_sizes = 1;
(gdb)
330           if (dump_file && (dump_flags & TDF_DETAILS))
(gdb)
339           && (increment = va_list_counter_bump (si, ap, var, gpr_p)) + 1 > 1)
(gdb)
337       if (write_p
(gdb)
354       if (write_p || !si->compute_sizes)
(gdb)
361     }
...

Do I understand correctly that pass_stdarg assumes that:
- the reads and writes occur in pairs (I'm guessing this because the read above
  seems to be ignored; PR41089 also seems to hint at this), and
- the related memref occurs at the same loop nesting level as the pair?

Any advice on how to fix this, or work around it?

Thanks,
- Tom
Run pass_lim after fre1

---
 gcc/passes.def                           |  3 ++
 gcc/testsuite/gcc.dg/lim-before-stdarg.c | 67 ++++++++++++++++++++++++++++++++
 gcc/tree-ssa-loop.c                      |  2 +
 3 files changed, 72 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/lim-before-stdarg.c

diff --git a/gcc/passes.def b/gcc/passes.def
index 2bc5dcd..03d749e 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -86,6 +86,9 @@ along with GCC; see the file COPYING3.  If not see
 	     execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
 	  NEXT_PASS (pass_fre);
+	  NEXT_PASS (pass_tree_loop_init);
+	  NEXT_PASS (pass_lim);
+	  NEXT_PASS (pass_tree_loop_done);
 	  NEXT_PASS (pass_merge_phi);
 	  NEXT_PASS (pass_cd_dce);
 	  NEXT_PASS (pass_early_ipa_sra);
diff --git a/gcc/testsuite/gcc.dg/lim-before-stdarg.c b/gcc/testsuite/gcc.dg/lim-before-stdarg.c
new file mode 100644
index 0000000..c7a6f03
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lim-before-stdarg.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+#include <stdarg.h>
+
+typedef void *rtx;
+
+struct rtvec
+{
+  rtx elem[100];
+};
+typedef struct rtvec *rtvec;
+
+#define NULL_RTVEC ((void *)0)
+
+rtvec __attribute__((noinline,noclone))
+rtvec_alloc (int n)
+{
+  static struct rtvec v;
+
+  if (n != 2)
+    __builtin_abort ();
+
+  return &v;
+}
+
+rtvec __attribute__((noinline,noclone))
+gen_rtvec (int n, ...)
+{
+  int i;
+  rtvec rt_val;
+  va_list p;
+
+  va_start (p, n);
+
+  if (n == 0)
+    {
+      va_end (p);
+      return NULL_RTVEC;
+    }
+
+  rt_val = rtvec_alloc (n);
+
+  for (i = 0; i < n; i++)
+    rt_val->elem[i] = va_arg (p, rtx);
+
+  va_end (p);
+  return rt_val;
+}
+
+int
+main ()
+{
+  int a;
+  int b;
+  rtvec v;
+  int ok;
+  v = gen_rtvec (2, &a, &b);
+
+  ok = (v->elem[0] == &a
+	&& v->elem[1] == &b);
+
+  if (!ok)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index ccb8f97..7578c8d 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -227,6 +227,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *);
+  opt_pass * clone () { return new pass_tree_loop_init (m_ctxt); }
 
 }; // class pass_tree_loop_init
 
@@ -521,6 +522,7 @@ public:
 
   /* opt_pass methods: */
   virtual unsigned int execute (function *) { return tree_ssa_loop_done (); }
+  opt_pass * clone () { return new pass_tree_loop_done (m_ctxt); }
 
 }; // class pass_tree_loop_done
 
-- 
1.9.1

;; Function gen_rtvec (gen_rtvec, funcdef_no=1, decl_uid=1841, cgraph_uid=1, symbol_order=1)

gen_rtvec: va_list escapes 0, needs to save 0 GPR units and 0 FPR units.
gen_rtvec (int n)
{
  _Bool p_overflow_arg_area_lsm.7;
  void * p_overflow_arg_area_lsm.6;
  _Bool p_gp_offset_lsm.5;
  unsigned int p_gp_offset_lsm.4;
  struct  p[1];
  struct rtvec * rt_val;
  int i;
  struct rtvec * _2;
  void * addr.0_3;
  void * _15;
  sizetype _17;
  void * addr.0_18;
  unsigned int _20;
  void * _23;
  void * _24;

  <bb 2>:
  __builtin_va_start (&p, 0);
  if (n_8(D) == 0)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  __builtin_va_end (&p);
  goto <bb 10>;

  <bb 4>:
  rt_val_12 = rtvec_alloc (n_8(D));
  p_gp_offset_lsm.4_31 = p.gp_offset;
  _15 = p.reg_save_area;
  p_overflow_arg_area_lsm.6_33 = p.overflow_arg_area;
  if (n_8(D) > 0)
    goto <bb 5>;
  else
    goto <bb 9>;

  <bb 5>:
  # i_14 = PHI <i_26(8), 0(4)>
  # p_overflow_arg_area_lsm.6_37 = PHI <p_overflow_arg_area_lsm.6_10(8), p_overflow_arg_area_lsm.6_33(4)>
  # p_gp_offset_lsm.4_39 = PHI <p_gp_offset_lsm.4_16(8), p_gp_offset_lsm.4_31(4)>
  if (p_gp_offset_lsm.4_39 > 47)
    goto <bb 7>;
  else
    goto <bb 6>;

  <bb 6>:
  _17 = (sizetype) p_gp_offset_lsm.4_39;
  addr.0_18 = _15 + _17;
  _20 = p_gp_offset_lsm.4_39 + 8;
  goto <bb 8>;

  <bb 7>:
  _23 = p_overflow_arg_area_lsm.6_37 + 8;

  <bb 8>:
  # addr.0_3 = PHI <addr.0_18(6), p_overflow_arg_area_lsm.6_37(7)>
  # p_gp_offset_lsm.4_16 = PHI <_20(6), p_gp_offset_lsm.4_39(7)>
  # p_overflow_arg_area_lsm.6_10 = PHI <p_overflow_arg_area_lsm.6_37(6), _23(7)>
  _24 = MEM[(void * * {ref-all})addr.0_3];
  rt_val_12->elem[i_14] = _24;
  i_26 = i_14 + 1;
  if (n_8(D) > i_26)
    goto <bb 5>;
  else
    goto <bb 9>;

  <bb 9>:
  __builtin_va_end (&p);

  <bb 10>:
  # _2 = PHI <0B(3), rt_val_12(9)>
  p ={v} {CLOBBER};
  return _2;

}


Reply via email to