The protection of the libitm stack from undo/redo writes got lost for
all the write-through TM methods and logging of thread-local data.  I
revived the code that we had before for the gtm_cacheline-based writes,
but it might have been broken before.

The problem is that we don't know where the bottom of the stack is
precisely, so we use __builtin_dwarf_cfa() to look for the bottom of the
stack frame of the current function.  However, we might call other
functions like memcpy to actually do the undo/redo memory transfers,
whose stack frames could then be at lower addresses and get corrupted.
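
To make the filtering concrete, here is a rough sketch of the check (illustration only, with made-up names; the actual code is in the patch below): an undo write is only replayed if it lies entirely above the saved top of the libitm stack region or ends at or below the assumed bottom.

  #include <stdint.h>
  // Illustration only; names are hypothetical.  stack_top would be the CFA
  // saved when the transaction entered libitm, stack_bot an address at or
  // below the current frame's CFA (the stack grows down, so bot < top).
  static inline bool
  overlaps_libitm_stack (void *addr, size_t len, void *stack_top, void *stack_bot)
  {
    // The write [addr, addr+len) is safe only if it starts above stack_top
    // or ends at or below stack_bot; anything else could clobber the
    // libitm stack and must be skipped.
    return !((uint8_t *) addr > (uint8_t *) stack_top
             || (uint8_t *) addr + len <= (uint8_t *) stack_bot);
  }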

In the patch, I add a "safety buffer" of 128 bytes to the assumed bottom
of the stack to give some more room for a memcpy call.  This is hack-ish
because (1) memcpy and other called functions might use larger stack
frames, and (2) if the program uses an extremely tight stack space, we
might skip undoing updates to addresses that are no longer part of our
stack.  Alternatively, we could avoid all calls when doing the undo, but
then we would have to reimplement memcpy ourselves, which isn't pretty
either and probably not as good as the existing optimized routines.
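
For comparison, the call-free alternative would be roughly the loop below (a hypothetical sketch, not part of the patch).  It avoids creating a deeper stack frame itself, but it is a plain byte copy, and the compiler may well recognize such a loop and emit a memcpy call anyway, so it is not an obvious win over the 128-byte buffer.

  #include <stdint.h>
  // Hypothetical call-free restore; byte-wise, so likely slower than the
  // optimized memcpy for larger undo entries.
  static inline void
  undo_copy_inline (void *dst, const void *src, size_t len)
  {
    uint8_t *d = (uint8_t *) dst;
    const uint8_t *s = (const uint8_t *) src;
    while (len-- > 0)
      *d++ = *s++;
  }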

Any suggestions for improvement?  Or OK as-is for trunk?  Thanks!

commit 915a2fbc999cf5bc36beb50c1fb44cc8ca281a02
Author: Torvald Riegel <trie...@redhat.com>
Date:   Sun Jan 8 20:12:33 2012 +0100

    libitm: Filter out undo writes that overlap with the libitm stack.
    
        libitm/
        * config/generic/tls.h (GTM::mask_stack_top,
        GTM::mask_stack_bottom): New.
        * local.cc (gtm_undolog::rollback): Filter out any updates that
        overlap the libitm stack.  Add current transaction as parameter.
        * libitm_i.h (GTM::gtm_undolog::rollback): Adapt.
        * beginend.cc (GTM::gtm_thread::rollback): Adapt.
        * testsuite/libitm.c/stackundo.c: New test.

diff --git a/libitm/beginend.cc b/libitm/beginend.cc
index fe14f32..08c2174 100644
--- a/libitm/beginend.cc
+++ b/libitm/beginend.cc
@@ -327,7 +327,7 @@ GTM::gtm_thread::rollback (gtm_transaction_cp *cp, bool aborting)
   // data. Because of the latter, we have to roll it back before any
   // dispatch-specific rollback (which handles synchronization with other
   // transactions).
-  undolog.rollback (cp ? cp->undolog_size : 0);
+  undolog.rollback (this, cp ? cp->undolog_size : 0);
 
   // Perform dispatch-specific rollback.
   abi_disp()->rollback (cp);
diff --git a/libitm/config/generic/tls.h b/libitm/config/generic/tls.h
index 6bbdccf..07efef3 100644
--- a/libitm/config/generic/tls.h
+++ b/libitm/config/generic/tls.h
@@ -60,6 +60,25 @@ static inline abi_dispatch * abi_disp() { return _gtm_thr_tls.disp; }
 static inline void set_abi_disp(abi_dispatch *x) { _gtm_thr_tls.disp = x; }
 #endif
 
+#ifndef HAVE_ARCH_GTM_MASK_STACK
+// To filter out any updates that overlap the libitm stack, we define
+// mask_stack_top to the entry point to the library and mask_stack_bottom
+// to below the current function.  This definition should be fine for all
+// stack-grows-down architectures.
+// FIXME We fake the bottom to be lower so that we are safe even if we might
+// call further functions (compared to where we called mask_stack_bottom
+// in the call hierarchy) to actually undo or redo writes (e.g., memcpy).
+// This is a completely arbitrary value; can we instead ensure that there are
+// no such calls, or can we determine a future-proof value otherwise?
+static inline void *
+mask_stack_top(gtm_thread *tx) { return tx->jb.cfa; }
+static inline void *
+mask_stack_bottom(gtm_thread *tx)
+{
+  return (uint8_t*)__builtin_dwarf_cfa() - 128;
+}
+#endif
+
 } // namespace GTM
 
 #endif // LIBITM_TLS_H
diff --git a/libitm/libitm_i.h b/libitm/libitm_i.h
index f922d22..f849654 100644
--- a/libitm/libitm_i.h
+++ b/libitm/libitm_i.h
@@ -138,7 +138,7 @@ struct gtm_undolog
   size_t size() const { return undolog.size(); }
 
   // In local.cc
-  void rollback (size_t until_size = 0);
+  void rollback (gtm_thread* tx, size_t until_size = 0);
 };
 
 // Contains all thread-specific data required by the entire library.
diff --git a/libitm/local.cc b/libitm/local.cc
index 39b6da3..0c8185b 100644
--- a/libitm/local.cc
+++ b/libitm/local.cc
@@ -26,11 +26,17 @@
 
 namespace GTM HIDDEN {
 
-
-void
-gtm_undolog::rollback (size_t until_size)
+// ??? Does this function really need to be noinline?  Without it, we can
+// potentially be inlined and use the same stack frame as the function with
+// the transaction that we are rolling back; however, that shouldn't be a
+// problem because we should never have logged any stack slot that libitm
+// might use during rollback.
+void __attribute__((noinline))
+gtm_undolog::rollback (gtm_thread* tx, size_t until_size)
 {
   size_t i, n = undolog.size();
+  void *top = mask_stack_top(tx);
+  void *bot = mask_stack_bottom(tx);
 
   if (n > 0)
     {
@@ -40,7 +46,17 @@ gtm_undolog::rollback (size_t until_size)
           size_t len = undolog[i];
           size_t words = (len + sizeof(gtm_word) - 1) / sizeof(gtm_word);
           i -= words;
-          __builtin_memcpy (ptr, &undolog[i], len);
+          // Filter out any updates that overlap the libitm stack.  We don't
+          // bother filtering out just the overlapping bytes because we don't
+          // merge writes and thus any overlapping write is either bogus or
+          // would restore data on stack frames that are not in use anymore.
+          // FIXME The memcpy can/will end up as another call but we
+          // calculated BOT based on the current function.  Can we inline or
+          // reimplement this without too much trouble due to unaligned copies
+          // and still have good performance, so that we can remove the hack
+          // in mask_stack_bottom()?
+          if (likely(ptr > top || (uint8_t*)ptr + len <= bot))
+            __builtin_memcpy (ptr, &undolog[i], len);
        }
     }
 }
diff --git a/libitm/testsuite/libitm.c/stackundo.c b/libitm/testsuite/libitm.c/stackundo.c
new file mode 100644
index 0000000..02759d7
--- /dev/null
+++ b/libitm/testsuite/libitm.c/stackundo.c
@@ -0,0 +1,23 @@
+int __attribute__((noinline)) test2(int x[1000])
+{
+  int i;
+  return x[12];
+}
+
+int __attribute__((noinline)) test1()
+{
+  int x[1000], i;
+
+  for (i = 0; i < 1000; i++)
+    x[i] = i;
+  return test2(x);
+}
+
+int main()
+{
+  __transaction_atomic {
+    if (test1() != 0)
+      __transaction_cancel;
+  }
+  return 0;
+}
