tfiala created this revision.
tfiala added a reviewer: labath.
tfiala added a subscriber: lldb-commits.

The race boiled down to this:

    If a test worker queue is able to run the test inferior and
    clean up before the dosep.py listener socket is spun up, and
    the worker queue is the last one (as would be the case when
    there's only one test rerunning in the rerun queue), then
    the test suite will exit the main loop before having a chance
    to process any test events coming from the test inferior or
    the worker queue job control.

    I found this race to be far more likely on fast hardware.
    Our Linux CI is one such example.  While the race shows
    up primarily during meta test events generated by
    a worker thread when a test inferior times out or
    exits abnormally (e.g. seg fault), it only
    requires that the OS take longer to hook up the
    listener socket than the final test inferior
    and worker thread take to shut down.

http://reviews.llvm.org/D19214

Files:
  packages/Python/lldbsuite/test/dosep.py
  packages/Python/lldbsuite/test/dotest_channels.py
  packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park
  packages/Python/lldbsuite/test/result_formatter.py

Index: packages/Python/lldbsuite/test/result_formatter.py
===================================================================
--- packages/Python/lldbsuite/test/result_formatter.py
+++ packages/Python/lldbsuite/test/result_formatter.py
@@ -76,6 +76,18 @@
 
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         sock.connect(("localhost", port))
+
+        # Wait for the ack from the listener side.
+        # This is needed to prevent a race condition
+        # in the main dosep.py processing loop: we
+        # can't allow a worker queue thread that has
+        # outstanding messages to a listener socket
+        # to die before the asyncore listener socket
+        # gets spun up; otherwise, we lose the test
+        # result info.
+        read_bytes = sock.recv(1)
+        # print("\n** socket creation: received ack: {}".format(ord(read_bytes[0])), file=sys.stderr)
+
         return (sock, lambda: socket_closer(sock))
 
     default_formatter_name = None
Index: packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park
===================================================================
--- packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park
+++ packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park
@@ -3,19 +3,21 @@
 
 import time
 
-import lldbsuite.test.lldbtest as lldbtest
+import lldbsuite.test.decorators as decorators
 import rerun_base
 
 
 class RerunTimeoutTestCase(rerun_base.RerunBaseTestCase):
-    @lldbtest.no_debug_info_test
+    @decorators.no_debug_info_test
     def test_timeout_rerun_succeeds(self):
-        """Tests that timeout logic kicks in and is picked up."""
+        """Tests that the timeout logic kicks in and that this timeout is picked up."""
         if not self.should_generate_issue():
             # We pass this time.
             return
+
         # We time out this time.
         while True:
+            # noinspection PyBroadException
             try:
                 time.sleep(1)
             except:
Index: packages/Python/lldbsuite/test/dotest_channels.py
===================================================================
--- packages/Python/lldbsuite/test/dotest_channels.py
+++ packages/Python/lldbsuite/test/dotest_channels.py
@@ -55,6 +55,14 @@
             # unpickled results.
             raise Exception("forwarding function must be set")
 
+        # Initiate all connections by sending an ack.  This allows
+        # the initiator of the socket to wait for the ack, ensuring
+        # that this end is up and running (and therefore already
+        # in the async map).
+        ack_bytes = bytearray()
+        ack_bytes.append(chr(42))
+        file_object.send(ack_bytes)
+
     def deserialize_payload(self):
         """Unpickles the collected input buffer bytes and forwards."""
         if len(self.ibuffer) > 0:
Index: packages/Python/lldbsuite/test/dosep.py
===================================================================
--- packages/Python/lldbsuite/test/dosep.py
+++ packages/Python/lldbsuite/test/dosep.py
@@ -109,13 +109,17 @@
         global GET_WORKER_INDEX
         GET_WORKER_INDEX = get_worker_index_use_pid
 
-def report_test_failure(name, command, output):
+def report_test_failure(name, command, output, timeout):
     global output_lock
     with output_lock:
         if not (RESULTS_FORMATTER and RESULTS_FORMATTER.is_using_terminal()):
             print(file=sys.stderr)
             print(output, file=sys.stderr)
-            print("[%s FAILED]" % name, file=sys.stderr)
+            if timeout:
+                timeout_str = " (TIMEOUT)"
+            else:
+                timeout_str = ""
+            print("[%s FAILED]%s" % (name, timeout_str), file=sys.stderr)
             print("Command invoked: %s" % ' '.join(command), file=sys.stderr)
         update_progress(name)
 
@@ -211,7 +215,7 @@
             # only stderr does.
             report_test_pass(self.file_name, output[1])
         else:
-            report_test_failure(self.file_name, command, output[1])
+            report_test_failure(self.file_name, command, output[1], was_timeout)
 
         # Save off the results for the caller.
         self.results = (
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to