tfiala created this revision. tfiala added a reviewer: labath. tfiala added a subscriber: lldb-commits.
The race boiled down to this: if a test worker queue is able to run the test inferior and clean up before the dosep.py listener socket is spun up, and the worker queue is the last one (as would be the case when there's only one test rerunning in the rerun queue), then the test suite will exit the main loop before having a chance to process any test events coming from the test inferior or the worker queue job control. I found this race to be far more likely on fast hardware. Our Linux CI is one such example. While it will show up primarily during meta test events generated by a worker thread when a test inferior times out or exits exceptionally (e.g. a segfault), it only requires that the OS take longer to hook up the listener socket than it takes for the final test inferior and worker thread to shut down. http://reviews.llvm.org/D19214 Files: packages/Python/lldbsuite/test/dosep.py packages/Python/lldbsuite/test/dotest_channels.py packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park packages/Python/lldbsuite/test/result_formatter.py
Index: packages/Python/lldbsuite/test/result_formatter.py =================================================================== --- packages/Python/lldbsuite/test/result_formatter.py +++ packages/Python/lldbsuite/test/result_formatter.py @@ -76,6 +76,18 @@ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(("localhost", port)) + + # Wait for the ack from the listener side. + # This is needed to prevent a race condition + # in the main dosep.py processing loop: we + # can't allow a worker queue thread to die + # that has outstanding messages to a listener + # socket before the listener socket asyncore + # listener socket gets spun up; otherwise, + # we lose the test result info. + read_bytes = sock.recv(1) + # print("\n** socket creation: received ack: {}".format(ord(read_bytes[0])), file=sys.stderr) + return (sock, lambda: socket_closer(sock)) default_formatter_name = None Index: packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park =================================================================== --- packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park +++ packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park @@ -3,19 +3,21 @@ import time -import lldbsuite.test.lldbtest as lldbtest +import lldbsuite.test.decorators as decorators import rerun_base class RerunTimeoutTestCase(rerun_base.RerunBaseTestCase): - @lldbtest.no_debug_info_test + @decorators.no_debug_info_test def test_timeout_rerun_succeeds(self): - """Tests that timeout logic kicks in and is picked up.""" + """Tests that the timeout logic kicks in and that this timeout is picked up.""" if not self.should_generate_issue(): # We pass this time. return + # We time out this time. 
while True: + # noinspection PyBroadException try: time.sleep(1) except: Index: packages/Python/lldbsuite/test/dotest_channels.py =================================================================== --- packages/Python/lldbsuite/test/dotest_channels.py +++ packages/Python/lldbsuite/test/dotest_channels.py @@ -55,6 +55,14 @@ # unpickled results. raise Exception("forwarding function must be set") + # Initiate all connections by sending an ack. This allows + # the initiators of the socket to await this to ensure + # that this end is up and running (and therefore already + # into the async map). + ack_bytes = bytearray() + ack_bytes.append(chr(42)) + file_object.send(ack_bytes) + def deserialize_payload(self): """Unpickles the collected input buffer bytes and forwards.""" if len(self.ibuffer) > 0: Index: packages/Python/lldbsuite/test/dosep.py =================================================================== --- packages/Python/lldbsuite/test/dosep.py +++ packages/Python/lldbsuite/test/dosep.py @@ -109,13 +109,17 @@ global GET_WORKER_INDEX GET_WORKER_INDEX = get_worker_index_use_pid -def report_test_failure(name, command, output): +def report_test_failure(name, command, output, timeout): global output_lock with output_lock: if not (RESULTS_FORMATTER and RESULTS_FORMATTER.is_using_terminal()): print(file=sys.stderr) print(output, file=sys.stderr) - print("[%s FAILED]" % name, file=sys.stderr) + if timeout: + timeout_str = " (TIMEOUT)" + else: + timeout_str = "" + print("[%s FAILED]%s" % (name, timeout_str), file=sys.stderr) print("Command invoked: %s" % ' '.join(command), file=sys.stderr) update_progress(name) @@ -211,7 +215,7 @@ # only stderr does. report_test_pass(self.file_name, output[1]) else: - report_test_failure(self.file_name, command, output[1]) + report_test_failure(self.file_name, command, output[1], was_timeout) # Save off the results for the caller. self.results = (
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits