tfiala updated this revision to Diff 35788.
tfiala added a comment.

Work in progress.

This patch is working and timing out consistently on Linux (on a Jenkins bot), 
and is running normally on OS X.  I haven't coerced a timeout on OS X yet.

I'll be adding some tests for the timeout and core generation, and plan to add 
a --no-core-for-timeout option since I don't think that is universally 
desirable.  (Right now it always attempts to generate cores via SIGQUIT on 
Posix-like systems, but the code is there to do a SIGTERM instead if no cores 
are desired.  I just haven't hooked it up).

Zachary, you can see what I've got in place on the Windows side.  If you know 
some of that is wrong, feel free to let me know ahead of time and I can change 
it.

Hope to get those tests in tonight and have a formal "eval this patch" setup.


http://reviews.llvm.org/D13124

Files:
  test/dosep.py
  test/lldb_utils.py
  test/process_control.py

Index: test/process_control.py
===================================================================
--- /dev/null
+++ test/process_control.py
@@ -0,0 +1,474 @@
+"""
+The LLVM Compiler Infrastructure
+
+This file is distributed under the University of Illinois Open Source
+License. See LICENSE.TXT for details.
+
+Provides classes used by the test results reporting infrastructure
+within the LLDB test suite.
+
+
+This module provides process-management support for the LLDB test
+running infrastructure.
+"""
+
+# System imports
+import os
+import re
+import signal
+import subprocess
+import sys
+import threading
+
+
+class CommunicatorThread(threading.Thread):
+    """Provides a thread class that communicates with a subprocess."""
+    def __init__(self, process, event, output_file):
+        super(CommunicatorThread, self).__init__()
+        # Don't let this thread prevent shutdown.
+        self.daemon = True
+        self.process = process
+        self.pid = process.pid
+        self.event = event
+        self.output_file = output_file
+        self.output = None
+
+    def run(self):
+        try:
+            # Communicate with the child process.
+            # This will not complete until the child process terminates.
+            self.output = self.process.communicate()
+        except Exception as exception:  # pylint: disable=broad-except
+            if self.output_file:
+                self.output_file.write(
+                    "exception while using communicate() for pid: {}\n".format(
+                        exception))
+        finally:
+            # Signal that the thread's run is complete.
+            self.event.set()
+
+
+# Provides a regular expression for matching gtimeout-based durations.
+TIMEOUT_REGEX = re.compile(r"(^\d+)([smhd])?$")
+
+
+def timeout_to_seconds(timeout):
+    """Converts timeout/gtimeout timeout values into seconds.
+
+    @param timeout a timeout in the form of xm representing x minutes.
+
+    @return None if timeout is None, or the number of seconds as a float
+    if a valid timeout format was specified.
+    """
+    if timeout is None:
+        return None
+    else:
+        match = TIMEOUT_REGEX.match(timeout)
+        if match:
+            value = float(match.group(1))
+            units = match.group(2)
+            if units is None:
+                # default is seconds.  No conversion necessary.
+                return value
+            elif units == 's':
+                # Seconds.  No conversion necessary.
+                return value
+            elif units == 'm':
+                # Value is in minutes.
+                return 60.0 * value
+            elif units == 'h':
+                # Value is in hours.
+                return (60.0 * 60.0) * value
+            elif units == 'd':
+                # Value is in days.
+                return 24 * (60.0 * 60.0) * value
+            else:
+                raise Exception("unexpected units value '{}'".format(units))
+        else:
+            raise Exception("could not parse TIMEOUT spec '{}'".format(
+                timeout))
+
+
+class ProcessHelper(object):
+    """Provides an interface for accessing process-related functionality.
+
+    This class provides a factory method that gives the caller a
+    platform-specific implementation instance of the class.
+
+    Clients of the class should stick to the methods provided in this
+    base class.
+
+    @see ProcessHelper.process_helper()
+    """
+    def __init__(self):
+        super(ProcessHelper, self).__init__()
+
+    @classmethod
+    def process_helper(cls):
+        """Returns a platform-specific ProcessHelper instance.
+        @return a ProcessHelper instance that does the right thing for
+        the current platform.
+        """
+        if os.name == "nt":
+            return WindowsProcessHelper()
+        else:
+            return UnixProcessHelper()
+
+    def create_piped_process(self, command, new_process_group=True):
+        # pylint: disable=no-self-use,unused-argument
+        # As expected.  We want derived classes to implement this.
+        """Creates a subprocess.Popen-based class with I/O piped to the parent.
+
+        @param command the command line list as would be passed to
+        subprocess.Popen().  Use the list form rather than the string form.
+
+        @param new_process_group indicates if the caller wants the
+        process to be created in its own process group.  Each OS handles
+        this concept differently.  It provides a level of isolation and
+        can simplify or enable terminating the process tree properly.
+
+        @return a subprocess.Popen-like object.
+        """
+        raise Exception("derived class must implement")
+
+    def soft_terminate(self, popen_process, log_file=None, want_core=True):
+        # pylint: disable=no-self-use,unused-argument
+        # As expected.  We want derived classes to implement this.
+        """Attempts to terminate the process in a polite way.
+
+        This terminate method is intended to give the child process a
+        chance to clean up and exit on its own, possibly with a request
+        to drop a core file or equivalent (i.e. [mini-]crashdump, crashlog,
+        etc.)  If new_process_group was set in the process creation method
+        and the platform supports it, this terminate call will attempt to
+        kill the whole process tree rooted in this child process.
+
+        @param popen_process the subprocess.Popen-like object returned
+        by one of the process-creation methods of this class.
+
+        @param log_file file-like object used to emit error-related
+        logging info.  May be None if no error-related info is desired.
+
+        @param want_core True if the caller would like to get a core
+        dump (or the analogous crash report) from the terminated process.
+        """
+        popen_process.terminate()
+
+    def hard_terminate(self, popen_process, log_file=None):
+        # pylint: disable=no-self-use,unused-argument
+        # As expected.  We want derived classes to implement this.
+        """Attempts to terminate the process immediately.
+
+        This terminate method is intended to kill child process in
+        a manner in which the child process has no ability to block,
+        and also has no ability to clean up properly.  If new_process_group
+        was specified when creating the process, and if the platform
+        implementation supports it, this will attempt to kill the
+        whole process tree rooted in the child process.
+
+        @param popen_process the subprocess.Popen-like object returned
+        by one of the process-creation methods of this class.
+
+        @param log_file file-like object used to emit error-related
+        logging info.  May be None if no error-related info is desired.
+        """
+        popen_process.kill()
+
+
+class UnixProcessHelper(ProcessHelper):
+    """Provides a ProcessHelper for Unix-like operating systems.
+
+    This implementation supports anything that looks Posix-y
+    (e.g. Darwin, Linux, *BSD, etc.)
+    """
+    def __init__(self):
+        super(UnixProcessHelper, self).__init__()
+
+    @classmethod
+    def _create_new_process_group(cls):
+        """Creates a new process group for the calling process."""
+        os.setpgid(os.getpid(), os.getpid())
+
+    def create_piped_process(self, command, new_process_group=True):
+        # Determine what to run after the fork but before the exec.
+        if new_process_group:
+            preexec_func = self._create_new_process_group
+        else:
+            preexec_func = None
+
+        # Create the process.
+        process = subprocess.Popen(
+            command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            close_fds=True,
+            preexec_fn=preexec_func)
+
+        # Remember whether we're using process groups for this
+        # process.
+        process.using_process_groups = new_process_group
+        return process
+
+    @classmethod
+    def _validate_pre_terminate(cls, popen_process, log_file):
+        # Validate args.
+        if popen_process is None:
+            raise ValueError("popen_process is None")
+
+        # Ensure we have something that looks like a valid process.
+        if popen_process.pid < 1:
+            if log_file:
+                log_file.write("skipping soft_terminate(): no process id")
+            return False
+
+        # Don't kill if it's already dead.
+        popen_process.poll()
+        if popen_process.returncode is not None:
+            # It has a returncode.  It has already stopped.
+            if log_file:
+                log_file.write(
+                    "requested to terminate pid {} but it has already "
+                    "terminated, returncode {}".format(
+                        popen_process.pid, popen_process.returncode))
+            # Move along...
+            return False
+
+        # Good to go.
+        return True
+
+    def _kill_with_signal(self, popen_process, log_file, signum):
+        # Validate we're ready to terminate this.
+        if not self._validate_pre_terminate(popen_process, log_file):
+            return
+
+        # Choose kill mechanism based on whether we're targeting
+        # a process group or just a process.
+        if popen_process.using_process_groups:
+            os.killpg(popen_process.pid, signum)
+        else:
+            os.kill(popen_process.pid, signum)
+
+    def soft_terminate(self, popen_process, log_file=None, want_core=True):
+        # Choose signal based on desire for core file.
+        if want_core:
+            # SIGQUIT will generate core by default.  Can be caught.
+            signum = signal.SIGQUIT
+        else:
+            # SIGTERM is the traditional nice way to kill a process.
+            # Can be caught, doesn't generate a core.
+            signum = signal.SIGTERM
+
+        self._kill_with_signal(popen_process, log_file, signum)
+
+    def hard_terminate(self, popen_process, log_file=None):
+        self._kill_with_signal(popen_process, log_file, signal.SIGKILL)
+
+
+class WindowsProcessHelper(ProcessHelper):
+    """Provides a Windows implementation of the ProcessHelper class."""
+    def __init__(self):
+        super(WindowsProcessHelper, self).__init__()
+
+    def create_piped_process(self, command, new_process_group=True):
+        if new_process_group:
+            # We need this flag if we want os.kill() to work on the subprocess.
+            creation_flags = subprocess.CREATE_NEW_PROCESS_GROUP
+        else:
+            creation_flags = 0
+
+        return subprocess.Popen(
+            command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            creationflags=creation_flags)
+
+
+class ProcessDriver(object):
+    """Drives a child process, notifies on important events, and can timeout.
+
+    Clients are expected to derive from this class and override the
+    on_process_started and on_process_exited methods if they want to
+    hook either of those.
+
+    This class supports timing out the child process in a platform-agnostic
+    way.  The on_process_exited method is informed if the exit was natural
+    or if it was due to a timeout.
+    """
+    def __init__(self):
+        super(ProcessDriver, self).__init__()
+        self.process_helper = ProcessHelper.process_helper()
+        self.pid = None
+        # Create the synchronization event for notifying when the
+        # inferior dotest process is complete.
+        self.done_event = threading.Event()
+        self.io_thread = None
+        self.process = None
+        # Number of seconds to wait for the soft terminate to
+        # wrap up, before moving to more drastic measures.
+        # Might want this longer if core dumps are generated and
+        # take a long time to write out.
+        self.soft_terminate_timeout = 20.0
+        # Number of seconds to wait for the hard terminate to
+        # wrap up, before giving up on the io thread.  This should
+        # be fast.
+        self.hard_terminate_timeout = 10.0
+
+    def write(self, content):
+        # pylint: disable=no-self-use
+        # Intended - we want derived classes to be able to override
+        # this and use any self state they may contain.
+        sys.stdout.write(content)
+
+    def _start_process_and_io_thread(self, command):
+        # Create the process.
+        self.process = self.process_helper.create_piped_process(command)
+        self.pid = self.process.pid
+        self.on_process_started()
+
+        # Ensure the event is cleared that is used for signaling
+        # from the communication() thread when communication is
+        # complete (i.e. the inferior process has finished).
+        self.done_event.clear()
+
+        self.io_thread = CommunicatorThread(
+            self.process, self.done_event, self.write)
+        self.io_thread.start()
+
+    def run_command(self, command):
+        # Start up the child process and the thread that does the
+        # communication pump.
+        self._start_process_and_io_thread(command)
+
+        # Wait indefinitely for the child process to finish
+        # communicating.  This indicates it has closed stdout/stderr
+        # pipes and is done.
+        self.io_thread.join()
+        self.process.wait()
+        exit_status = self.process.returncode
+        if exit_status is None:
+            raise Exception(
+                "no exit status available after the inferior dotest.py "
+                "should have completed")
+
+        # Notify of non-timeout exit.
+        self.on_process_exited(
+            command,
+            self.io_thread.output,
+            False,
+            exit_status)
+
+    def _attempt_soft_kill(self):
+        # The inferior dotest timed out.  Attempt to clean it
+        # with a non-drastic method (so it can clean up properly
+        # and/or generate a core dump).  Often the OS can't guarantee
+        # that the process will really terminate after this.
+        self.process_helper.soft_terminate(self.process)
+
+        # Now wait up to a certain timeout period for the io thread
+        # to say that the communication ended.  If that wraps up
+        # within our soft terminate timeout, we're all done here.
+        io_completed = self.done_event.wait(self.soft_terminate_timeout)
+        if io_completed:
+            # stdout/stderr were closed on the child process side. We
+            # should be able to wait and reap the child process here.
+            self.process.wait()
+            # We terminated, and the done_trying result is n/a
+            terminated = True
+            done_trying = None
+        else:
+            self.write("soft kill attempt of process {} timed out "
+                       "after {} seconds\n".format(
+                           self.process.pid, self.soft_terminate_timeout))
+            terminated = False
+            done_trying = False
+        return terminated, done_trying
+
+    def _attempt_hard_kill(self):
+        # Instruct the process to terminate and really force it to
+        # happen.  Don't give the process a chance to ignore.
+        self.process_helper.hard_terminate(self.process)
+
+        # Reap the child process.  This should not hang as the
+        # hard_kill() mechanism is supposed to really kill it.
+        # Improvement option:
+        # If this does ever hang, convert to a self.process.poll()
+        # loop checking on self.process.returncode until it is not
+        # None or the timeout occurs.
+        self.process.wait()
+
+        # Wait a few moments for the io thread to finish...
+        io_completed = self.done_event.wait(self.hard_terminate_timeout)
+        if not io_completed:
+            # ... but this is not critical if it doesn't end for some
+            # reason.
+            self.write(
+                "hard kill of process {} timed out after {} seconds waiting "
+                "for the io thread (ignoring)\n".format(
+                    self.process.pid, self.hard_terminate_timeout))
+
+        # Set if it terminated.  (Set up for optional improvement above).
+        terminated = self.process.returncode is not None
+        # Nothing else to try.
+        done_trying = True
+
+        return terminated, done_trying
+
+    def _attempt_termination(self, attempt_count):
+        if attempt_count == 1:
+            return self._attempt_soft_kill()
+        elif attempt_count == 2:
+            return self._attempt_hard_kill()
+        else:
+            # We don't have anything else to try.
+            terminated = self.process.returncode is not None
+            done_trying = True
+            return terminated, done_trying
+
+    def _wait_with_timeout(self, timeout_seconds, command):
+        # Allow up to timeout seconds for the io thread to wrap up.
+        completed_normally = self.done_event.wait(timeout_seconds)
+        process_terminated = completed_normally
+        terminate_attempt_count = 0
+
+        # Try as many attempts as we support for trying to shut down
+        # the child process if it's not already shut down.
+        while not process_terminated:
+            terminate_attempt_count += 1
+            # Attempt to terminate.
+            process_terminated, done_trying = self._attempt_termination(
+                terminate_attempt_count)
+            # Check if there's nothing more to try.
+            if done_trying:
+                # Break out of our termination attempt loop.
+                break
+
+        # At this point, we're calling it good.  The process
+        # finished gracefully, was shut down after one or more
+        # attempts, or we failed but gave it our best effort.
+        if process_terminated:
+            exit_status = self.process.returncode
+        else:
+            exit_status = None
+        self.on_process_exited(
+            command,
+            self.io_thread.output,
+            not completed_normally,
+            exit_status)
+
+    def run_command_with_timeout(self, command, timeout):
+        # Figure out how many seconds our timeout description is requesting.
+        timeout_seconds = timeout_to_seconds(timeout)
+
+        # Start up the child process and the thread that does the
+        # communication pump.
+        self._start_process_and_io_thread(command)
+
+        self._wait_with_timeout(timeout_seconds, command)
+
+    def on_process_started(self):
+        pass
+
+    def on_process_exited(self, command, output, was_timeout, exit_status):
+        pass
Index: test/lldb_utils.py
===================================================================
--- /dev/null
+++ test/lldb_utils.py
@@ -0,0 +1,66 @@
+"""
+The LLVM Compiler Infrastructure
+
+This file is distributed under the University of Illinois Open Source
+License. See LICENSE.TXT for details.
+
+Provides classes used by the test results reporting infrastructure
+within the LLDB test suite.
+
+
+This module contains utilities used by the lldb test framework.
+"""
+
+
+class OptionalWith(object):
+    # pylint: disable=too-few-public-methods
+    # This is a wrapper - it is not meant to provide any extra methods.
+    """Provides a wrapper for objects supporting "with", allowing None.
+
+    This lets a user use the "with object" syntax for resource usage
+    (e.g. locks) even when the wrapped with object is None.
+
+    e.g.
+
+    wrapped_lock = OptionalWith(threading.Lock())
+    with wrapped_lock:
+        # Do something while the lock is obtained.
+        pass
+
+    might_be_none = None
+    wrapped_none = OptionalWith(might_be_none)
+    with wrapped_none:
+        # This code here still works.
+        pass
+
+    This prevents having to write code like this when
+    a lock is optional:
+
+    if lock:
+        lock.acquire()
+
+    try:
+        code_fragment_always_run()
+    finally:
+        if lock:
+            lock.release()
+
+    And I'd posit it is safer, as it becomes impossible to
+    forget the try/finally using OptionalWith(), since
+    the with syntax can be used.
+    """
+    def __init__(self, wrapped_object):
+        self.wrapped_object = wrapped_object
+
+    def __enter__(self):
+        if self.wrapped_object is not None:
+            return self.wrapped_object.__enter__()
+        else:
+            return self
+
+    def __exit__(self, the_type, value, traceback):
+        if self.wrapped_object is not None:
+            return self.wrapped_object.__exit__(the_type, value, traceback)
+        else:
+            # Don't suppress any exceptions
+            return False
Index: test/dosep.py
===================================================================
--- test/dosep.py
+++ test/dosep.py
@@ -32,6 +32,7 @@
 echo core.%p | sudo tee /proc/sys/kernel/core_pattern
 """
 
+# system packages and modules
 import asyncore
 import distutils.version
 import fnmatch
@@ -45,27 +46,12 @@
 import subprocess
 import sys
 import threading
-import test_results
+
+# Our packages and modules
 import dotest_channels
 import dotest_args
-
-
-def get_timeout_command():
-    """Search for a suitable timeout command."""
-    if not sys.platform.startswith("win32"):
-        try:
-            subprocess.call("timeout", stderr=subprocess.PIPE)
-            return "timeout"
-        except OSError:
-            pass
-    try:
-        subprocess.call("gtimeout", stderr=subprocess.PIPE)
-        return "gtimeout"
-    except OSError:
-        pass
-    return None
-
-timeout_command = get_timeout_command()
+import lldb_utils
+import process_control
 
 # Status codes for running command with timeout.
 eTimedOut, ePassed, eFailed = 124, 0, 1
@@ -171,73 +157,93 @@
             unexpected_successes = unexpected_successes + int(unexpected_success_count.group(1))
         if error_count is not None:
             failures = failures + int(error_count.group(1))
-        pass
     return passes, failures, unexpected_successes
 
 
-def create_new_process_group():
-    """Creates a new process group for the process."""
-    os.setpgid(os.getpid(), os.getpid())
+class DoTestProcessDriver(process_control.ProcessDriver):
+    """Drives the dotest.py inferior process and handles bookkeeping."""
+    def __init__(self, output_file, output_file_lock, pid_events, file_name):
+        super(DoTestProcessDriver, self).__init__()
+        self.output_file = output_file
+        self.output_lock = lldb_utils.OptionalWith(output_file_lock)
+        self.pid_events = pid_events
+        self.results = None
+        self.file_name = file_name
+
+    def write(self, content):
+        with self.output_lock:
+            self.output_file.write(content)
+
+    def on_process_started(self):
+        if self.pid_events:
+            self.pid_events.put_nowait(('created', self.process.pid))
+
+    def on_process_exited(self, command, output, was_timeout, exit_status):
+        if self.pid_events:
+            # No point in culling out those with no exit_status (i.e.
+            # those we failed to kill). That would just cause
+            # downstream code to try to kill it later on a Ctrl-C. At
+            # this point, a best-effort-to-kill already took place. So
+            # call it destroyed here.
+            self.pid_events.put_nowait(('destroyed', self.process.pid))
+
+        # Override the exit status if it was a timeout.
+        if was_timeout:
+            exit_status = eTimedOut
+
+        # If we didn't end up with any output, call it empty for
+        # stdout/stderr.
+        if output is None:
+            output = ('', '')
+
+        # Now parse the output.
+        passes, failures, unexpected_successes = parse_test_results(output)
+        if exit_status == 0:
+            # stdout does not have any useful information from 'dotest.py',
+            # only stderr does.
+            report_test_pass(self.file_name, output[1])
+        else:
+            report_test_failure(self.file_name, command, output[1])
 
+        # Save off the results for the caller.
+        self.results = (
+            self.file_name,
+            exit_status,
+            passes,
+            failures,
+            unexpected_successes)
 
-def call_with_timeout(command, timeout, name, inferior_pid_events):
-    """Run command with a timeout if possible.
-    -s QUIT will create a coredump if they are enabled on your system
-    """
-    process = None
-    if timeout_command and timeout != "0":
-        command = [timeout_command, '-s', 'QUIT', timeout] + command
 
+def call_with_timeout(command, timeout, name, inferior_pid_events):
+    # Add our worker index (if we have one) to all test events
+    # from this inferior.
     if GET_WORKER_INDEX is not None:
         try:
             worker_index = GET_WORKER_INDEX()
             command.extend([
-                "--event-add-entries", "worker_index={}:int".format(worker_index)])
-        except:
-            # Ctrl-C does bad things to multiprocessing.Manager.dict() lookup.
+                "--event-add-entries",
+                "worker_index={}:int".format(worker_index)])
+        except:  # pylint: disable=bare-except
+            # Ctrl-C does bad things to multiprocessing.Manager.dict()
+            # lookup.  Just swallow it.
             pass
 
-    # Specifying a value for close_fds is unsupported on Windows when using
-    # subprocess.PIPE
-    if os.name != "nt":
-        process = subprocess.Popen(command,
-                                   stdin=subprocess.PIPE,
-                                   stdout=subprocess.PIPE,
-                                   stderr=subprocess.PIPE,
-                                   close_fds=True,
-                                   preexec_fn=create_new_process_group)
-    else:
-        process = subprocess.Popen(command,
-                                   stdin=subprocess.PIPE,
-                                   stdout=subprocess.PIPE,
-                                   stderr=subprocess.PIPE)
-    inferior_pid = process.pid
-    if inferior_pid_events:
-        inferior_pid_events.put_nowait(('created', inferior_pid))
-    output = process.communicate()
-
-    # The inferior should now be entirely wrapped up.
-    exit_status = process.returncode
-    if exit_status is None:
-        raise Exception(
-            "no exit status available after the inferior dotest.py "
-            "should have completed")
-
-    if inferior_pid_events:
-        inferior_pid_events.put_nowait(('destroyed', inferior_pid))
-
-    passes, failures, unexpected_successes = parse_test_results(output)
-    if exit_status == 0:
-        # stdout does not have any useful information from 'dotest.py',
-        # only stderr does.
-        report_test_pass(name, output[1])
-    else:
-        # TODO need to differentiate a failing test from a run that
-        # was broken out of by a SIGTERM/SIGKILL, reporting those as
-        # an error.  If a signal-based completion, need to call that
-        # an error.
-        report_test_failure(name, command, output[1])
-    return name, exit_status, passes, failures, unexpected_successes
+    # Create the inferior dotest.py ProcessDriver.
+    process_driver = DoTestProcessDriver(
+        sys.stdout,
+        output_lock,
+        inferior_pid_events,
+        name)
+
+    # Run it with a timeout.
+    process_driver.run_command_with_timeout(command, timeout)
+
+    # Return the results.
+    if not process_driver.results:
+        # This is truly exceptional.  Even a failing or timed out
+        # binary should have called the results-generation code.
+        raise Exception("no test results were generated whatsoever")
+    return process_driver.results
 
 
 def process_dir(root, files, test_root, dotest_argv, inferior_pid_events):
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to