On Mon, Jun 10, 2024 at 9:36 AM Juraj Linkeš <juraj.lin...@pantheon.tech> wrote: > > > diff --git a/dts/framework/remote_session/interactive_shell.py > > b/dts/framework/remote_session/interactive_shell.py > > index 5cfe202e15..921c73d9df 100644 > > --- a/dts/framework/remote_session/interactive_shell.py > > +++ b/dts/framework/remote_session/interactive_shell.py > > @@ -32,6 +34,10 @@ class InteractiveShell(ABC): > > and collecting input until reaching a certain prompt. All interactive > > applications > > will use the same SSH connection, but each will create their own > > channel on that > > session. > > + > > + Attributes: > > + is_started: :data:`True` if the application has started > > successfully, :data:`False` > > + otherwise. > > """ > > > > _interactive_session: SSHClient > > @@ -41,6 +47,7 @@ class InteractiveShell(ABC): > > _logger: DTSLogger > > _timeout: float > > _app_args: str > > + _finalizer: weakref.finalize > > > > #: Prompt to expect at the end of output when sending a command. > > #: This is often overridden by subclasses. > > @@ -58,6 +65,8 @@ class InteractiveShell(ABC): > > #: for DPDK on the node will be prepended to the path to the > > executable. > > dpdk_app: ClassVar[bool] = False > > > > + is_started: bool = False > > A better name would be is_alive to unify it with SSHSession.
Ack. > > > + > > def __init__( > > self, > > interactive_session: SSHClient, > > @@ -93,17 +102,39 @@ def __init__( > > def _start_application(self, get_privileged_command: Callable[[str], > > str] | None) -> None: > > """Starts a new interactive application based on the path to the > > app. > > > > - This method is often overridden by subclasses as their process for > > - starting may look different. > > + This method is often overridden by subclasses as their process for > > starting may look > > + different. Initialization of the shell on the host can be retried > > up to 5 times. This is > > + done because some DPDK applications need slightly more time after > > exiting their script to > > + clean up EAL before others can start. > > + > > + When the application is started we also bind a class for > > finalization to this instance of > > + the shell to ensure proper cleanup of the application. > > Let's also include the explanation from the commit message. Ack. > > > > > Args: > > get_privileged_command: A function (but could be any > > callable) that produces > > the version of the command with elevated privileges. > > """ > > + self._finalizer = weakref.finalize(self, self._close) > > This looks like exactly what we should do, but out of curiosity, do > Paramiko docs mention how we should handle channel closing? They don't say much about how to properly handle closing them. They do mention, though, that the channels are automatically closed when their transport is closed, or when they are garbage collected. I guess the likely reason they don't say how to handle closing them is that, regardless of what you do, they will still call `close()` at garbage collection. 
> > > + max_retries = 5 > > + self._ssh_channel.settimeout(5) > > start_command = f"{self.path} {self._app_args}" > > if get_privileged_command is not None: > > start_command = get_privileged_command(start_command) > > - self.send_command(start_command) > > + self.is_started = True > > + for retry in range(max_retries): > > + try: > > + self.send_command(start_command) > > + break > > + except TimeoutError: > > + self._logger.info( > > + "Interactive shell failed to start, retrying... " > > + f"({retry+1} out of {max_retries})" > > + ) > > + else: > > + self._ssh_channel.settimeout(self._timeout) > > + self.is_started = False # update state on failure to start > > + raise InteractiveCommandExecutionError("Failed to start > > application.") > > + self._ssh_channel.settimeout(self._timeout) > > > > def send_command(self, command: str, prompt: str | None = None) -> > > str: > > """Send `command` and get all output before the expected ending > > string.