diff --git a/dts/framework/remote_session/interactive_shell.py b/dts/framework/remote_session/interactive_shell.py
index 5cfe202e15..921c73d9df 100644
--- a/dts/framework/remote_session/interactive_shell.py
+++ b/dts/framework/remote_session/interactive_shell.py
@@ -32,6 +34,10 @@ class InteractiveShell(ABC):
and collecting input until reaching a certain prompt. All interactive applications
will use the same SSH connection, but each will create their own channel on that
session.
+
+ Attributes:
+ is_started: :data:`True` if the application has started successfully, :data:`False`
+ otherwise.
"""
_interactive_session: SSHClient
@@ -41,6 +47,7 @@ class InteractiveShell(ABC):
_logger: DTSLogger
_timeout: float
_app_args: str
+ _finalizer: weakref.finalize
#: Prompt to expect at the end of output when sending a command.
#: This is often overridden by subclasses.
@@ -58,6 +65,8 @@ class InteractiveShell(ABC):
#: for DPDK on the node will be prepended to the path to the executable.
dpdk_app: ClassVar[bool] = False
+ is_started: bool = False
A better name would be is_alive to unify it with SSHSession.
+
def __init__(
self,
interactive_session: SSHClient,
@@ -93,17 +102,39 @@ def __init__(
def _start_application(self, get_privileged_command: Callable[[str], str] | None) -> None:
"""Starts a new interactive application based on the path to the app.
- This method is often overridden by subclasses as their process for
- starting may look different.
+ This method is often overridden by subclasses as their process for starting may look
+ different. Initialization of the shell on the host can be retried up to 5 times. This is
+ done because some DPDK applications need slightly more time after exiting their script to
+ clean up EAL before others can start.
+
+ When the application is started we also bind a class for finalization to this instance of
+ the shell to ensure proper cleanup of the application.
Let's also include the explanation from the commit message.
Args:
get_privileged_command: A function (but could be any callable) that produces
the version of the command with elevated privileges.
"""
+ self._finalizer = weakref.finalize(self, self._close)
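This looks like exactly what we should do, but out of curiosity, do the
Paramiko docs mention how we should handle channel closing?
One caveat worth checking: the weakref docs say the finalize callback should
not be a bound method of the tracked object, because the finalizer then holds
a strong reference to it. With self._close, the shell would likely only be
cleaned up at interpreter exit rather than when it becomes unreachable.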
+ max_retries = 5
+ self._ssh_channel.settimeout(5)
start_command = f"{self.path} {self._app_args}"
if get_privileged_command is not None:
start_command = get_privileged_command(start_command)
- self.send_command(start_command)
+ self.is_started = True
+ for retry in range(max_retries):
+ try:
+ self.send_command(start_command)
+ break
+ except TimeoutError:
+ self._logger.info(
+ "Interactive shell failed to start, retrying... "
+ f"({retry+1} out of {max_retries})"
+ )
+ else:
+ self._ssh_channel.settimeout(self._timeout)
+ self.is_started = False # update state on failure to start
+ raise InteractiveCommandExecutionError("Failed to start application.")
+ self._ssh_channel.settimeout(self._timeout)
def send_command(self, command: str, prompt: str | None = None) -> str:
"""Send `command` and get all output before the expected ending string.