This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new ed78d6c2c9e Fix KubernetesPodOperator XCom sidecar hang on Alpine
(#58931)
ed78d6c2c9e is described below
commit ed78d6c2c9e98c35dfdd58dc8b410bab89847de7
Author: Yahely Ushpiz <[email protected]>
AuthorDate: Wed Mar 11 21:49:34 2026 +0200
Fix KubernetesPodOperator XCom sidecar hang on Alpine (#58931)
* Fix KubernetesPodOperator XCom sidecar hang on Alpine
The [extract_xcom_kill] method previously used `pgrep -u` to identify and kill the
sidecar process. However, the `pgrep` implementation in some Alpine/BusyBox versions
does not support the `-u` flag, causing the command to fail and the sidecar to hang
indefinitely.
This commit replaces the `pgrep` command with a portable shell loop that iterates
over `/proc` to identify processes owned by the current user. This ensures
compatibility with all Alpine versions and removes the dependency on `pgrep`.
* changed alpine fallback command
* added conditional fallback xcom kill command when the primary one fails
* Revert "changed alpine fallback command"
This reverts commit 8c4b5f50fa284bee5d7b310d41bc983862cc406b.
* Convert the xcom kill command conditioning to exception and try/catch
* Fixed command string format
---
.../providers/cncf/kubernetes/utils/pod_manager.py | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git
a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py
b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py
index 32ce54465bd..78d95d38d0b 100644
---
a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py
+++
b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py
@@ -242,6 +242,10 @@ class PodNotFoundException(AirflowException):
"""Expected pod does not exist in kube-api."""
+class PodCommandException(AirflowException):
+ """When a pod command execution fails."""
+
+
class PodLogsConsumer:
"""
Responsible for pulling pod logs from a stream with checking a container
status before reading data.
@@ -969,7 +973,18 @@ class PodManager(LoggingMixin):
_preload_content=False,
)
) as resp:
- self._exec_pod_command(resp, "kill -2 $(pgrep -u $(id -u) -f
'sh')")
+ xcom_kill_command = "kill -2 $(pgrep -u $(id -u) -f 'sh')"
+ # fallback command for containers that don't support pgrep -u
+ fallback_xcom_kill_command = (
+ "for f in /proc/[0-9]*/comm; do "
+ '[ -O $f ] && read c < $f && [ "$c" = "sh" ] && pid=${f%/comm}
&& kill -2 ${pid##*/}; '
+ "done"
+ )
+ try:
+ self._exec_pod_command(resp, xcom_kill_command)
+ except PodCommandException:
+ self.log.info("Primary kill command failed, trying fallback
command")
+ self._exec_pod_command(resp, fallback_xcom_kill_command)
def _exec_pod_command(self, resp, command: str) -> str | None:
res = ""
@@ -985,8 +1000,8 @@ class PodManager(LoggingMixin):
while resp.peek_stderr():
error_res += resp.read_stderr()
if error_res:
- self.log.info("stderr from command: %s", error_res)
- break
+ self.log.warning("stderr from command: %s", error_res)
+ raise PodCommandException(f"Command failed with stderr:
{error_res}")
if res:
return res
return None