[ https://issues.apache.org/jira/browse/SPARK-50914?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17916543#comment-17916543 ]

Hyukjin Kwon commented on SPARK-50914:
--------------------------------------

Taking a look now

> Fix flaky ResourceProfileTests.test_profile_before_sc_for_connect
> -----------------------------------------------------------------
>
>                 Key: SPARK-50914
>                 URL: https://issues.apache.org/jira/browse/SPARK-50914
>             Project: Spark
>          Issue Type: Sub-task
>          Components: Connect, PySpark, Tests
>    Affects Versions: 4.0.0
>            Reporter: Hyukjin Kwon
>            Priority: Major
>
> https://github.com/apache/spark/actions/runs/12874308876/job/35893494368
> {code}
> ======================================================================
> ERROR [611.446s]: test_profile_before_sc_for_connect (pyspark.resource.tests.test_connect_resources.ResourceProfileTests.test_profile_before_sc_for_connect)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py",
>  line 162, in _has_next
>     self._current = self._call_iter(
>                     ^^^^^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py",
>  line 281, in _call_iter
>     raise e
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py",
>  line 261, in _call_iter
>     return iter_fun()
>            ^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py",
>  line 163, in <lambda>
>     lambda: next(self._iterator)  # type: ignore[arg-type]
>             ^^^^^^^^^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/grpc/_channel.py",
>  line 543, in __next__
>     return self._next()
>            ^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/grpc/_channel.py",
>  line 952, in _next
>     raise self
> grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
>       status = StatusCode.UNAVAILABLE
>       details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
>       debug_error_string = "UNKNOWN:Error received from peer  {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2025-01-20T20:13:41.555914636+00:00"}"
> >
> The above exception was the direct cause of the following exception:
> Traceback (most recent call last):
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/resource/tests/test_connect_resources.py",
>  line 49, in test_profile_before_sc_for_connect
>     rp.id
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/resource/profile.py",
>  line 138, in id
>     rp = ResourceProfile(
>          ^^^^^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/resource/profile.py",
>  line 65, in __init__
>     self._id = session.client._create_profile(self._remote_profile)
>                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py",
>  line 1938, in _create_profile
>     (_, properties, _) = self.execute_command(cmd)
>                          ^^^^^^^^^^^^^^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py",
>  line 1121, in execute_command
>     data, _, metrics, observed_metrics, properties = self._execute_and_fetch(
>                                                      ^^^^^^^^^^^^^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py",
>  line 1533, in _execute_and_fetch
>     for response in self._execute_and_fetch_as_iterator(
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py",
>  line 1510, in _execute_and_fetch_as_iterator
>     self._handle_error(error)
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py",
>  line 1790, in _handle_error
>     raise error
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py",
>  line 1496, in _execute_and_fetch_as_iterator
>     for b in generator:
>   File "<frozen _collections_abc>", line 330, in __next__
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py",
>  line 138, in send
>     if not self._has_next():
>            ^^^^^^^^^^^^^^^^
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py",
>  line 190, in _has_next
>     raise e
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/reattach.py",
>  line 158, in _has_next
>     for attempt in self._retrying():
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/retries.py",
>  line 251, in __iter__
>     self._wait()
>   File 
> "/opt/hostedtoolcache/Python/3.11.11/x64/lib/python3.11/site-packages/pyspark/sql/connect/client/retries.py",
>  line 236, in _wait
>     raise RetriesExceeded(errorClass="RETRIES_EXCEEDED", messageParameters={}) from exception
> pyspark.errors.exceptions.base.RetriesExceeded: [RETRIES_EXCEEDED] The maximum number of retries has been exceeded.
> ----------------------------------------------------------------------
> Ran 1 test in 611.446s
> FAILED (errors=1)
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to