This is an automated email from the ASF dual-hosted git repository.

pjfanning pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/pekko.git


The following commit(s) were added to refs/heads/main by this push:
     new 3c78ad4eeb try to fix flaky RotatingProviderWithChangingKeysSpec 
(#2940)
3c78ad4eeb is described below

commit 3c78ad4eebe80390c06d325f9cba84f15e481af5
Author: PJ Fanning <[email protected]>
AuthorDate: Thu May 7 08:50:21 2026 +0100

    try to fix flaky RotatingProviderWithChangingKeysSpec (#2940)
    
    * Fix flaky RotatingProviderWithChangingKeysSpec: retry test and improve 
contact robustness
    
    Motivation:
    The `must rebuild the SSLContext using new keys` test is flaky because
    `contact` times out (6s) while TLS connection establishment is still
    in progress. A single failed run discards all the work done in that
    attempt.
    
    Modification:
    - `contact` now retries the `Identify`/`ActorIdentity` exchange up to
      3 times (3 s each) rather than failing immediately on one timeout,
      giving TLS connection establishment additional time to complete.
    - `RotatingProviderWithChangingKeysSpec` overrides `withFixture` to
      retry the whole test once on failure.  Before retrying, termination
      is requested (without waiting for it to complete) for the actor
      systems created during the failed attempt, so they do not interfere
      with the second run.  There are no port conflicts because new
      systems obtain fresh dynamic ports, and the old systems remain
      tracked in ArteryMultiNodeSpec.remoteSystems for final cleanup in
      afterTermination.
    - Added `import scala.concurrent.duration._` required by `3.seconds`.
    
    Result:
    The test is more resilient to transient TLS handshake delays and is
    automatically retried once if it still fails.
    
    Tests:
    - scalafmt and sbt were not installed, so both checks are recorded as skipped
    - sbt "remote / Test / testOnly 
o.a.p.remote.artery.tcp.ssl.RotatingProviderWithChangingKeysSpec" - skipped 
(sbt unavailable)
    
    References:
    None - reduces test flakiness
    
    Agent-Logs-Url: 
https://github.com/pjfanning/incubator-pekko/sessions/080d49a3-0c3c-401b-ae0a-a5fe875189cc
    
    Co-authored-by: pjfanning <[email protected]>
    
    * Update RotatingKeysSSLEngineProviderSpec.scala
    
    ---------
    
    Co-authored-by: copilot-swe-agent[bot] 
<[email protected]>
    Co-authored-by: pjfanning <[email protected]>
---
 .../ssl/RotatingKeysSSLEngineProviderSpec.scala    | 38 ++++++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git 
a/remote/src/test/scala/org/apache/pekko/remote/artery/tcp/ssl/RotatingKeysSSLEngineProviderSpec.scala
 
b/remote/src/test/scala/org/apache/pekko/remote/artery/tcp/ssl/RotatingKeysSSLEngineProviderSpec.scala
index b734355387..52b35e9458 100644
--- 
a/remote/src/test/scala/org/apache/pekko/remote/artery/tcp/ssl/RotatingKeysSSLEngineProviderSpec.scala
+++ 
b/remote/src/test/scala/org/apache/pekko/remote/artery/tcp/ssl/RotatingKeysSSLEngineProviderSpec.scala
@@ -23,6 +23,7 @@ import javax.net.ssl.SSLEngine
 import javax.net.ssl.SSLSession
 
 import scala.concurrent.blocking
+import scala.concurrent.duration._
 import scala.util.control.NonFatal
 
 import org.apache.pekko
@@ -45,6 +46,7 @@ import pekko.testkit.TestProbe
 import pekko.util.JavaVersion
 
 import com.typesafe.config.ConfigFactory
+import org.scalatest.Outcome
 
 // This is a simplification Spec. It doesn't rely on changing files.
 class RotatingProviderWithStaticKeysSpec
@@ -110,6 +112,23 @@ class RotatingProviderWithChangingKeysSpec
     extends 
RotatingKeysSSLEngineProviderSpec(RotatingKeysSSLEngineProviderSpec.tempFileConfig)
 {
   import RotatingKeysSSLEngineProviderSpec._
 
+  // Retry the test once on failure, cleaning up actor systems from the failed 
attempt first
+  override def withFixture(test: NoArgTest): Outcome = {
+    val systemsBefore = systemsToTerminate
+    val outcome = super.withFixture(test)
+    if (outcome.isFailed) {
+      // Terminate actor systems that were created during the failed attempt.
+      // Non-blocking: systems are still tracked in 
ArteryMultiNodeSpec.remoteSystems and
+      // will be awaited in afterTermination(), so there is no resource leak.
+      val newSystems = systemsToTerminate.drop(systemsBefore.length)
+      systemsToTerminate = systemsBefore
+      newSystems.foreach(_.terminate())
+      super.withFixture(test)
+    } else {
+      outcome
+    }
+  }
+
   protected override def atStartup(): Unit = {
     super.atStartup()
     deployCaCert()
@@ -267,9 +286,22 @@ abstract class 
RotatingKeysSSLEngineProviderSpec(extraConfig: String)
 
   def contact(fromSystem: ActorSystem, toPath: ActorPath): Unit = {
     val senderOnSource = TestProbe()(fromSystem)
-    fromSystem.actorSelection(toPath).tell(Identify(toPath.name), 
senderOnSource.ref)
-    val targetRef: ActorRef = 
senderOnSource.expectMsgType[ActorIdentity].ref.get
-    targetRef.tell("ping-1", senderOnSource.ref)
+    val maxAttempts = 3
+    // Per-attempt timeout; allows TLS connection establishment to complete 
before giving up
+    val identifyTimeout = 3.seconds
+    var attempts = 0
+    var targetRef: Option[ActorRef] = None
+    while (targetRef.isEmpty && attempts < maxAttempts) {
+      attempts += 1
+      fromSystem.actorSelection(toPath).tell(Identify(toPath.name), 
senderOnSource.ref)
+      senderOnSource.receiveOne(identifyTimeout) match {
+        case ActorIdentity(_, ref) => targetRef = ref
+        case _                     => // timeout or unexpected message; retry
+      }
+    }
+    val ref = targetRef.getOrElse(
+      fail(s"Timed out waiting for ActorIdentity from $toPath after 
$maxAttempts attempts"))
+    ref.tell("ping-1", senderOnSource.ref)
     senderOnSource.expectMsg("ping-1")
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to