epugh commented on code in PR #2160:
URL: https://github.com/apache/solr/pull/2160#discussion_r1601674762


##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java:
##########
@@ -281,6 +298,37 @@ public LBHttp2SolrClient.Builder setAliveCheckInterval(int 
aliveCheckInterval, T
       return this;
     }
 
+    /**
+     * Positive values can be used to ensure that the liveness checks for a 
given URL will only be
+     * run every "Nth" iteration of the setAliveCheckInterval
+     *
+     * @param aliveCheckSkipIters, number of iterations to skip, an int value *

Review Comment:
   did this mean to have a trailing `*`?



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java:
##########
@@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() {
     return requestWriter;
   }
 
+  private boolean isServerAlive(ServerWrapper zombieServer)
+      throws SolrServerException, IOException {
+    if (null == aliveCheckQuery) {
+      log.debug("Assuming success because aliveCheckQuery is null");
+      return true;
+    }
+    log.debug("Running ping check on server " + zombieServer.getBaseUrl());
+    QueryRequest queryRequest = new QueryRequest(aliveCheckQuery);
+    queryRequest.setBasePath(zombieServer.baseUrl);
+    return 
queryRequest.process(getClient(zombieServer.getBaseUrl())).getStatus() == 0;
+  }
+
+  private void handleServerBackUp(ServerWrapper zombieServer) {
+    // server has come back up.
+    // make sure to remove from zombies before adding to alive to avoid a race 
condition
+    // where another thread could mark it down, move it back to zombie, and 
then we delete
+    // from zombie and lose it forever.
+    ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl());
+    if (wrapper != null) {
+      wrapper.failedPings = 0;
+      if (wrapper.standard) {
+        addToAlive(wrapper);
+      }
+    } else {
+      // something else already moved the server from zombie to alive
+    }
+  }
+
+  private void handleServerDown(ServerWrapper zombieServer) {
+    // Expected. The server is still down.
+    zombieServer.failedPings++;
+    zombieServer.skipAliveCheckIters = this.aliveCheckSkipIters;
+
+    // If the server doesn't belong in the standard set belonging to this load 
balancer
+    // then simply drop it after a certain number of failed pings.
+    if (!zombieServer.standard && zombieServer.failedPings >= 
NONSTANDARD_PING_LIMIT) {
+      zombieServers.remove(zombieServer.getBaseUrl());
+    }
+  }
+
   private void checkAZombieServer(ServerWrapper zombieServer) {
     try {
-      QueryRequest queryRequest = new QueryRequest(solrQuery);
-      queryRequest.setBasePath(zombieServer.baseUrl);
-      QueryResponse resp = 
queryRequest.process(getClient(zombieServer.getBaseUrl()));
-      if (resp.getStatus() == 0) {
-        // server has come back up.
-        // make sure to remove from zombies before adding to alive to avoid a 
race condition
-        // where another thread could mark it down, move it back to zombie, 
and then we delete
-        // from zombie and lose it forever.
-        ServerWrapper wrapper = 
zombieServers.remove(zombieServer.getBaseUrl());
-        if (wrapper != null) {
-          wrapper.failedPings = 0;
-          if (wrapper.standard) {
-            addToAlive(wrapper);
-          }
-        } else {
-          // something else already moved the server from zombie to alive
-        }
+      // push back on liveness checks only every Nth iteration
+      if (zombieServer.skipAliveCheckIters > 0) {
+        log.debug(
+            "Skipping liveness check for server "
+                + zombieServer.getBaseUrl()
+                + " because skipAliveCheckIters = "
+                + zombieServer.skipAliveCheckIters);
+        zombieServer.skipAliveCheckIters--;
+        return;
       }
-    } catch (Exception e) {
-      // Expected. The server is still down.
-      zombieServer.failedPings++;
 
-      // If the server doesn't belong in the standard set belonging to this 
load balancer
-      // then simply drop it after a certain number of failed pings.
-      if (!zombieServer.standard && zombieServer.failedPings >= 
NONSTANDARD_PING_LIMIT) {
-        zombieServers.remove(zombieServer.getBaseUrl());
+      if (isServerAlive(zombieServer)) {
+        log.debug("Successfully pinged server " + zombieServer.getBaseUrl() + 
", marking it alive");

Review Comment:
   and the check



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java:
##########
@@ -266,19 +280,49 @@ public Builder withBaseSolrUrls(String... solrUrls) {
 
     /**
      * LBHttpSolrServer keeps pinging the dead servers at fixed interval to 
find if it is alive. Use
-     * this to set that interval
-     *
-     * @param aliveCheckInterval time in milliseconds
+     * this to set that interval.
+     * @param aliveCheckInterval how often to ping for aliveness
+     * @see #setAliveCheckSkipIters(int)
+     * @see #setAliveCheckQuery(SolrQuery)
      */
     public Builder setAliveCheckInterval(int aliveCheckInterval) {
       if (aliveCheckInterval <= 0) {
         throw new IllegalArgumentException(
-            "Alive check interval must be " + "positive, specified value = " + 
aliveCheckInterval);
+            "Alive check interval must be positive, specified value = " + 
aliveCheckInterval);
       }
       this.aliveCheckInterval = aliveCheckInterval;
       return this;
     }
 
+    /**
+     * Positive values can be used to ensure that the liveness checks for a 
given URL will only be
+     * run every "Nth" iteration of the setAliveCheckInterval
+     * @param aliveCheckSkipIters, number of iterations to skip, an int value *
+     * @see #setAliveCheckInterval(int)
+     * @see #setAliveCheckQuery(SolrQuery)
+     */
+    public Builder setAliveCheckSkipIters(int aliveCheckSkipIters) {
+      if (aliveCheckSkipIters < 0) {
+        throw new IllegalArgumentException(
+            ("Alive Check Skip Iterations must be positive, specified value = "

Review Comment:
   and the doubled (



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java:
##########
@@ -79,24 +81,27 @@ public abstract class LBSolrClient extends SolrClient {
 
   protected long aliveCheckIntervalMillis =
       TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); // 1 minute between 
checks
+  protected int aliveCheckSkipIters = 0;

Review Comment:
   Do we use the short hand `Iters` in lots of places?   If not, I like the 
full `Iteration` as just one less thing to read.



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java:
##########
@@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() {
     return requestWriter;
   }
 
+  private boolean isServerAlive(ServerWrapper zombieServer)
+      throws SolrServerException, IOException {
+    if (null == aliveCheckQuery) {
+      log.debug("Assuming success because aliveCheckQuery is null");
+      return true;
+    }
+    log.debug("Running ping check on server " + zombieServer.getBaseUrl());

Review Comment:
   likewise on the check?



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java:
##########
@@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() {
     return requestWriter;
   }
 
+  private boolean isServerAlive(ServerWrapper zombieServer)
+      throws SolrServerException, IOException {
+    if (null == aliveCheckQuery) {
+      log.debug("Assuming success because aliveCheckQuery is null");
+      return true;
+    }
+    log.debug("Running ping check on server " + zombieServer.getBaseUrl());
+    QueryRequest queryRequest = new QueryRequest(aliveCheckQuery);
+    queryRequest.setBasePath(zombieServer.baseUrl);
+    return 
queryRequest.process(getClient(zombieServer.getBaseUrl())).getStatus() == 0;
+  }
+
+  private void handleServerBackUp(ServerWrapper zombieServer) {
+    // server has come back up.
+    // make sure to remove from zombies before adding to alive to avoid a race 
condition
+    // where another thread could mark it down, move it back to zombie, and 
then we delete
+    // from zombie and lose it forever.
+    ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl());
+    if (wrapper != null) {
+      wrapper.failedPings = 0;
+      if (wrapper.standard) {
+        addToAlive(wrapper);
+      }
+    } else {
+      // something else already moved the server from zombie to alive
+    }
+  }
+
+  private void handleServerDown(ServerWrapper zombieServer) {
+    // Expected. The server is still down.
+    zombieServer.failedPings++;
+    zombieServer.skipAliveCheckIters = this.aliveCheckSkipIters;
+
+    // If the server doesn't belong in the standard set belonging to this load 
balancer
+    // then simply drop it after a certain number of failed pings.
+    if (!zombieServer.standard && zombieServer.failedPings >= 
NONSTANDARD_PING_LIMIT) {
+      zombieServers.remove(zombieServer.getBaseUrl());
+    }
+  }
+
   private void checkAZombieServer(ServerWrapper zombieServer) {
     try {
-      QueryRequest queryRequest = new QueryRequest(solrQuery);
-      queryRequest.setBasePath(zombieServer.baseUrl);
-      QueryResponse resp = 
queryRequest.process(getClient(zombieServer.getBaseUrl()));
-      if (resp.getStatus() == 0) {
-        // server has come back up.
-        // make sure to remove from zombies before adding to alive to avoid a 
race condition
-        // where another thread could mark it down, move it back to zombie, 
and then we delete
-        // from zombie and lose it forever.
-        ServerWrapper wrapper = 
zombieServers.remove(zombieServer.getBaseUrl());
-        if (wrapper != null) {
-          wrapper.failedPings = 0;
-          if (wrapper.standard) {
-            addToAlive(wrapper);
-          }
-        } else {
-          // something else already moved the server from zombie to alive
-        }
+      // push back on liveness checks only every Nth iteration
+      if (zombieServer.skipAliveCheckIters > 0) {
+        log.debug(

Review Comment:
   the ifDebugEnabled check



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java:
##########
@@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() {
     return requestWriter;
   }
 
+  private boolean isServerAlive(ServerWrapper zombieServer)
+      throws SolrServerException, IOException {
+    if (null == aliveCheckQuery) {
+      log.debug("Assuming success because aliveCheckQuery is null");
+      return true;
+    }
+    log.debug("Running ping check on server " + zombieServer.getBaseUrl());
+    QueryRequest queryRequest = new QueryRequest(aliveCheckQuery);
+    queryRequest.setBasePath(zombieServer.baseUrl);
+    return 
queryRequest.process(getClient(zombieServer.getBaseUrl())).getStatus() == 0;
+  }
+
+  private void handleServerBackUp(ServerWrapper zombieServer) {
+    // server has come back up.
+    // make sure to remove from zombies before adding to alive to avoid a race 
condition
+    // where another thread could mark it down, move it back to zombie, and 
then we delete
+    // from zombie and lose it forever.
+    ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl());
+    if (wrapper != null) {
+      wrapper.failedPings = 0;
+      if (wrapper.standard) {
+        addToAlive(wrapper);
+      }
+    } else {
+      // something else already moved the server from zombie to alive
+    }
+  }
+
+  private void handleServerDown(ServerWrapper zombieServer) {
+    // Expected. The server is still down.
+    zombieServer.failedPings++;
+    zombieServer.skipAliveCheckIters = this.aliveCheckSkipIters;
+
+    // If the server doesn't belong in the standard set belonging to this load 
balancer
+    // then simply drop it after a certain number of failed pings.
+    if (!zombieServer.standard && zombieServer.failedPings >= 
NONSTANDARD_PING_LIMIT) {
+      zombieServers.remove(zombieServer.getBaseUrl());
+    }
+  }
+
   private void checkAZombieServer(ServerWrapper zombieServer) {
     try {
-      QueryRequest queryRequest = new QueryRequest(solrQuery);
-      queryRequest.setBasePath(zombieServer.baseUrl);
-      QueryResponse resp = 
queryRequest.process(getClient(zombieServer.getBaseUrl()));
-      if (resp.getStatus() == 0) {
-        // server has come back up.
-        // make sure to remove from zombies before adding to alive to avoid a 
race condition
-        // where another thread could mark it down, move it back to zombie, 
and then we delete
-        // from zombie and lose it forever.
-        ServerWrapper wrapper = 
zombieServers.remove(zombieServer.getBaseUrl());
-        if (wrapper != null) {
-          wrapper.failedPings = 0;
-          if (wrapper.standard) {
-            addToAlive(wrapper);
-          }
-        } else {
-          // something else already moved the server from zombie to alive
-        }
+      // push back on liveness checks only every Nth iteration
+      if (zombieServer.skipAliveCheckIters > 0) {
+        log.debug(
+            "Skipping liveness check for server "
+                + zombieServer.getBaseUrl()
+                + " because skipAliveCheckIters = "
+                + zombieServer.skipAliveCheckIters);
+        zombieServer.skipAliveCheckIters--;
+        return;
       }
-    } catch (Exception e) {
-      // Expected. The server is still down.
-      zombieServer.failedPings++;
 
-      // If the server doesn't belong in the standard set belonging to this 
load balancer
-      // then simply drop it after a certain number of failed pings.
-      if (!zombieServer.standard && zombieServer.failedPings >= 
NONSTANDARD_PING_LIMIT) {
-        zombieServers.remove(zombieServer.getBaseUrl());
+      if (isServerAlive(zombieServer)) {
+        log.debug("Successfully pinged server " + zombieServer.getBaseUrl() + 
", marking it alive");
+        handleServerBackUp(zombieServer);
       }
+
+    } catch (Exception e) {
+      log.debug(

Review Comment:
   and the check!   I think the whole check thing is werid that it's not just 
part of log.debug!



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java:
##########
@@ -79,24 +81,27 @@ public abstract class LBSolrClient extends SolrClient {
 
   protected long aliveCheckIntervalMillis =
       TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); // 1 minute between 
checks
+  protected int aliveCheckSkipIters = 0;

Review Comment:
   maybe just a personal thing though.



##########
solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java:
##########
@@ -205,16 +202,31 @@ public void testTwoServers() throws Exception {
     }
   }
 
-  public void testReliability() throws Exception {
+  @LogLevel("org.apache.solr.client.solrj.impl.LBSolrClient=DEBUG")

Review Comment:
   ah, I think I see why you have it from looking lower down in the 
LogReliablityTestSetup



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java:
##########
@@ -266,19 +280,49 @@ public Builder withBaseSolrUrls(String... solrUrls) {
 
     /**
      * LBHttpSolrServer keeps pinging the dead servers at fixed interval to 
find if it is alive. Use
-     * this to set that interval
-     *
-     * @param aliveCheckInterval time in milliseconds
+     * this to set that interval.
+     * @param aliveCheckInterval how often to ping for aliveness
+     * @see #setAliveCheckSkipIters(int)
+     * @see #setAliveCheckQuery(SolrQuery)
      */
     public Builder setAliveCheckInterval(int aliveCheckInterval) {
       if (aliveCheckInterval <= 0) {
         throw new IllegalArgumentException(
-            "Alive check interval must be " + "positive, specified value = " + 
aliveCheckInterval);
+            "Alive check interval must be positive, specified value = " + 
aliveCheckInterval);
       }
       this.aliveCheckInterval = aliveCheckInterval;
       return this;
     }
 
+    /**
+     * Positive values can be used to ensure that the liveness checks for a 
given URL will only be
+     * run every "Nth" iteration of the setAliveCheckInterval
+     * @param aliveCheckSkipIters, number of iterations to skip, an int value *

Review Comment:
   again the trailing `*`



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java:
##########
@@ -281,6 +298,37 @@ public LBHttp2SolrClient.Builder setAliveCheckInterval(int 
aliveCheckInterval, T
       return this;
     }
 
+    /**
+     * Positive values can be used to ensure that the liveness checks for a 
given URL will only be
+     * run every "Nth" iteration of the setAliveCheckInterval
+     *
+     * @param aliveCheckSkipIters, number of iterations to skip, an int value *
+     * @see #setAliveCheckInterval(int, TimeUnit)
+     * @see #setAliveCheckQuery(SolrQuery)
+     */
+    public LBHttp2SolrClient.Builder setAliveCheckSkipIters(int 
aliveCheckSkipIters) {
+      if (aliveCheckSkipIters < 0) {
+        throw new IllegalArgumentException(
+            ("Alive Check Skip Iterations must be positive, specified value = "

Review Comment:
   do we need the doubled `(` here?   Maybe for string parsing?



##########
solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java:
##########
@@ -205,16 +202,31 @@ public void testTwoServers() throws Exception {
     }
   }
 
-  public void testReliability() throws Exception {
+  @LogLevel("org.apache.solr.client.solrj.impl.LBSolrClient=DEBUG")

Review Comment:
   is this needed to make the test pass, or from working on the code?  the 
@logLevel?



##########
solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java:
##########
@@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() {
     return requestWriter;
   }
 
+  private boolean isServerAlive(ServerWrapper zombieServer)
+      throws SolrServerException, IOException {
+    if (null == aliveCheckQuery) {

Review Comment:
   I think we have normally a check to see if we are in debug mode before 
calling the `.debug`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to