epugh commented on code in PR #2160: URL: https://github.com/apache/solr/pull/2160#discussion_r1601674762
########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java: ########## @@ -281,6 +298,37 @@ public LBHttp2SolrClient.Builder setAliveCheckInterval(int aliveCheckInterval, T return this; } + /** + * Positive values can be used to ensure that the liveness checks for a given URL will only be + * run every "Nth" iteration of the setAliveCheckInterval + * + * @param aliveCheckSkipIters, number of iterations to skip, an int value * Review Comment: did this mean to have a trailing `*`? ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() { return requestWriter; } + private boolean isServerAlive(ServerWrapper zombieServer) + throws SolrServerException, IOException { + if (null == aliveCheckQuery) { + log.debug("Assuming success because aliveCheckQuery is null"); + return true; + } + log.debug("Running ping check on server " + zombieServer.getBaseUrl()); + QueryRequest queryRequest = new QueryRequest(aliveCheckQuery); + queryRequest.setBasePath(zombieServer.baseUrl); + return queryRequest.process(getClient(zombieServer.getBaseUrl())).getStatus() == 0; + } + + private void handleServerBackUp(ServerWrapper zombieServer) { + // server has come back up. + // make sure to remove from zombies before adding to alive to avoid a race condition + // where another thread could mark it down, move it back to zombie, and then we delete + // from zombie and lose it forever. + ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); + if (wrapper != null) { + wrapper.failedPings = 0; + if (wrapper.standard) { + addToAlive(wrapper); + } + } else { + // something else already moved the server from zombie to alive + } + } + + private void handleServerDown(ServerWrapper zombieServer) { + // Expected. The server is still down. + zombieServer.failedPings++; + zombieServer.skipAliveCheckIters = this.aliveCheckSkipIters; + + // If the server doesn't belong in the standard set belonging to this load balancer + // then simply drop it after a certain number of failed pings. + if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) { + zombieServers.remove(zombieServer.getBaseUrl()); + } + } + private void checkAZombieServer(ServerWrapper zombieServer) { try { - QueryRequest queryRequest = new QueryRequest(solrQuery); - queryRequest.setBasePath(zombieServer.baseUrl); - QueryResponse resp = queryRequest.process(getClient(zombieServer.getBaseUrl())); - if (resp.getStatus() == 0) { - // server has come back up. - // make sure to remove from zombies before adding to alive to avoid a race condition - // where another thread could mark it down, move it back to zombie, and then we delete - // from zombie and lose it forever. - ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); - if (wrapper != null) { - wrapper.failedPings = 0; - if (wrapper.standard) { - addToAlive(wrapper); - } - } else { - // something else already moved the server from zombie to alive - } + // push back on liveness checks only every Nth iteration + if (zombieServer.skipAliveCheckIters > 0) { + log.debug( + "Skipping liveness check for server " + + zombieServer.getBaseUrl() + + " because skipAliveCheckIters = " + + zombieServer.skipAliveCheckIters); + zombieServer.skipAliveCheckIters--; + return; } - } catch (Exception e) { - // Expected. The server is still down. - zombieServer.failedPings++; - // If the server doesn't belong in the standard set belonging to this load balancer - // then simply drop it after a certain number of failed pings. - if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) { - zombieServers.remove(zombieServer.getBaseUrl()); + if (isServerAlive(zombieServer)) { + log.debug("Successfully pinged server " + zombieServer.getBaseUrl() + ", marking it alive"); Review Comment: and the check ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java: ########## @@ -266,19 +280,49 @@ public Builder withBaseSolrUrls(String... solrUrls) { /** * LBHttpSolrServer keeps pinging the dead servers at fixed interval to find if it is alive. Use - * this to set that interval - * - * @param aliveCheckInterval time in milliseconds + * this to set that interval. + * @param aliveCheckInterval how often to ping for aliveness + * @see #setAliveCheckSkipIters(int) + * @see #setAliveCheckQuery(SolrQuery) */ public Builder setAliveCheckInterval(int aliveCheckInterval) { if (aliveCheckInterval <= 0) { throw new IllegalArgumentException( - "Alive check interval must be " + "positive, specified value = " + aliveCheckInterval); + "Alive check interval must be positive, specified value = " + aliveCheckInterval); } this.aliveCheckInterval = aliveCheckInterval; return this; } + /** + * Positive values can be used to ensure that the liveness checks for a given URL will only be + * run every "Nth" iteration of the setAliveCheckInterval + * @param aliveCheckSkipIters, number of iterations to skip, an int value * + * @see #setAliveCheckInterval(int) + * @see #setAliveCheckQuery(SolrQuery) + */ + public Builder setAliveCheckSkipIters(int aliveCheckSkipIters) { + if (aliveCheckSkipIters < 0) { + throw new IllegalArgumentException( + ("Alive Check Skip Iterations must be positive, specified value = " Review Comment: and the doubled ( ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -79,24 +81,27 @@ public abstract class LBSolrClient extends SolrClient { protected long aliveCheckIntervalMillis = TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); // 1 minute between checks + protected int aliveCheckSkipIters = 0; Review Comment: Do we use the short hand `Iters` in lots of places? If not, I like the full `Iteration` as just one less thing to read. ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() { return requestWriter; } + private boolean isServerAlive(ServerWrapper zombieServer) + throws SolrServerException, IOException { + if (null == aliveCheckQuery) { + log.debug("Assuming success because aliveCheckQuery is null"); + return true; + } + log.debug("Running ping check on server " + zombieServer.getBaseUrl()); Review Comment: likewise on the check? ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() { return requestWriter; } + private boolean isServerAlive(ServerWrapper zombieServer) + throws SolrServerException, IOException { + if (null == aliveCheckQuery) { + log.debug("Assuming success because aliveCheckQuery is null"); + return true; + } + log.debug("Running ping check on server " + zombieServer.getBaseUrl()); + QueryRequest queryRequest = new QueryRequest(aliveCheckQuery); + queryRequest.setBasePath(zombieServer.baseUrl); + return queryRequest.process(getClient(zombieServer.getBaseUrl())).getStatus() == 0; + } + + private void handleServerBackUp(ServerWrapper zombieServer) { + // server has come back up. + // make sure to remove from zombies before adding to alive to avoid a race condition + // where another thread could mark it down, move it back to zombie, and then we delete + // from zombie and lose it forever. + ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); + if (wrapper != null) { + wrapper.failedPings = 0; + if (wrapper.standard) { + addToAlive(wrapper); + } + } else { + // something else already moved the server from zombie to alive + } + } + + private void handleServerDown(ServerWrapper zombieServer) { + // Expected. The server is still down. + zombieServer.failedPings++; + zombieServer.skipAliveCheckIters = this.aliveCheckSkipIters; + + // If the server doesn't belong in the standard set belonging to this load balancer + // then simply drop it after a certain number of failed pings. + if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) { + zombieServers.remove(zombieServer.getBaseUrl()); + } + } + private void checkAZombieServer(ServerWrapper zombieServer) { try { - QueryRequest queryRequest = new QueryRequest(solrQuery); - queryRequest.setBasePath(zombieServer.baseUrl); - QueryResponse resp = queryRequest.process(getClient(zombieServer.getBaseUrl())); - if (resp.getStatus() == 0) { - // server has come back up. - // make sure to remove from zombies before adding to alive to avoid a race condition - // where another thread could mark it down, move it back to zombie, and then we delete - // from zombie and lose it forever. - ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); - if (wrapper != null) { - wrapper.failedPings = 0; - if (wrapper.standard) { - addToAlive(wrapper); - } - } else { - // something else already moved the server from zombie to alive - } + // push back on liveness checks only every Nth iteration + if (zombieServer.skipAliveCheckIters > 0) { + log.debug( Review Comment: the ifDebugEnabled check ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() { return requestWriter; } + private boolean isServerAlive(ServerWrapper zombieServer) + throws SolrServerException, IOException { + if (null == aliveCheckQuery) { + log.debug("Assuming success because aliveCheckQuery is null"); + return true; + } + log.debug("Running ping check on server " + zombieServer.getBaseUrl()); + QueryRequest queryRequest = new QueryRequest(aliveCheckQuery); + queryRequest.setBasePath(zombieServer.baseUrl); + return queryRequest.process(getClient(zombieServer.getBaseUrl())).getStatus() == 0; + } + + private void handleServerBackUp(ServerWrapper zombieServer) { + // server has come back up. + // make sure to remove from zombies before adding to alive to avoid a race condition + // where another thread could mark it down, move it back to zombie, and then we delete + // from zombie and lose it forever. + ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); + if (wrapper != null) { + wrapper.failedPings = 0; + if (wrapper.standard) { + addToAlive(wrapper); + } + } else { + // something else already moved the server from zombie to alive + } + } + + private void handleServerDown(ServerWrapper zombieServer) { + // Expected. The server is still down. + zombieServer.failedPings++; + zombieServer.skipAliveCheckIters = this.aliveCheckSkipIters; + + // If the server doesn't belong in the standard set belonging to this load balancer + // then simply drop it after a certain number of failed pings. + if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) { + zombieServers.remove(zombieServer.getBaseUrl()); + } + } + private void checkAZombieServer(ServerWrapper zombieServer) { try { - QueryRequest queryRequest = new QueryRequest(solrQuery); - queryRequest.setBasePath(zombieServer.baseUrl); - QueryResponse resp = queryRequest.process(getClient(zombieServer.getBaseUrl())); - if (resp.getStatus() == 0) { - // server has come back up. - // make sure to remove from zombies before adding to alive to avoid a race condition - // where another thread could mark it down, move it back to zombie, and then we delete - // from zombie and lose it forever. - ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); - if (wrapper != null) { - wrapper.failedPings = 0; - if (wrapper.standard) { - addToAlive(wrapper); - } - } else { - // something else already moved the server from zombie to alive - } + // push back on liveness checks only every Nth iteration + if (zombieServer.skipAliveCheckIters > 0) { + log.debug( + "Skipping liveness check for server " + + zombieServer.getBaseUrl() + + " because skipAliveCheckIters = " + + zombieServer.skipAliveCheckIters); + zombieServer.skipAliveCheckIters--; + return; } - } catch (Exception e) { - // Expected. The server is still down. - zombieServer.failedPings++; - // If the server doesn't belong in the standard set belonging to this load balancer - // then simply drop it after a certain number of failed pings. - if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) { - zombieServers.remove(zombieServer.getBaseUrl()); + if (isServerAlive(zombieServer)) { + log.debug("Successfully pinged server " + zombieServer.getBaseUrl() + ", marking it alive"); + handleServerBackUp(zombieServer); } + + } catch (Exception e) { + log.debug( Review Comment: and the check! I think the whole check thing is werid that it's not just part of log.debug! ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -79,24 +81,27 @@ public abstract class LBSolrClient extends SolrClient { protected long aliveCheckIntervalMillis = TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); // 1 minute between checks + protected int aliveCheckSkipIters = 0; Review Comment: maybe just a personal thing though. ########## solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java: ########## @@ -205,16 +202,31 @@ public void testTwoServers() throws Exception { } } - public void testReliability() throws Exception { + @LogLevel("org.apache.solr.client.solrj.impl.LBSolrClient=DEBUG") Review Comment: ah, I think I see why you have it from looking lower down in the LogReliablityTestSetup ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java: ########## @@ -266,19 +280,49 @@ public Builder withBaseSolrUrls(String... solrUrls) { /** * LBHttpSolrServer keeps pinging the dead servers at fixed interval to find if it is alive. Use - * this to set that interval - * - * @param aliveCheckInterval time in milliseconds + * this to set that interval. + * @param aliveCheckInterval how often to ping for aliveness + * @see #setAliveCheckSkipIters(int) + * @see #setAliveCheckQuery(SolrQuery) */ public Builder setAliveCheckInterval(int aliveCheckInterval) { if (aliveCheckInterval <= 0) { throw new IllegalArgumentException( - "Alive check interval must be " + "positive, specified value = " + aliveCheckInterval); + "Alive check interval must be positive, specified value = " + aliveCheckInterval); } this.aliveCheckInterval = aliveCheckInterval; return this; } + /** + * Positive values can be used to ensure that the liveness checks for a given URL will only be + * run every "Nth" iteration of the setAliveCheckInterval + * @param aliveCheckSkipIters, number of iterations to skip, an int value * Review Comment: again the trailing `*` ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java: ########## @@ -281,6 +298,37 @@ public LBHttp2SolrClient.Builder setAliveCheckInterval(int aliveCheckInterval, T return this; } + /** + * Positive values can be used to ensure that the liveness checks for a given URL will only be + * run every "Nth" iteration of the setAliveCheckInterval + * + * @param aliveCheckSkipIters, number of iterations to skip, an int value * + * @see #setAliveCheckInterval(int, TimeUnit) + * @see #setAliveCheckQuery(SolrQuery) + */ + public LBHttp2SolrClient.Builder setAliveCheckSkipIters(int aliveCheckSkipIters) { + if (aliveCheckSkipIters < 0) { + throw new IllegalArgumentException( + ("Alive Check Skip Iterations must be positive, specified value = " Review Comment: do we need the doubled `(` here? Maybe for string parsing? ########## solr/solrj/src/test/org/apache/solr/client/solrj/TestLBHttp2SolrClient.java: ########## @@ -205,16 +202,31 @@ public void testTwoServers() throws Exception { } } - public void testReliability() throws Exception { + @LogLevel("org.apache.solr.client.solrj.impl.LBSolrClient=DEBUG") Review Comment: is this needed to make the test pass, or from working on the code? the @logLevel? ########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() { return requestWriter; } + private boolean isServerAlive(ServerWrapper zombieServer) + throws SolrServerException, IOException { + if (null == aliveCheckQuery) { Review Comment: I think we have normally a check to see if we are in debug mode before calling the `.debug` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org