dsmiley commented on code in PR #2160: URL: https://github.com/apache/solr/pull/2160#discussion_r1601881299
########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java: ########## @@ -480,35 +488,70 @@ public RequestWriter getRequestWriter() { return requestWriter; } + private boolean isServerAlive(ServerWrapper zombieServer) + throws SolrServerException, IOException { + if (null == aliveCheckQuery) { + log.debug("Assuming success because aliveCheckQuery is null"); + return true; + } + log.debug("Running ping check on server " + zombieServer.getBaseUrl()); + QueryRequest queryRequest = new QueryRequest(aliveCheckQuery); + queryRequest.setBasePath(zombieServer.baseUrl); + return queryRequest.process(getClient(zombieServer.getBaseUrl())).getStatus() == 0; + } + + private void handleServerBackUp(ServerWrapper zombieServer) { + // server has come back up. + // make sure to remove from zombies before adding to alive to avoid a race condition + // where another thread could mark it down, move it back to zombie, and then we delete + // from zombie and lose it forever. + ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); + if (wrapper != null) { + wrapper.failedPings = 0; + if (wrapper.standard) { + addToAlive(wrapper); + } + } else { + // something else already moved the server from zombie to alive + } + } + + private void handleServerDown(ServerWrapper zombieServer) { + // Expected. The server is still down. + zombieServer.failedPings++; + zombieServer.skipAliveCheckIters = this.aliveCheckSkipIters; + + // If the server doesn't belong in the standard set belonging to this load balancer + // then simply drop it after a certain number of failed pings. 
+ if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) { + zombieServers.remove(zombieServer.getBaseUrl()); + } + } + private void checkAZombieServer(ServerWrapper zombieServer) { try { - QueryRequest queryRequest = new QueryRequest(solrQuery); - queryRequest.setBasePath(zombieServer.baseUrl); - QueryResponse resp = queryRequest.process(getClient(zombieServer.getBaseUrl())); - if (resp.getStatus() == 0) { - // server has come back up. - // make sure to remove from zombies before adding to alive to avoid a race condition - // where another thread could mark it down, move it back to zombie, and then we delete - // from zombie and lose it forever. - ServerWrapper wrapper = zombieServers.remove(zombieServer.getBaseUrl()); - if (wrapper != null) { - wrapper.failedPings = 0; - if (wrapper.standard) { - addToAlive(wrapper); - } - } else { - // something else already moved the server from zombie to alive - } + // push back on liveness checks only every Nth iteration + if (zombieServer.skipAliveCheckIters > 0) { + log.debug( + "Skipping liveness check for server " + + zombieServer.getBaseUrl() + + " because skipAliveCheckIters = " + + zombieServer.skipAliveCheckIters); + zombieServer.skipAliveCheckIters--; + return; } - } catch (Exception e) { - // Expected. The server is still down. - zombieServer.failedPings++; - // If the server doesn't belong in the standard set belonging to this load balancer - // then simply drop it after a certain number of failed pings. - if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) { - zombieServers.remove(zombieServer.getBaseUrl()); + if (isServerAlive(zombieServer)) { + log.debug("Successfully pinged server " + zombieServer.getBaseUrl() + ", marking it alive"); Review Comment: Always use "{}" placeholder templates in SLF4J log calls instead of string concatenation, e.g. log.debug("Successfully pinged server {}, marking it alive", zombieServer.getBaseUrl()); -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org