mlbiscoc commented on code in PR #2935: URL: https://github.com/apache/solr/pull/2935#discussion_r1910689266
########## solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java: ########## @@ -51,7 +52,9 @@ public abstract class BaseHttpClusterStateProvider implements ClusterStateProvid private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private String urlScheme; + private Set<String> initialNodes; volatile Set<String> liveNodes; + volatile Set<String> knownNodes; Review Comment: Thats fair. Removed known nodes and put everything into live nodes while using the initial set inside all the time. ########## solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java: ########## @@ -183,4 +204,99 @@ public void testClusterStateProvider() throws SolrServerException, IOException { clusterStateZk.getCollection("col2"), equalTo(clusterStateHttp.getCollection("col2"))); } } + + @Test + public void testClusterStateProviderDownedLiveNodes() throws Exception { + try (var cspZk = zkClientClusterStateProvider(); + var cspHttp = http2ClusterStateProvider()) { + Set<String> expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + Set<String> actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(2, actualLiveNodes.size()); + assertEquals(expectedLiveNodes, actualLiveNodes); + + cluster.stopJettySolrRunner(jettyNode1); + waitForCSPCacheTimeout(); + + expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualLiveNodes.size()); + assertEquals(expectedLiveNodes, actualLiveNodes); + + cluster.startJettySolrRunner(jettyNode1); + cluster.stopJettySolrRunner(jettyNode2); + waitForCSPCacheTimeout(); + + // Should still be reachable because known hosts doesn't remove initial nodes + expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualLiveNodes.size()); + assertEquals(expectedLiveNodes, actualLiveNodes); + } + } + + @Test + public void testClusterStateProviderDownedKnownHosts() throws Exception { + + try (var cspHttp = http2ClusterStateProvider()) { + + String jettyNode1Url = normalizeJettyUrl(jettyNode1.getBaseUrl().toString()); + String jettyNode2Url = normalizeJettyUrl(jettyNode2.getBaseUrl().toString()); + Set<String> expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url); + Set<String> actualKnownNodes = cspHttp.getKnownNodes(); + + assertEquals(2, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + cluster.stopJettySolrRunner(jettyNode1); + waitForCSPCacheTimeout(); + + // Known hosts should never remove the initial set of live nodes + actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(2, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + } + } + + @Test + public void testClusterStateProviderKnownHostsWithNewHost() throws Exception { + + try (var cspHttp = http2ClusterStateProvider()) { + + var jettyNode3 = cluster.startJettySolrRunner(); + String jettyNode1Url = normalizeJettyUrl(jettyNode1.getBaseUrl().toString()); + String jettyNode2Url = normalizeJettyUrl(jettyNode2.getBaseUrl().toString()); + String jettyNode3Url = normalizeJettyUrl(jettyNode3.getBaseUrl().toString()); + Set<String> expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url, jettyNode3Url); + waitForCSPCacheTimeout(); + + Set<String> actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(3, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + cluster.stopJettySolrRunner(jettyNode1); + waitForCSPCacheTimeout(); + + actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(3, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + cluster.stopJettySolrRunner(jettyNode3); + expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url); + waitForCSPCacheTimeout(); + + // New nodes are removable from known hosts + actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(2, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + } + } + + private void waitForCSPCacheTimeout() throws InterruptedException { + Thread.sleep(6000); Review Comment: Cache timeout setting is in seconds and pulls an int. Can't make it 1ms. This probably should have had better granularity instead of increments of seconds. I could change cacheTimeout to take milliseconds as the int instead but that would change peoples system property not using the default. So if they set the cache timeout to 5 seconds, it now turns into 5ms if they are not aware of this change. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org