[GitHub] [solr] HoustonPutman commented on a diff in pull request #1650: SOLR-16806: Create a BalanceReplicas API

via GitHub Mon, 12 Jun 2023 08:45:56 -0700


HoustonPutman commented on code in PR #1650:
URL: https://github.com/apache/solr/pull/1650#discussion_r1226874002



##########
solr/core/src/java/org/apache/solr/cluster/placement/plugins/SimplePlacementFactory.java:
##########
@@ -53,118 +43,116 @@ public PlacementPlugin createPluginInstance() {
     return new SimplePlacementPlugin();
   }
 
-  public static class SimplePlacementPlugin implements PlacementPlugin {
-    @Override
-    public List<PlacementPlan> computePlacements(
-        Collection<PlacementRequest> requests, PlacementContext 
placementContext)
-        throws PlacementException {
-      List<PlacementPlan> placementPlans = new ArrayList<>(requests.size());
-      Map<Node, ReplicaCount> nodeVsShardCount = 
getNodeVsShardCount(placementContext);
-      for (PlacementRequest request : requests) {
-        int totalReplicasPerShard = 0;
-        for (Replica.ReplicaType rt : Replica.ReplicaType.values()) {
-          totalReplicasPerShard += request.getCountReplicasToCreate(rt);
-        }
-
-        Set<ReplicaPlacement> replicaPlacements =
-            CollectionUtil.newHashSet(totalReplicasPerShard * 
request.getShardNames().size());
-
-        Collection<ReplicaCount> replicaCounts = nodeVsShardCount.values();
-
-        if (request.getTargetNodes().size() < replicaCounts.size()) {
-          replicaCounts =
-              replicaCounts.stream()
-                  .filter(rc -> request.getTargetNodes().contains(rc.node()))
-                  .collect(Collectors.toList());
-        }
-
-        for (String shard : request.getShardNames()) {
-          // Reset the ordering of the nodes for each shard, using the 
replicas added in the
-          // previous shards and assign requests
-          List<Node> nodeList =
-              replicaCounts.stream()
-                  .sorted(
-                      Comparator.<ReplicaCount>comparingInt(
-                              rc -> 
rc.weight(request.getCollection().getName()))
-                          .thenComparing(ReplicaCount::nodeName))
-                  .map(ReplicaCount::node)
-                  .collect(Collectors.toList());
-          int replicaNumOfShard = 0;
-          for (Replica.ReplicaType replicaType : Replica.ReplicaType.values()) 
{
-            for (int i = 0; i < request.getCountReplicasToCreate(replicaType); 
i++) {
-              Node assignedNode = nodeList.get(replicaNumOfShard++ % 
nodeList.size());
-
-              replicaPlacements.add(
-                  placementContext
-                      .getPlacementPlanFactory()
-                      .createReplicaPlacement(
-                          request.getCollection(), shard, assignedNode, 
replicaType));
-
-              ReplicaCount replicaCount =
-                  nodeVsShardCount.computeIfAbsent(assignedNode, 
ReplicaCount::new);
-              replicaCount.totalReplicas++;
-              replicaCount.collectionReplicas.merge(
-                  request.getCollection().getName(), 1, Integer::sum);
-            }
-          }
-        }
-
-        placementPlans.add(
-            placementContext
-                .getPlacementPlanFactory()
-                .createPlacementPlan(request, replicaPlacements));
-      }
-      return placementPlans;
-    }
+  public static class SimplePlacementPlugin extends OrderedNodePlacementPlugin 
{
 
-    private Map<Node, ReplicaCount> getNodeVsShardCount(PlacementContext 
placementContext) {
-      HashMap<Node, ReplicaCount> nodeVsShardCount = new HashMap<>();
-
-      for (Node s : placementContext.getCluster().getLiveDataNodes()) {
-        nodeVsShardCount.computeIfAbsent(s, ReplicaCount::new);
+    @Override
+    protected Map<Node, WeightedNode> getBaseWeightedNodes(
+        PlacementContext placementContext,
+        Set<Node> nodes,
+        Iterable<SolrCollection> relevantCollections,
+        boolean skipNodesWithErrors) {
+      HashMap<Node, WeightedNode> nodeVsShardCount = new HashMap<>();
+
+      for (Node n : nodes) {
+        nodeVsShardCount.computeIfAbsent(n, SameCollWeightedNode::new);
       }
 
-      // if we get here we were not given a createNodeList, build a map with 
real counts.
-      for (SolrCollection collection : 
placementContext.getCluster().collections()) {
-        // identify suitable nodes  by checking the no:of cores in each of them
-        for (Shard shard : collection.shards()) {
-          for (Replica replica : shard.replicas()) {
-            ReplicaCount count = nodeVsShardCount.get(replica.getNode());
-            if (count != null) {
-              count.addReplica(collection.getName(), shard.getShardName());
-            }
-          }
-        }
-      }
       return nodeVsShardCount;
     }
   }
 
-  static class ReplicaCount {
-    public final Node node;
+  private static class SameCollWeightedNode extends 
OrderedNodePlacementPlugin.WeightedNode {
+    private static final int SAME_COL_MULT = 5;
+    private static final int SAME_SHARD_MULT = 1000;
     public Map<String, Integer> collectionReplicas;
-    public int totalReplicas = 0;
+    public int totalWeight = 0;
 
-    ReplicaCount(Node node) {
-      this.node = node;
+    SameCollWeightedNode(Node node) {
+      super(node);
       this.collectionReplicas = new HashMap<>();
     }
 
-    public int weight(String collection) {
-      return (collectionReplicas.getOrDefault(collection, 0) * 5) + 
totalReplicas;
+    /**
+     * The weight of the SameCollWeightedNode is the sum of:

Review Comment:
   This formula kind of existed beforehand.
   
   Basically there didn't use to be a "node weight", but this one class in 
particular had a "node weight for replica". It was basically:
   `(the number of replicas in the collection on the node) * 5 + (total 
replicas on node)`.
   
   As a part of this PR I made this a bit better with:
   `(the number of replicas in the collection on the node) * 5 + (the number of 
replicas in the shard on the node) * 100 + (total replicas on node)`
   
   When trying to convert this to a full node weight, a node with 4 replicas of 
the same collection will be weighted the same as a node with 2 replicas of one 
collection, and 2 replicas of another collection (because `2*5 + 2*5 = 4*5`). 
This isn't really in the spirit of the logic, we want to end up with less 
replicas of the same collections on nodes. So it makes more sense to square the 
number of replicas in the same (collection|shard). That way`12^2*5 + 2^2*5 < 
4^2* 5`. (And then I subtracted 1, because there's no reason to penalize the 
first replica of each shard/collection.)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

[GitHub] [solr] HoustonPutman commented on a diff in pull request #1650: SOLR-16806: Create a BalanceReplicas API

Reply via email to