HoustonPutman commented on code in PR #561:
URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196554404


##########
controllers/util/solr_update_util.go:
##########
@@ -503,82 +503,65 @@ func GetAllManagedSolrNodeNames(solrCloud 
*solr.SolrCloud) map[string]bool {
 }
 
 // EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas 
off of that Pod, if the Pod is using ephemeral storage.
-// If the pod is using persistent storage, this function is a no-op.
-// This function MUST be idempotent and return the same list of pods given the 
same kubernetes/solr state.
-func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud 
*solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (err 
error, canDeletePod bool) {
-       var solrDataVolume *corev1.Volume
-
-       // TODO: Remove these checks after v0.7.0, since it will be taken care 
by the evictReplicas podReadinessCondition
-       dataVolumeName := solrCloud.DataVolumeName()
-       for _, volume := range pod.Spec.Volumes {
-               if volume.Name == dataVolumeName {
-                       solrDataVolume = &volume
-                       break
-               }
-       }
-
-       // Only evict if the Data volume is not persistent
-       if solrDataVolume != nil && 
solrDataVolume.VolumeSource.PersistentVolumeClaim == nil {
-               // If the Cloud has 1 or zero pods, and this is the "-0" pod, 
then delete the data since we can't move it anywhere else
-               // Otherwise, move the replicas to other pods
-               if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas 
< 2) && strings.HasSuffix(pod.Name, "-0") {
-                       queryParams := url.Values{}
-                       queryParams.Add("action", "DELETENODE")
-                       queryParams.Add("node", SolrNodeName(solrCloud, 
pod.Name))
-                       // TODO: Figure out a way to do this, since DeleteNode 
will not delete the last replica of every type...
-                       canDeletePod = true
-               } else {
-                       requestId := "move-replicas-" + pod.Name
-
-                       // First check to see if the Async Replace request has 
started
-                       if asyncState, message, asyncErr := 
solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
-                               err = asyncErr
-                       } else if asyncState == "notfound" {
-                               if podHasReplicas {
-                                       // Submit new Replace Node request
-                                       replaceResponse := 
&solr_api.SolrAsyncResponse{}
-                                       queryParams := url.Values{}
-                                       queryParams.Add("action", "REPLACENODE")
-                                       queryParams.Add("parallel", "true")
-                                       queryParams.Add("sourceNode", 
SolrNodeName(solrCloud, pod.Name))
-                                       queryParams.Add("async", requestId)
-                                       err = solr_api.CallCollectionsApi(ctx, 
solrCloud, queryParams, replaceResponse)
-                                       if hasError, apiErr := 
solr_api.CheckForCollectionsApiError("REPLACENODE", 
replaceResponse.ResponseHeader); hasError {
-                                               err = apiErr
-                                       }
-                                       if err == nil {
-                                               logger.Info("Migrating all 
replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name)
-                                       } else {
-                                               logger.Error(err, "Could not 
migrate all replicas off of pod before deletion. Will try again later.", 
"requestId", requestId, "message", message)
-                                       }
+// For updates this will only be called for pods using ephemeral data.
+// For scale-down operations, this can be called for pods using ephemeral or 
persistent data.
+func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud 
*solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, 
logger logr.Logger) (err error, canDeletePod bool) {
+       logger = logger.WithValues("evictionReason", evictionReason)
+       // If the Cloud has 1 or zero pods, and this is the "-0" pod, then 
delete the data since we can't move it anywhere else
+       // Otherwise, move the replicas to other pods
+       if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && 
strings.HasSuffix(pod.Name, "-0") {
+               queryParams := url.Values{}
+               queryParams.Add("action", "DELETENODE")
+               queryParams.Add("node", SolrNodeName(solrCloud, pod.Name))
+               // TODO: Figure out a way to do this, since DeleteNode will not 
delete the last replica of every type...
+               canDeletePod = true
+       } else {
+               requestId := "move-replicas-" + pod.Name
+
+               // First check to see if the Async Replace request has started
+               if asyncState, message, asyncErr := 
solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil {
+                       err = asyncErr
+               } else if asyncState == "notfound" {
+                       if podHasReplicas {
+                               // Submit new Replace Node request
+                               replaceResponse := &solr_api.SolrAsyncResponse{}
+                               queryParams := url.Values{}
+                               queryParams.Add("action", "REPLACENODE")
+                               queryParams.Add("parallel", "true")
+                               queryParams.Add("sourceNode", 
SolrNodeName(solrCloud, pod.Name))
+                               queryParams.Add("async", requestId)
+                               err = solr_api.CallCollectionsApi(ctx, 
solrCloud, queryParams, replaceResponse)
+                               if hasError, apiErr := 
solr_api.CheckForCollectionsApiError("REPLACENODE", 
replaceResponse.ResponseHeader); hasError {
+                                       err = apiErr
+                               }
+                               if err == nil {
+                                       logger.Info("Migrating all replicas off 
of pod before deletion.", "requestId", requestId, "pod", pod.Name)
                                } else {
-                                       canDeletePod = true
+                                       logger.Error(err, "Could not migrate 
all replicas off of pod before deletion. Will try again later.", "requestId", 
requestId, "message", message)
                                }
-
                        } else {
-                               logger.Info("Found async status", "requestId", 
requestId, "state", asyncState)
-                               // Only continue to delete the pod if the 
ReplaceNode request is complete and successful
-                               if asyncState == "completed" {
-                                       canDeletePod = true
-                                       logger.Info("Migration of all replicas 
off of pod before deletion complete. Pod can now be deleted.", "pod", pod.Name)
-                               } else if asyncState == "failed" {
-                                       logger.Info("Migration of all replicas 
off of pod before deletion failed. Will try again.", "pod", pod.Name, 
"message", message)
-                               }
+                               canDeletePod = true
+                       }
 
-                               // Delete the async request Id if the async 
request is successful or failed.
-                               // If the request failed, this will cause a 
retry since the next reconcile won't find the async requestId in Solr.
-                               if asyncState == "completed" || asyncState == 
"failed" {
-                                       if message, err = 
solr_api.DeleteAsyncRequest(ctx, solrCloud, requestId); err != nil {
-                                               logger.Error(err, "Could not 
delete Async request status.", "requestId", requestId, "message", message)
-                                       } else {
-                                               canDeletePod = false
-                                       }
+               } else {
+                       logger.Info("Found async status", "requestId", 
requestId, "state", asyncState)
+                       // Only continue to delete the pod if the ReplaceNode 
request is complete and successful
+                       if asyncState == "completed" {
+                               canDeletePod = true

Review Comment:
   Yeah that's certainly fair. I would love to see a Solr feature that we can 
say "Don't put replicas on this node", so that we can set that flag first, and 
then the check wouldn't be necessary. But you are right.
   
   If I remember correctly, that's how it worked initially. So it shouldn't be 
a problem to go back to that.
   
   If we do this here, we should certainly do it the same way in the scale down 
operations. Currently it just waits for the command to succeed, just like here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to