HoustonPutman commented on code in PR #561: URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196571087
########## controllers/util/solr_update_util.go: ########## @@ -503,82 +503,65 @@ func GetAllManagedSolrNodeNames(solrCloud *solr.SolrCloud) map[string]bool { } // EvictReplicasForPodIfNecessary takes a solr Pod and migrates all replicas off of that Pod, if the Pod is using ephemeral storage. -// If the pod is using persistent storage, this function is a no-op. -// This function MUST be idempotent and return the same list of pods given the same kubernetes/solr state. -func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, logger logr.Logger) (err error, canDeletePod bool) { - var solrDataVolume *corev1.Volume - - // TODO: Remove these checks after v0.7.0, since it will be taken care by the evictReplicas podReadinessCondition - dataVolumeName := solrCloud.DataVolumeName() - for _, volume := range pod.Spec.Volumes { - if volume.Name == dataVolumeName { - solrDataVolume = &volume - break - } - } - - // Only evict if the Data volume is not persistent - if solrDataVolume != nil && solrDataVolume.VolumeSource.PersistentVolumeClaim == nil { - // If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else - // Otherwise, move the replicas to other pods - if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") { - queryParams := url.Values{} - queryParams.Add("action", "DELETENODE") - queryParams.Add("node", SolrNodeName(solrCloud, pod.Name)) - // TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type... - canDeletePod = true - } else { - requestId := "move-replicas-" + pod.Name - - // First check to see if the Async Replace request has started - if asyncState, message, asyncErr := solr_api.CheckAsyncRequest(ctx, solrCloud, requestId); asyncErr != nil { - err = asyncErr - } else if asyncState == "notfound" { - if podHasReplicas { - // Submit new Replace Node request - replaceResponse := &solr_api.SolrAsyncResponse{} - queryParams := url.Values{} - queryParams.Add("action", "REPLACENODE") - queryParams.Add("parallel", "true") - queryParams.Add("sourceNode", SolrNodeName(solrCloud, pod.Name)) - queryParams.Add("async", requestId) - err = solr_api.CallCollectionsApi(ctx, solrCloud, queryParams, replaceResponse) - if hasError, apiErr := solr_api.CheckForCollectionsApiError("REPLACENODE", replaceResponse.ResponseHeader); hasError { - err = apiErr - } - if err == nil { - logger.Info("Migrating all replicas off of pod before deletion.", "requestId", requestId, "pod", pod.Name) - } else { - logger.Error(err, "Could not migrate all replicas off of pod before deletion. Will try again later.", "requestId", requestId, "message", message) - } +// For updates this will only be called for pods using ephemeral data. +// For scale-down operations, this can be called for pods using ephemeral or persistent data. +func EvictReplicasForPodIfNecessary(ctx context.Context, solrCloud *solr.SolrCloud, pod *corev1.Pod, podHasReplicas bool, evictionReason string, logger logr.Logger) (err error, canDeletePod bool) { + logger = logger.WithValues("evictionReason", evictionReason) + // If the Cloud has 1 or zero pods, and this is the "-0" pod, then delete the data since we can't move it anywhere else + // Otherwise, move the replicas to other pods + if (solrCloud.Spec.Replicas == nil || *solrCloud.Spec.Replicas < 2) && strings.HasSuffix(pod.Name, "-0") { + queryParams := url.Values{} + queryParams.Add("action", "DELETENODE") + queryParams.Add("node", SolrNodeName(solrCloud, pod.Name)) + // TODO: Figure out a way to do this, since DeleteNode will not delete the last replica of every type... Review Comment: Ahhh yes.... So this relates very closely to https://github.com/apache/solr-operator/pull/561#discussion_r1195591555 Should we be deleting the replica, or instead maybe scaling up to a second pod, then doing an update, then scaling back down. Either way it's such a small edge-case (running a single solr pod without persistent data), that I'm not sure its worth the effort. Either way we should document the edge case, though it's unrelated to this ticket. We should open another issue for this. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org