HoustonPutman commented on code in PR #561: URL: https://github.com/apache/solr-operator/pull/561#discussion_r1196555332
########## controllers/solr_cluster_ops_util.go: ########## @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package controllers + +import ( + "context" + "errors" + solrv1beta1 "github.com/apache/solr-operator/api/v1beta1" + "github.com/apache/solr-operator/controllers/util" + "github.com/go-logr/logr" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + "strconv" + "time" +) + +func determineScaleClusterOpLockIfNecessary(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (clusterOpLock string, clusterOpMetadata string, retryLaterDuration time.Duration, err error) { + desiredPods := int(*instance.Spec.Replicas) + configuredPods := int(*statefulSet.Spec.Replicas) + if desiredPods != configuredPods { + scaleTo := -1 + // Start a scaling operation + if desiredPods < configuredPods { + // Scale down! 
+ // The option is enabled by default, so treat "nil" like "true" + if instance.Spec.Autoscaling.VacatePodsOnScaleDown == nil || *instance.Spec.Autoscaling.VacatePodsOnScaleDown { + if desiredPods > 0 { + // We only support scaling down one pod at-a-time if not scaling down to 0 pods + scaleTo = configuredPods - 1 + } else { + scaleTo = 0 + } + } else { + // The cloud is not set up to use managed scale-down + err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger) + } + } else if desiredPods > configuredPods { + // Scale up! + // TODO: replicasScaleUp is not supported, so do not make a clusterOp out of it, just do the patch + err = scaleCloudUnmanaged(ctx, r, statefulSet, desiredPods, logger) + } + if scaleTo > -1 { + clusterOpLock = util.ScaleLock + clusterOpMetadata = strconv.Itoa(scaleTo) + } + } + return +} + +func handleLockedClusterOpScale(ctx context.Context, r *SolrCloudReconciler, instance *solrv1beta1.SolrCloud, statefulSet *appsv1.StatefulSet, podList []corev1.Pod, logger logr.Logger) (retryLaterDuration time.Duration, err error) { + if scalingToNodes, hasAnn := statefulSet.Annotations[util.ClusterOpsMetadataAnnotation]; hasAnn { + if scalingToNodesInt, convErr := strconv.Atoi(scalingToNodes); convErr != nil { + logger.Error(convErr, "Could not convert statefulSet annotation to int for scale-down-to information", "annotation", util.ClusterOpsMetadataAnnotation, "value", scalingToNodes) + err = convErr + } else { + replicaManagementComplete := false + if scalingToNodesInt < int(*statefulSet.Spec.Replicas) { + // Manage scaling down the SolrCloud + replicaManagementComplete, err = handleManagedCloudScaleDown(ctx, r, instance, statefulSet, scalingToNodesInt, podList, logger) + // } else if scalingToNodesInt > int(*statefulSet.Spec.Replicas) { + // TODO: Utilize the scaled-up nodes in the future, however Solr does not currently have APIs for this. 
+ // TODO: Think about the order of scale-up and restart when individual nodeService IPs are injected into the pods. + // TODO: Will likely want to do a scale-up of the service first, then do the rolling restart of the cluster, then utilize the node. + } else { + // This shouldn't happen. The ScalingToNodesAnnotation is removed when the statefulSet size changes, through a Patch. + // But if it does happen, we should just remove the annotation and move forward. + patchedStatefulSet := statefulSet.DeepCopy() + delete(patchedStatefulSet.Annotations, util.ClusterOpsLockAnnotation) + delete(patchedStatefulSet.Annotations, util.ClusterOpsMetadataAnnotation) + if err = r.Patch(ctx, patchedStatefulSet, client.StrategicMergeFrom(statefulSet)); err != nil { + logger.Error(err, "Error while patching StatefulSet to remove unneeded clusterLockOp annotation for scaling to the current amount of nodes") + } else { + statefulSet = patchedStatefulSet + } + } + + // Scale down the statefulSet to represent the new number of utilizedPods, if it is lower than the current number of pods + // Also remove the "scalingToNodes" annotation, as that acts as a lock on the cluster, so that other operations, + // such as scale-up, pod updates and further scale-down cannot happen at the same time. Review Comment: This is left over from previous implementations. Thanks for catching it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org