This is an automated email from the ASF dual-hosted git repository. chenyulin0719 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/yunikorn-k8shim.git
The following commit(s) were added to refs/heads/master by this push: new 7b89664a [YUNIKORN-2811] Warn if a pod has inconsistent metadata in the shim (#964) 7b89664a is described below commit 7b89664ad52b1bf606509cfef5f1121d076e5887 Author: Yu-Lin Chen <chenyulin0...@apache.org> AuthorDate: Sun Apr 20 05:53:08 2025 +0000 [YUNIKORN-2811] Warn if a pod has inconsistent metadata in the shim (#964) Closes: #964 Signed-off-by: Yu-Lin Chen <chenyulin0...@apache.org> --- pkg/cache/application.go | 3 + pkg/cache/task.go | 49 ++++++---- pkg/common/utils/utils.go | 40 ++------ pkg/common/utils/utils_test.go | 201 +++++++++-------------------------------- 4 files changed, 82 insertions(+), 211 deletions(-) diff --git a/pkg/cache/application.go b/pkg/cache/application.go index 8438957b..d92a4d00 100644 --- a/pkg/cache/application.go +++ b/pkg/cache/application.go @@ -392,6 +392,9 @@ func (app *Application) scheduleTasks(taskScheduleCondition func(t *Task) bool) if taskScheduleCondition(task) { // for each new task, we do a sanity check before moving the state to Pending_Schedule if err := task.sanityCheckBeforeScheduling(); err == nil { + // check inconsistent pod metadata before submitting the task + task.checkPodMetadataBeforeScheduling() + // note, if we directly trigger submit task event, it may spawn too many duplicate // events, because a task might be submitted multiple times before its state transits to PENDING. if handleErr := task.handle( diff --git a/pkg/cache/task.go b/pkg/cache/task.go index 5a3a3dc3..4dbe69e9 100644 --- a/pkg/cache/task.go +++ b/pkg/cache/task.go @@ -21,6 +21,7 @@ package cache import ( "context" "fmt" + "strings" "time" "github.com/looplab/fsm" @@ -533,28 +534,38 @@ func (task *Task) releaseAllocation() { // this reduces the scheduling overhead by blocking such // request away from the core scheduler. func (task *Task) sanityCheckBeforeScheduling() error { - // After version 1.7.0, we should reject the task whose pod is unbound and has conflicting metadata. - if !utils.PodAlreadyBound(task.pod) { - if err := utils.CheckAppIdInPod(task.pod); err != nil { - log.Log(log.ShimCacheTask).Warn("Pod has inconsistent application metadata and may be rejected in a future YuniKorn release", - zap.String("appID", task.applicationID), - zap.String("podName", task.pod.Name), - zap.String("error", err.Error())) + return task.checkPodPVCs() +} - events.GetRecorder().Eventf(task.pod.DeepCopy(), - nil, v1.EventTypeWarning, "Scheduling", "Scheduling", fmt.Sprintf("Pod has inconsistent application metadata and may be rejected in a future YuniKorn release: %s", err.Error())) - } - if err := utils.CheckQueueNameInPod(task.pod); err != nil { - log.Log(log.ShimCacheTask).Warn("Pod has inconsistent queue metadata and may be rejected in a future YuniKorn release", - zap.String("appID", task.applicationID), - zap.String("podName", task.pod.Name), - zap.String("error", err.Error())) +// throw a warning if the pod has inconsistent metadata +func (task *Task) checkPodMetadataBeforeScheduling() { + appID := utils.GetApplicationIDFromPod(task.pod) + ignoredAppIDLabels, ignoredAppIDAnnotation := utils.GetIgnoredLabelAnnotationInPod(task.pod, appID, constants.AppIdLabelKeys, constants.AppIdAnnotationKeys) + if len(ignoredAppIDLabels) > 0 || len(ignoredAppIDAnnotation) > 0 { + task.logIgnoredPodMetadata("app-id", appID, ignoredAppIDLabels, ignoredAppIDAnnotation) + } - events.GetRecorder().Eventf(task.pod.DeepCopy(), - nil, v1.EventTypeWarning, "Scheduling", "Scheduling", fmt.Sprintf("Pod has inconsistent queue metadata and may be rejected in a future YuniKorn release: %s", err.Error())) - } + queueName := utils.GetQueueNameFromPod(task.pod) + ignoredQueueLabels, ignoredQueueAnnotation := utils.GetIgnoredLabelAnnotationInPod(task.pod, queueName, constants.QueueLabelKeys, constants.QueueAnnotationKeys) + if len(ignoredQueueLabels) > 0 || len(ignoredQueueAnnotation) > 0 { + task.logIgnoredPodMetadata("queue", queueName, ignoredQueueLabels, ignoredQueueAnnotation) } - return task.checkPodPVCs() +} + +func (task *Task) logIgnoredPodMetadata(metadataType string, fianlValue string, ignoredLabel map[string]string, ignoredAnnotation map[string]string) { + ignoredItems := make([]string, 0) + for key, value := range ignoredLabel { + ignoredItems = append(ignoredItems, fmt.Sprintf("(Label) %s: %s", key, value)) + } + for key, value := range ignoredAnnotation { + ignoredItems = append(ignoredItems, fmt.Sprintf("(Annotation) %s: %s", key, value)) + } + logMessage := fmt.Sprintf("Found multiple '%s' value in pod. { podName: %s, fianlValue: %s, ignored: [%s] }", + metadataType, task.pod.Name, fianlValue, strings.Join(ignoredItems, ", ")) + + log.Log(log.ShimCacheTask).Warn(logMessage) + events.GetRecorder().Eventf(task.pod.DeepCopy(), + nil, v1.EventTypeWarning, "Scheduling", "Scheduling", logMessage) } func (task *Task) checkPodPVCs() error { diff --git a/pkg/common/utils/utils.go b/pkg/common/utils/utils.go index f58c96e5..748d4e2e 100644 --- a/pkg/common/utils/utils.go +++ b/pkg/common/utils/utils.go @@ -213,51 +213,27 @@ func GetApplicationIDFromPod(pod *v1.Pod) string { return GenerateApplicationID(pod.Namespace, conf.GetSchedulerConf().GenerateUniqueAppIds, string(pod.UID)) } -func CheckAppIdInPod(pod *v1.Pod) error { - return ValidatePodLabelAnnotation(pod, constants.AppIdLabelKeys, constants.AppIdAnnotationKeys) -} - -func CheckQueueNameInPod(pod *v1.Pod) error { - return ValidatePodLabelAnnotation(pod, constants.QueueLabelKeys, constants.QueueAnnotationKeys) -} - -// return true if all non-empty values are same across all provided label/annotation -func ValidatePodLabelAnnotation(pod *v1.Pod, labelKeys []string, annotationKeys []string) error { - var referenceKey string - var referenceValue string - var referenceType string +func GetIgnoredLabelAnnotationInPod(pod *v1.Pod, currentValue string, labelKeys []string, annotationKeys []string) (map[string]string, map[string]string) { + ignoredLabel := make(map[string]string, 0) + ignoredAnnotation := make(map[string]string, 0) - checkingType := constants.Label for _, key := range labelKeys { value := GetPodLabelValue(pod, key) - if value == "" { + if value == "" || value == currentValue { continue } - if referenceValue == "" { - referenceKey = key - referenceValue = value - referenceType = checkingType - } else if referenceValue != value { - return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s: \"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue) - } + ignoredLabel[key] = value } - checkingType = constants.Annotation for _, key := range annotationKeys { value := GetPodAnnotationValue(pod, key) - if value == "" { + if value == "" || value == currentValue { continue } - if referenceValue == "" { - referenceKey = key - referenceValue = value - referenceType = checkingType - } else if referenceValue != value { - return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s: \"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue) - } + ignoredAnnotation[key] = value } - return nil + return ignoredLabel, ignoredAnnotation } // compare the existing pod condition with the given one, return true if the pod condition remains not changed. diff --git a/pkg/common/utils/utils_test.go b/pkg/common/utils/utils_test.go index 60dd1d48..282205f7 100644 --- a/pkg/common/utils/utils_test.go +++ b/pkg/common/utils/utils_test.go @@ -21,7 +21,6 @@ package utils import ( "bytes" "compress/gzip" - "errors" "fmt" "reflect" "strings" @@ -724,205 +723,87 @@ func TestGetApplicationIDFromPod(t *testing.T) { } } -func TestCheckAppIdInPod(t *testing.T) { - testCases := []struct { - name string - pod *v1.Pod - expected error - }{ - { - name: "consistent app ID", - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - constants.CanonicalLabelApplicationID: "app-123", - constants.SparkLabelAppID: "app-123", - constants.LabelApplicationID: "app-123", - }, - Annotations: map[string]string{ - constants.AnnotationApplicationID: "app-123", - }, - }, - }, - expected: nil, - }, - { - name: "inconsistent app ID in labels", - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - constants.CanonicalLabelApplicationID: "app-123", - constants.SparkLabelAppID: "app-456", - }, - }, - }, - expected: errors.New("label spark-app-selector: \"app-456\" doesn't match label yunikorn.apache.org/app-id: \"app-123\""), - }, - { - name: "inconsistent app ID between label and annotation", - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - constants.CanonicalLabelApplicationID: "app-123", - }, - Annotations: map[string]string{ - constants.AnnotationApplicationID: "app-456", - }, - }, - }, - expected: errors.New("annotation yunikorn.apache.org/app-id: \"app-456\" doesn't match label yunikorn.apache.org/app-id: \"app-123\""), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - err := CheckAppIdInPod(tc.pod) - if tc.expected != nil { - assert.ErrorContains(t, err, tc.expected.Error()) - } else { - assert.NilError(t, err) - } - }) - } -} - -func TestCheckQueueNameInPod(t *testing.T) { - testCases := []struct { - name string - pod *v1.Pod - expected error - }{ - { - name: "consistent queue name", - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - constants.CanonicalLabelQueueName: "root.a", - constants.LabelQueueName: "root.a", - }, - Annotations: map[string]string{ - constants.AnnotationQueueName: "root.a", - }, - }, - }, - expected: nil, - }, - { - name: "inconsistent app ID in labels", - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - constants.CanonicalLabelQueueName: "root.a", - constants.LabelQueueName: "root.b", - }, - }, - }, - expected: errors.New("label queue: \"root.b\" doesn't match label yunikorn.apache.org/queue: \"root.a\""), - }, - { - name: "inconsistent app ID between label and annotation", - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - constants.CanonicalLabelQueueName: "root.a", - }, - Annotations: map[string]string{ - constants.AnnotationQueueName: "root.b", - }, - }, - }, - expected: errors.New("annotation yunikorn.apache.org/queue: \"root.b\" doesn't match label yunikorn.apache.org/queue: \"root.a\""), - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - err := CheckQueueNameInPod(tc.pod) - if tc.expected != nil { - assert.ErrorContains(t, err, tc.expected.Error()) - } else { - assert.NilError(t, err) - } - }) - } -} - -func TestValidatePodLabelAnnotation(t *testing.T) { +func TestGetIgnoredLabelAnnotationInPod(t *testing.T) { labelKeys := []string{"labelKey1", "labelKey2"} annotationKeys := []string{"annotationKey1", "annotationKey2"} testCases := []struct { - name string - pod *v1.Pod - expected error + name string + pod *v1.Pod + currentValue string + labelKeys []string + annotationKeys []string + expectedIgnoredLabel map[string]string + expectedIgnoredAnnotation map[string]string }{ { - name: "empty pod", - pod: &v1.Pod{}, - expected: nil, + name: "empty pod", + pod: &v1.Pod{}, + currentValue: "", + labelKeys: labelKeys, + annotationKeys: annotationKeys, + expectedIgnoredLabel: map[string]string{}, + expectedIgnoredAnnotation: map[string]string{}, }, { - name: "pod with all values are consistent", + name: "have ignored label", pod: &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "labelKey1": "value1", - "labelKey2": "value1", - }, - Annotations: map[string]string{ - "annotationKey1": "value1", - "annotationKey2": "value1", + "labelKey2": "value2", }, }, }, - expected: nil, + currentValue: "value1", + labelKeys: labelKeys, + annotationKeys: annotationKeys, + expectedIgnoredLabel: map[string]string{"labelKey2": "value2"}, + expectedIgnoredAnnotation: map[string]string{}, }, { - name: "pod with inconsistent value in labels", + name: "have ignored annotation", pod: &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "labelKey1": "value1", - "labelKey2": "value2", + Annotations: map[string]string{ + "annotationKey1": "value1", + "annotationKey2": "value2", }, }, }, - expected: errors.New("label labelKey2: \"value2\" doesn't match label labelKey1: \"value1\""), + currentValue: "value1", + labelKeys: labelKeys, + annotationKeys: annotationKeys, + expectedIgnoredLabel: map[string]string{}, + expectedIgnoredAnnotation: map[string]string{"annotationKey2": "value2"}, }, { - name: "pod with inconsistent value between label and annotation", + name: "have both ignored label and annotation", pod: &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "labelKey1": "value1", + "labelKey2": "value2", }, - Annotations: map[string]string{ - "annotationKey1": "value2", - }, - }, - }, - expected: errors.New("annotation annotationKey1: \"value2\" doesn't match label labelKey1: \"value1\""), - }, - { - name: "pod with inconsistent value in annotations", - pod: &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ "annotationKey1": "value1", "annotationKey2": "value2", }, }, }, - expected: errors.New("annotation annotationKey2: \"value2\" doesn't match annotation annotationKey1: \"value1\""), + currentValue: "value1", + labelKeys: labelKeys, + annotationKeys: annotationKeys, + expectedIgnoredLabel: map[string]string{"labelKey2": "value2"}, + expectedIgnoredAnnotation: map[string]string{"annotationKey2": "value2"}, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - err := ValidatePodLabelAnnotation(tc.pod, labelKeys, annotationKeys) - if tc.expected != nil { - assert.ErrorContains(t, err, tc.expected.Error()) - } else { - assert.NilError(t, err) - } + ignoredLabel, ignoredAnnotation := GetIgnoredLabelAnnotationInPod(tc.pod, tc.currentValue, tc.labelKeys, tc.annotationKeys) + assert.DeepEqual(t, ignoredLabel, tc.expectedIgnoredLabel) + assert.DeepEqual(t, ignoredAnnotation, tc.expectedIgnoredAnnotation) }) } } --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@yunikorn.apache.org For additional commands, e-mail: issues-h...@yunikorn.apache.org