mxm commented on code in PR #686: URL: https://github.com/apache/flink-kubernetes-operator/pull/686#discussion_r1365724745
########## flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingMetricCollector.java: ########## @@ -138,15 +138,19 @@ public CollectedMetricHistory updateMetrics( metricHistory.put(now, scalingMetrics); stateStore.storeEvaluatedMetrics(ctx, metricHistory); - var collectedMetrics = new CollectedMetricHistory(topology, metricHistory); - - var windowFullTime = metricCollectionStartTs.plus(metricWindowSize); - collectedMetrics.setFullyCollected(!now.isBefore(windowFullTime)); + if (now.isBefore(stableTime)) { + LOG.info("Stabilizing until {}", stableTime); + return new CollectedMetricHistory(topology, Collections.emptySortedMap()); + } - if (!collectedMetrics.isFullyCollected()) { + var collectedMetrics = new CollectedMetricHistory(topology, metricHistory); + if (now.isBefore(windowFullTime)) { LOG.info("Metric window not full until {}", windowFullTime); + } else { + // We clear any metrics from the stabilization interval once the metric window is full + metricHistory.headMap(stableTime).clear(); Review Comment: ```suggestion // Trim metrics outside the metric window from metrics history metricHistory.headMap(now.minus(metricWindowSize)).clear(); ``` ########## flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingMetricCollector.java: ########## @@ -97,32 +99,30 @@ public CollectedMetricHistory updateMetrics( } }); - // The timestamp of the first metric observation marks the start - // If we haven't collected any metrics, we are starting now - var metricCollectionStartTs = metricHistory.isEmpty() ? now : metricHistory.firstKey(); - var jobDetailsInfo = getJobDetailsInfo(ctx, conf.get(AutoScalerOptions.FLINK_CLIENT_TIMEOUT)); var jobUpdateTs = getJobUpdateTs(jobDetailsInfo); - if (jobUpdateTs.isAfter(metricCollectionStartTs)) { + // We detect job change compared to our collected metrics by checking against the earliest + // metric timestamp + if (!metricHistory.isEmpty() && jobUpdateTs.isAfter(metricHistory.firstKey())) { LOG.info("Job updated at {}. Clearing metrics.", jobUpdateTs); stateStore.removeEvaluatedMetrics(ctx); cleanup(ctx.getJobKey()); metricHistory.clear(); - metricCollectionStartTs = now; } var topology = getJobTopology(ctx, stateStore, jobDetailsInfo); + var stableTime = jobUpdateTs.plus(conf.get(AutoScalerOptions.STABILIZATION_INTERVAL)); - // Trim metrics outside the metric window from metrics history + // Calculate timestamp when the metric windows is full var metricWindowSize = getMetricWindowSize(conf); - metricHistory.headMap(now.minus(metricWindowSize)).clear(); + var metricsAfterStable = metricHistory.tailMap(stableTime); + var windowFullTime = + metricsAfterStable.isEmpty() + ? now.plus(metricWindowSize) + : metricsAfterStable.firstKey().plus(metricWindowSize); - var stableTime = jobUpdateTs.plus(conf.get(AutoScalerOptions.STABILIZATION_INTERVAL)); - if (now.isBefore(stableTime)) { - // As long as we are stabilizing, collect no metrics at all - LOG.info("Skipping metric collection during stabilization period until {}", stableTime); - return new CollectedMetricHistory(topology, Collections.emptySortedMap()); - } + // Trim metrics outside the metric window from metrics history + metricHistory.headMap(now.minus(metricWindowSize)).clear(); Review Comment: We only need to evict elements once the window is full. We can perform the cleanup when we mark the window as full. ########## flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingMetricCollector.java: ########## @@ -97,32 +99,30 @@ public CollectedMetricHistory updateMetrics( } }); - // The timestamp of the first metric observation marks the start - // If we haven't collected any metrics, we are starting now - var metricCollectionStartTs = metricHistory.isEmpty() ? now : metricHistory.firstKey(); - var jobDetailsInfo = getJobDetailsInfo(ctx, conf.get(AutoScalerOptions.FLINK_CLIENT_TIMEOUT)); var jobUpdateTs = getJobUpdateTs(jobDetailsInfo); - if (jobUpdateTs.isAfter(metricCollectionStartTs)) { + // We detect job change compared to our collected metrics by checking against the earliest + // metric timestamp + if (!metricHistory.isEmpty() && jobUpdateTs.isAfter(metricHistory.firstKey())) { LOG.info("Job updated at {}. Clearing metrics.", jobUpdateTs); stateStore.removeEvaluatedMetrics(ctx); cleanup(ctx.getJobKey()); metricHistory.clear(); - metricCollectionStartTs = now; } var topology = getJobTopology(ctx, stateStore, jobDetailsInfo); + var stableTime = jobUpdateTs.plus(conf.get(AutoScalerOptions.STABILIZATION_INTERVAL)); - // Trim metrics outside the metric window from metrics history + // Calculate timestamp when the metric windows is full var metricWindowSize = getMetricWindowSize(conf); - metricHistory.headMap(now.minus(metricWindowSize)).clear(); + var metricsAfterStable = metricHistory.tailMap(stableTime); + var windowFullTime = + metricsAfterStable.isEmpty() + ? now.plus(metricWindowSize) + : metricsAfterStable.firstKey().plus(metricWindowSize); - var stableTime = jobUpdateTs.plus(conf.get(AutoScalerOptions.STABILIZATION_INTERVAL)); - if (now.isBefore(stableTime)) { - // As long as we are stabilizing, collect no metrics at all - LOG.info("Skipping metric collection during stabilization period until {}", stableTime); - return new CollectedMetricHistory(topology, Collections.emptySortedMap()); - } + // Trim metrics outside the metric window from metrics history + metricHistory.headMap(now.minus(metricWindowSize)).clear(); Review Comment: ```suggestion ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org