This is an automated email from the ASF dual-hosted git repository.
wilfreds pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git
The following commit(s) were added to refs/heads/master by this push:
new 1a07ad75 [YUNIKORN-3087] Remove deprecated "subsystem metrics" code (#1023)
1a07ad75 is described below
commit 1a07ad75fba01986c3cd56ce97b9eef3d1876ae0
Author: Barry Wu <[email protected]>
AuthorDate: Wed Jun 18 10:58:35 2025 +1000
[YUNIKORN-3087] Remove deprecated "subsystem metrics" code (#1023)
Closes: #1023
Signed-off-by: Wilfred Spiegelenburg <[email protected]>
---
pkg/metrics/queue.go | 61 ++++++++++---------------------------
pkg/metrics/queue_test.go | 53 +-------------------------------
pkg/scheduler/objects/queue_test.go | 41 ++++++++++++-------------
3 files changed, 36 insertions(+), 119 deletions(-)
diff --git a/pkg/metrics/queue.go b/pkg/metrics/queue.go
index cb91a938..34237a62 100644
--- a/pkg/metrics/queue.go
+++ b/pkg/metrics/queue.go
@@ -53,13 +53,9 @@ const (
// QueueMetrics to declare queue metrics
type QueueMetrics struct {
- appMetricsLabel *prometheus.GaugeVec
- // Deprecated - To be removed in 1.7.0. Replaced with queue label Metrics
- appMetricsSubsystem *prometheus.GaugeVec
+ appMetrics *prometheus.GaugeVec
containerMetrics *prometheus.CounterVec
resourceMetricsLabel *prometheus.GaugeVec
- // Deprecated - To be removed in 1.7.0. Replaced with queue label Metrics
- resourceMetricsSubsystem *prometheus.GaugeVec
// Track known resource types
knownResourceTypes map[string]struct{}
lock locking.Mutex
@@ -71,7 +67,7 @@ func InitQueueMetrics(name string) *QueueMetrics {
replaceStr := formatMetricName(name)
- q.appMetricsLabel = prometheus.NewGaugeVec(
+ q.appMetrics = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: Namespace,
Name: "queue_app",
@@ -79,14 +75,6 @@ func InitQueueMetrics(name string) *QueueMetrics {
Help: "Queue application metrics. State of the application includes `new`, `accepted`, `rejected`, `running`, `failing`, `failed`, `resuming`, `completing`, `completed`.",
}, []string{"state"})
- q.appMetricsSubsystem = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: Namespace,
- Subsystem: replaceStr,
- Name: "queue_app",
- Help: "Queue application metrics. State of the
application includes `new`, `accepted`, `rejected`, `running`, `failing`,
`failed`, `resuming`, `completing`, `completed`.",
- }, []string{"state"})
-
q.containerMetrics = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: Namespace,
@@ -103,20 +91,10 @@ func InitQueueMetrics(name string) *QueueMetrics {
Help: "Queue resource metrics. State of the
resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`,
`maxRunningApps`.",
}, []string{"state", "resource"})
- q.resourceMetricsSubsystem = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Namespace: Namespace,
- Subsystem: replaceStr,
- Name: "queue_resource",
- Help: "Queue resource metrics. State of the
resource includes `guaranteed`, `max`, `allocated`, `pending`, `preempting`,
`maxRunningApps`.",
- }, []string{"state", "resource"})
-
var queueMetricsList = []prometheus.Collector{
- q.appMetricsLabel,
- q.appMetricsSubsystem,
+ q.appMetrics,
q.containerMetrics,
q.resourceMetricsLabel,
- q.resourceMetricsSubsystem,
}
// Register the metrics
@@ -135,11 +113,9 @@ func InitQueueMetrics(name string) *QueueMetrics {
func (m *QueueMetrics) UnregisterMetrics() {
var queueMetricsList = []prometheus.Collector{
- m.appMetricsLabel,
- m.appMetricsSubsystem,
+ m.appMetrics,
m.containerMetrics,
m.resourceMetricsLabel,
- m.resourceMetricsSubsystem,
}
// Unregister the metrics
@@ -149,27 +125,22 @@ func (m *QueueMetrics) UnregisterMetrics() {
}
func (m *QueueMetrics) incQueueApplications(state string) {
- m.appMetricsLabel.WithLabelValues(state).Inc()
- m.appMetricsSubsystem.WithLabelValues(state).Inc()
+ m.appMetrics.WithLabelValues(state).Inc()
}
func (m *QueueMetrics) decQueueApplications(state string) {
- m.appMetricsLabel.WithLabelValues(state).Dec()
- m.appMetricsSubsystem.WithLabelValues(state).Dec()
+ m.appMetrics.WithLabelValues(state).Dec()
}
func (m *QueueMetrics) setQueueResource(state string, resourceName string, value float64) {
m.resourceMetricsLabel.WithLabelValues(state, resourceName).Set(value)
- m.resourceMetricsSubsystem.WithLabelValues(state, resourceName).Set(value)
}
func (m *QueueMetrics) Reset() {
m.lock.Lock()
defer m.lock.Unlock()
- m.appMetricsLabel.Reset()
- m.appMetricsSubsystem.Reset()
+ m.appMetrics.Reset()
m.resourceMetricsLabel.Reset()
- m.resourceMetricsSubsystem.Reset()
m.knownResourceTypes = make(map[string]struct{})
}
@@ -183,7 +154,7 @@ func (m *QueueMetrics) DecQueueApplicationsRunning() {
func (m *QueueMetrics) GetQueueApplicationsRunning() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppRunning).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppRunning).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -200,7 +171,7 @@ func (m *QueueMetrics) DecQueueApplicationsNew() {
func (m *QueueMetrics) GetQueueApplicationsNew() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppNew).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppNew).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -217,7 +188,7 @@ func (m *QueueMetrics) DecQueueApplicationsAccepted() {
func (m *QueueMetrics) GetQueueApplicationsAccepted() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppAccepted).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppAccepted).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -230,7 +201,7 @@ func (m *QueueMetrics) IncQueueApplicationsRejected() {
func (m *QueueMetrics) GetQueueApplicationsRejected() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppRejected).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppRejected).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -247,7 +218,7 @@ func (m *QueueMetrics) DecQueueApplicationsResuming() {
func (m *QueueMetrics) GetQueueApplicationsResuming() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppResuming).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppResuming).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -264,7 +235,7 @@ func (m *QueueMetrics) DecQueueApplicationsFailing() {
func (m *QueueMetrics) GetQueueApplicationsFailing() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppFailing).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppFailing).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -277,7 +248,7 @@ func (m *QueueMetrics) IncQueueApplicationsFailed() {
func (m *QueueMetrics) GetQueueApplicationsFailed() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppFailed).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppFailed).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -294,7 +265,7 @@ func (m *QueueMetrics) DecQueueApplicationsCompleting() {
func (m *QueueMetrics) GetQueueApplicationsCompleting() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppCompleting).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppCompleting).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
@@ -307,7 +278,7 @@ func (m *QueueMetrics) IncQueueApplicationsCompleted() {
func (m *QueueMetrics) GetQueueApplicationsCompleted() (int, error) {
metricDto := &dto.Metric{}
- err := m.appMetricsLabel.WithLabelValues(AppCompleted).Write(metricDto)
+ err := m.appMetrics.WithLabelValues(AppCompleted).Write(metricDto)
if err == nil {
return int(*metricDto.Gauge.Value), nil
}
diff --git a/pkg/metrics/queue_test.go b/pkg/metrics/queue_test.go
index ee9a0c93..b77772b3 100644
--- a/pkg/metrics/queue_test.go
+++ b/pkg/metrics/queue_test.go
@@ -276,11 +276,6 @@ func getQueueMetrics() *QueueMetrics {
}
func verifyAppMetrics(t *testing.T, expectedState string) {
- verifyAppMetricsLabel(t, expectedState)
- verifyAppMetricsSubsystem(t, expectedState)
-}
-
-func verifyAppMetricsLabel(t *testing.T, expectedState string) {
checkFn := func(labels []*dto.LabelPair) {
assert.Equal(t, 2, len(labels))
assert.Equal(t, "queue", *labels[0].Name)
@@ -292,16 +287,6 @@ func verifyAppMetricsLabel(t *testing.T, expectedState
string) {
verifyMetricsLabel(t, checkFn)
}
-func verifyAppMetricsSubsystem(t *testing.T, expectedState string) {
- checkFn := func(labels []*dto.LabelPair) {
- assert.Equal(t, 1, len(labels))
- assert.Equal(t, "state", *labels[0].Name)
- assert.Equal(t, expectedState, *labels[0].Value)
- }
-
- verifyMetricsSubsytem(t, checkFn)
-}
-
func verifyContainerMetrics(t *testing.T, expectedState string, value float64)
{
mfs, err := prometheus.DefaultGatherer.Gather()
assert.NilError(t, err)
@@ -326,7 +311,6 @@ func verifyContainerMetrics(t *testing.T, expectedState
string, value float64) {
}
func verifyResourceMetrics(t *testing.T, expectedState, expectedResource
string) {
verifyResourceMetricsLabel(t, expectedState, expectedResource)
- verifyResourceMetricsSubsystem(t, expectedState, expectedResource)
}
func verifyResourceMetricsLabel(t *testing.T, expectedState, expectedResource
string) {
@@ -343,18 +327,6 @@ func verifyResourceMetricsLabel(t *testing.T,
expectedState, expectedResource st
verifyMetricsLabel(t, checkFn)
}
-func verifyResourceMetricsSubsystem(t *testing.T, expectedState,
expectedResource string) {
- checkFn := func(labels []*dto.LabelPair) {
- assert.Equal(t, 2, len(labels))
- assert.Equal(t, "resource", *labels[0].Name)
- assert.Equal(t, expectedResource, *labels[0].Value)
- assert.Equal(t, "state", *labels[1].Name)
- assert.Equal(t, expectedState, *labels[1].Value)
- }
-
- verifyMetricsSubsytem(t, checkFn)
-}
-
func verifyMetricsLabel(t *testing.T, checkLabel func(label []*dto.LabelPair))
{
mfs, err := prometheus.DefaultGatherer.Gather()
assert.NilError(t, err)
@@ -376,32 +348,9 @@ func verifyMetricsLabel(t *testing.T, checkLabel
func(label []*dto.LabelPair)) {
assert.Assert(t, checked, "Failed to find metric")
}
-func verifyMetricsSubsytem(t *testing.T, checkLabel func(label
[]*dto.LabelPair)) {
- mfs, err := prometheus.DefaultGatherer.Gather()
- assert.NilError(t, err)
-
- var checked bool
- for _, metric := range mfs {
- if strings.Contains(metric.GetName(), "yunikorn_root_test") {
- assert.Equal(t, 1, len(metric.Metric))
- assert.Equal(t, dto.MetricType_GAUGE, metric.GetType())
- m := metric.Metric[0]
- checkLabel(m.Label)
- assert.Assert(t, m.Gauge != nil)
- assert.Equal(t, float64(1), *m.Gauge.Value)
- checked = true
- break
- }
- }
-
- assert.Assert(t, checked, "Failed to find metric")
-}
-
func unregisterQueueMetrics() {
- prometheus.Unregister(qm.appMetricsLabel)
- prometheus.Unregister(qm.appMetricsSubsystem)
+ prometheus.Unregister(qm.appMetrics)
prometheus.Unregister(qm.containerMetrics)
prometheus.Unregister(qm.resourceMetricsLabel)
- prometheus.Unregister(qm.resourceMetricsSubsystem)
qm.knownResourceTypes = make(map[string]struct{})
}
diff --git a/pkg/scheduler/objects/queue_test.go b/pkg/scheduler/objects/queue_test.go
index c0d0b16d..2132e901 100644
--- a/pkg/scheduler/objects/queue_test.go
+++ b/pkg/scheduler/objects/queue_test.go
@@ -294,15 +294,14 @@ func TestPendingCalc(t *testing.T) {
if !resources.Equals(leaf.pending, allocRes) {
t.Errorf("leaf queue pending allocation failed to increment expected %v, got %v", allocRes, leaf.pending)
}
- metrics := []string{"yunikorn_root_queue_resource",
"yunikorn_root_leaf_queue_resource"}
+ metrics := []string{"yunikorn_queue_resource"}
want := concatQueueResourceMetric(metrics, []string{`
-yunikorn_root_queue_resource{resource="memory",state="pending"} 100
-yunikorn_root_queue_resource{resource="vcores",state="pending"} 10
-yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
-`, `
-yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 100
-yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 10
-yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
+yunikorn_queue_resource{queue="root",resource="memory",state="pending"} 100
+yunikorn_queue_resource{queue="root",resource="vcores",state="pending"} 10
+yunikorn_queue_resource{queue="root",resource="apps",state="maxRunningApps"} 0
+yunikorn_queue_resource{queue="root.leaf",resource="memory",state="pending"} 100
+yunikorn_queue_resource{queue="root.leaf",resource="vcores",state="pending"} 10
+yunikorn_queue_resource{queue="root.leaf",resource="apps",state="maxRunningApps"} 0
`},
)
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer,
strings.NewReader(want), metrics...), "unexpected metrics")
@@ -314,13 +313,12 @@
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
t.Errorf("leaf queue pending allocation failed to decrement
expected 0, got %v", leaf.pending)
}
want = concatQueueResourceMetric(metrics, []string{`
-yunikorn_root_queue_resource{resource="memory",state="pending"} 0
-yunikorn_root_queue_resource{resource="vcores",state="pending"} 0
-yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
-`, `
-yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 0
-yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
-yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
+yunikorn_queue_resource{queue="root",resource="memory",state="pending"} 0
+yunikorn_queue_resource{queue="root",resource="vcores",state="pending"} 0
+yunikorn_queue_resource{queue="root",resource="apps",state="maxRunningApps"} 0
+yunikorn_queue_resource{queue="root.leaf",resource="memory",state="pending"} 0
+yunikorn_queue_resource{queue="root.leaf",resource="vcores",state="pending"} 0
+yunikorn_queue_resource{queue="root.leaf",resource="apps",state="maxRunningApps"} 0
`},
)
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer,
strings.NewReader(want), metrics...), "unexpected metrics")
@@ -336,13 +334,12 @@
yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
t.Errorf("leaf queue pending allocation should have failed to
decrement expected zero, got %v", leaf.pending)
}
want = concatQueueResourceMetric(metrics, []string{`
-yunikorn_root_queue_resource{resource="memory",state="pending"} 0
-yunikorn_root_queue_resource{resource="vcores",state="pending"} 0
-yunikorn_root_queue_resource{resource="apps",state="maxRunningApps"} 0
-`, `
-yunikorn_root_leaf_queue_resource{resource="memory",state="pending"} 0
-yunikorn_root_leaf_queue_resource{resource="vcores",state="pending"} 0
-yunikorn_root_leaf_queue_resource{resource="apps",state="maxRunningApps"} 0
+yunikorn_queue_resource{queue="root",resource="memory",state="pending"} 0
+yunikorn_queue_resource{queue="root",resource="vcores",state="pending"} 0
+yunikorn_queue_resource{queue="root",resource="apps",state="maxRunningApps"} 0
+yunikorn_queue_resource{queue="root.leaf",resource="memory",state="pending"} 0
+yunikorn_queue_resource{queue="root.leaf",resource="vcores",state="pending"} 0
+yunikorn_queue_resource{queue="root.leaf",resource="apps",state="maxRunningApps"} 0
`},
)
assert.NilError(t, promtu.GatherAndCompare(prometheus.DefaultGatherer,
strings.NewReader(want), metrics...), "unexpected metrics")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]