This is an automated email from the ASF dual-hosted git repository.
domgarguilo pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push:
new b21837a39e Add metrics for scan server reservation write out time and collisions (#4577)
b21837a39e is described below
commit b21837a39e51968b8cc87dcf9781fa37b5aefd39
Author: Dom G <[email protected]>
AuthorDate: Wed May 22 15:48:29 2024 -0400
Add metrics for scan server reservation write out time and collisions (#4577)
---
.../accumulo/core/metrics/MetricsProducer.java | 8 ++++--
.../org/apache/accumulo/tserver/ScanServer.java | 11 ++++----
.../apache/accumulo/tserver/ScanServerMetrics.java | 32 +++++++++++++++++++---
.../apache/accumulo/test/metrics/MetricsIT.java | 2 +-
4 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index ddc9278b43..dd4489b87c 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@ -341,7 +341,7 @@ import io.micrometer.core.instrument.MeterRegistry;
* <tr>
* <th>N/A</th>
* <th>N/A</th>
- * <th>{@value #METRICS_SCAN_RESERVATION_TIMER}</th>
+ * <th>{@value #METRICS_SCAN_RESERVATION_TOTAL_TIMER}</th>
* <th>Timer</th>
* <th>Time to reserve a tablets files for scan</th>
* </tr>
@@ -629,8 +629,12 @@ public interface MetricsProducer {
String METRICS_SCAN_START = METRICS_SCAN_PREFIX + "start";
String METRICS_SCAN_CONTINUE = METRICS_SCAN_PREFIX + "continue";
String METRICS_SCAN_CLOSE = METRICS_SCAN_PREFIX + "close";
+ String METRICS_SCAN_RESERVATION_TOTAL_TIMER = METRICS_SCAN_PREFIX +
"reservation.total.timer";
+ String METRICS_SCAN_RESERVATION_WRITEOUT_TIMER =
+ METRICS_SCAN_PREFIX + "reservation.writeout.timer";
String METRICS_SCAN_BUSY_TIMEOUT_COUNTER = METRICS_SCAN_PREFIX +
"busy.timeout.count";
- String METRICS_SCAN_RESERVATION_TIMER = METRICS_SCAN_PREFIX +
"reservation.timer";
+ String METRICS_SCAN_RESERVATION_CONFLICT_COUNTER =
+ METRICS_SCAN_PREFIX + "reservation.conflict.count";
String METRICS_SCAN_QUERIES = METRICS_SCAN_PREFIX + "queries";
String METRICS_SCAN_QUERY_SCAN_RESULTS = METRICS_SCAN_PREFIX +
"query.results";
String METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES = METRICS_SCAN_PREFIX +
"query.results.bytes";
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
index 8d519c53f4..2ddb76e2cb 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
+import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -601,7 +602,8 @@ public class ScanServer extends AbstractServer
}
if (!filesToReserve.isEmpty()) {
- getContext().getAmple().putScanServerFileReferences(refs);
+ scanServerMetrics.recordWriteOutReservationTime(
+ () -> getContext().getAmple().putScanServerFileReferences(refs));
// After we insert the scan server refs we need to check and see if
the tablet is still
// using the file. As long as the tablet is still using the files then
the Accumulo GC
@@ -635,6 +637,7 @@ public class ScanServer extends AbstractServer
LOG.info("RFFS {} tablet files changed while attempting to reference
files {}",
myReservationId, filesToReserve);
getContext().getAmple().deleteScanServerFileReferences(refs);
+ scanServerMetrics.incrementReservationConflictCount();
return null;
}
}
@@ -669,8 +672,7 @@ public class ScanServer extends AbstractServer
try {
return reserveFiles(extents);
} finally {
- scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
- TimeUnit.NANOSECONDS);
+
scanServerMetrics.recordTotalReservationTime(Duration.ofNanos(System.nanoTime()
- start));
}
}
@@ -711,8 +713,7 @@ public class ScanServer extends AbstractServer
try {
return reserveFiles(scanId);
} finally {
- scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
- TimeUnit.NANOSECONDS);
+
scanServerMetrics.recordTotalReservationTime(Duration.ofNanos(System.nanoTime()
- start));
}
}
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
index 771def8e4f..365c26ceee 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
@@ -18,6 +18,9 @@
*/
package org.apache.accumulo.tserver;
+import java.time.Duration;
+import java.util.concurrent.atomic.AtomicLong;
+
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
import org.apache.accumulo.core.metrics.MetricsProducer;
@@ -26,14 +29,17 @@ import com.github.benmanes.caffeine.cache.LoadingCache;
import com.google.common.base.Preconditions;
import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.FunctionCounter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import io.micrometer.core.instrument.binder.cache.CaffeineCacheMetrics;
public class ScanServerMetrics implements MetricsProducer {
- private Timer reservationTimer;
+ private Timer totalReservationTimer;
+ private Timer writeOutReservationTimer;
private Counter busyTimeoutCount;
+ private final AtomicLong reservationConflictCount = new AtomicLong(0);
private final LoadingCache<KeyExtent,TabletMetadata> tabletMetadataCache;
@@ -43,20 +49,38 @@ public class ScanServerMetrics implements MetricsProducer {
@Override
public void registerMetrics(MeterRegistry registry) {
- reservationTimer =
Timer.builder(MetricsProducer.METRICS_SCAN_RESERVATION_TIMER)
+ totalReservationTimer =
Timer.builder(MetricsProducer.METRICS_SCAN_RESERVATION_TOTAL_TIMER)
.description("Time to reserve a tablets files for
scan").register(registry);
+ writeOutReservationTimer = Timer
+ .builder(MetricsProducer.METRICS_SCAN_RESERVATION_WRITEOUT_TIMER)
+ .description("Time to write out a tablets file reservations for
scan").register(registry);
busyTimeoutCount = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT_COUNTER)
.description("The number of scans where a busy timeout
happened").register(registry);
+ FunctionCounter
+ .builder(METRICS_SCAN_RESERVATION_CONFLICT_COUNTER,
reservationConflictCount,
+ AtomicLong::get)
+ .description(
+ "Counts instances where file reservation attempts for scans
encountered conflicts")
+ .register(registry);
+
Preconditions.checkState(tabletMetadataCache.policy().isRecordingStats(),
"Attempted to instrument cache that is not recording stats.");
CaffeineCacheMetrics.monitor(registry, tabletMetadataCache,
METRICS_SCAN_TABLET_METADATA_CACHE);
}
- public Timer getReservationTimer() {
- return reservationTimer;
+ public void recordTotalReservationTime(Duration time) {
+ totalReservationTimer.record(time);
+ }
+
+ public void recordWriteOutReservationTime(Runnable time) {
+ writeOutReservationTimer.record(time);
}
public void incrementBusy() {
busyTimeoutCount.increment();
}
+
+ public void incrementReservationConflictCount() {
+ reservationConflictCount.getAndIncrement();
+ }
}
diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index 55622d0793..3bb3353529 100644
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@ -103,7 +103,7 @@ public class MetricsIT extends ConfigurableMacBase implements MetricsProducer {
METRICS_REPLICATION_QUEUE, METRICS_COMPACTOR_MAJC_STUCK,
METRICS_SCAN_BUSY_TIMEOUT_COUNTER);
// add sserver as flaky until scan server included in mini tests.
Set<String> flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS,
METRICS_FATE_TYPE_IN_PROGRESS,
- METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TIMER,
+ METRICS_SCAN_BUSY_TIMEOUT_COUNTER,
METRICS_SCAN_RESERVATION_TOTAL_TIMER,
METRICS_SCAN_TABLET_METADATA_CACHE);
Map<String,String> expectedMetricNames = this.getMetricFields();