This is an automated email from the ASF dual-hosted git repository.
dlmarion pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new 99f8863e47 Normalized metric names, added descriptions to
documentation (#5112)
99f8863e47 is described below
commit 99f8863e4754ef6db041e83878a96d596a371155
Author: Dave Marion <[email protected]>
AuthorDate: Tue Nov 26 15:44:21 2024 -0500
Normalized metric names, added descriptions to documentation (#5112)
Updated metric names to reflect their function, not necessarily their
location. Renamed MetricCategory to MetricDocSection to reflect the section
of the docs, not the category of the metric.
Co-authored-by: Dom G. <[email protected]>
---
.../org/apache/accumulo/core/metrics/Metric.java | 323 +++++++++++----------
.../accumulo/core/metrics/MetricsDocGen.java | 24 +-
.../accumulo/manager/metrics/ManagerMetrics.java | 8 +-
.../compaction/BadCompactionServiceConfigIT.java | 6 +-
4 files changed, 191 insertions(+), 170 deletions(-)
diff --git a/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java
b/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java
index 6b71ed2f49..da61305fc2 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java
@@ -25,257 +25,261 @@ public enum Metric {
// General Server Metrics
SERVER_IDLE("accumulo.server.idle", MetricType.GAUGE,
"Indicates if the server is idle or not. The value will be 1 when idle
and 0 when not idle.",
- MetricCategory.GENERAL_SERVER),
+ MetricDocSection.GENERAL_SERVER),
LOW_MEMORY("accumulo.detected.low.memory", MetricType.GAUGE,
"Reports 1 when process memory usage is above the threshold, reports 0
when memory is okay.",
- MetricCategory.GENERAL_SERVER),
+ MetricDocSection.GENERAL_SERVER),
+ THRIFT_IDLE("accumulo.thrift.idle", MetricType.DISTRIBUTION_SUMMARY,
+ "Time waiting to execute an RPC request.",
MetricDocSection.GENERAL_SERVER),
+ THRIFT_EXECUTE("accumulo.thrift.execute", MetricType.DISTRIBUTION_SUMMARY,
+ "Time to execute an RPC request.", MetricDocSection.GENERAL_SERVER),
// Compactor Metrics
+ COMPACTION_SVC_ERRORS("accumulo.compaction.svc.misconfigured",
MetricType.GAUGE,
+ "A value of 1 indicates a misconfiguration in the compaction service,
while a value of 0 indicates that the configuration is valid.",
+ MetricDocSection.COMPACTION),
COMPACTOR_MAJC_IN_PROGRESS("accumulo.compaction.majc.in_progress",
MetricType.GAUGE,
"Indicator of whether a compaction is in-progress (value: 1) or not
(value: 0). An"
+ " in-progress compaction could also be stuck.",
- MetricCategory.COMPACTION),
+ MetricDocSection.COMPACTION),
COMPACTOR_MAJC_STUCK("accumulo.compaction.majc.stuck",
MetricType.LONG_TASK_TIMER,
- "Number and duration of stuck major compactions.",
MetricCategory.COMPACTION),
+ "Number and duration of stuck major compactions.",
MetricDocSection.COMPACTION),
COMPACTOR_MINC_STUCK("accumulo.compaction.minc.stuck",
MetricType.LONG_TASK_TIMER,
- "Number and duration of stuck minor compactions.",
MetricCategory.COMPACTION),
+ "Number and duration of stuck minor compactions.",
MetricDocSection.COMPACTION),
COMPACTOR_ENTRIES_READ("accumulo.compaction.entries.read",
MetricType.FUNCTION_COUNTER,
"Number of entries read by all compactions that have run on this
compactor (majc) or tserver (minc).",
- MetricCategory.COMPACTION),
+ MetricDocSection.COMPACTION),
COMPACTOR_ENTRIES_WRITTEN("accumulo.compaction.entries.written",
MetricType.FUNCTION_COUNTER,
"Number of entries written by all compactions that have run on this
compactor (majc) or tserver (minc).",
- MetricCategory.COMPACTION),
+ MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUES("accumulo.compaction.queue.count",
MetricType.GAUGE,
- "Number of priority queues for compaction jobs.",
MetricCategory.COMPACTION),
+ "Number of priority queues for compaction jobs.",
MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_LENGTH("accumulo.compaction.queue.length",
MetricType.GAUGE,
- "Length of priority queue.", MetricCategory.COMPACTION),
+ "Length of priority queue.", MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_DEQUEUED("accumulo.compaction.queue.jobs.dequeued",
- MetricType.GAUGE, "Count of dequeued jobs.", MetricCategory.COMPACTION),
+ MetricType.GAUGE, "Count of dequeued jobs.",
MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_QUEUED("accumulo.compaction.queue.jobs.queued",
- MetricType.GAUGE, "Count of queued jobs.", MetricCategory.COMPACTION),
+ MetricType.GAUGE, "Count of queued jobs.", MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_REJECTED("accumulo.compaction.queue.jobs.rejected",
- MetricType.GAUGE, "Count of rejected jobs.", MetricCategory.COMPACTION),
+ MetricType.GAUGE, "Count of rejected jobs.",
MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_PRIORITY("accumulo.compaction.queue.jobs.priority",
- MetricType.GAUGE, "Lowest priority queued job.",
MetricCategory.COMPACTION),
+ MetricType.GAUGE, "Lowest priority queued job.",
MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_MIN_AGE("accumulo.compaction.queue.jobs.min.age",
MetricType.GAUGE, "Minimum age of currently queued jobs in seconds.",
- MetricCategory.COMPACTION),
+ MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_MAX_AGE("accumulo.compaction.queue.jobs.max.age",
MetricType.GAUGE, "Maximum age of currently queued jobs in seconds.",
- MetricCategory.COMPACTION),
+ MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_AVG_AGE("accumulo.compaction.queue.jobs.avg.age",
MetricType.GAUGE, "Average age of currently queued jobs in seconds.",
- MetricCategory.COMPACTION),
+ MetricDocSection.COMPACTION),
COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_POLL_TIMER("accumulo.compaction.queue.jobs.exit.time",
MetricType.TIMER, "Tracks time a job spent in the queue before exiting
the queue.",
- MetricCategory.COMPACTION),
+ MetricDocSection.COMPACTION),
// Fate Metrics
FATE_TYPE_IN_PROGRESS("accumulo.fate.ops.in.progress.by.type",
MetricType.GAUGE,
"Number of FATE operations in progress. The op type is designated by the
`op.type` tag.",
- MetricCategory.FATE),
+ MetricDocSection.FATE),
FATE_OPS("accumulo.fate.ops", MetricType.GAUGE,
- "Number of all the current FATE ops in any state.", MetricCategory.FATE),
+ "Number of all the current FATE ops in any state.",
MetricDocSection.FATE),
FATE_OPS_ACTIVITY("accumulo.fate.ops.activity", MetricType.GAUGE,
"Count of the total number of times fate operations are added, updated,
and removed.",
- MetricCategory.FATE),
+ MetricDocSection.FATE),
FATE_ERRORS("accumulo.fate.errors", MetricType.GAUGE,
- "Count of errors that occurred when attempting to gather fate metrics.",
MetricCategory.FATE),
+ "Count of errors that occurred when attempting to gather fate metrics.",
+ MetricDocSection.FATE),
FATE_TX("accumulo.fate.tx", MetricType.GAUGE,
"The state is now in a tag (e.g., state=new, state=in.progress,
state=failed, etc.).",
- MetricCategory.FATE),
+ MetricDocSection.FATE),
// Garbage Collection Metrics
GC_STARTED("accumulo.gc.started", MetricType.GAUGE, "Timestamp GC file
collection cycle started.",
- MetricCategory.GARBAGE_COLLECTION),
+ MetricDocSection.GARBAGE_COLLECTION),
GC_FINISHED("accumulo.gc.finished", MetricType.GAUGE, "Timestamp GC file
collect cycle finished.",
- MetricCategory.GARBAGE_COLLECTION),
+ MetricDocSection.GARBAGE_COLLECTION),
GC_CANDIDATES("accumulo.gc.candidates", MetricType.GAUGE,
- "Number of files that are candidates for deletion.",
MetricCategory.GARBAGE_COLLECTION),
+ "Number of files that are candidates for deletion.",
MetricDocSection.GARBAGE_COLLECTION),
GC_IN_USE("accumulo.gc.in.use", MetricType.GAUGE, "Number of candidate files
still in use.",
- MetricCategory.GARBAGE_COLLECTION),
+ MetricDocSection.GARBAGE_COLLECTION),
GC_DELETED("accumulo.gc.deleted", MetricType.GAUGE, "Number of candidate
files deleted.",
- MetricCategory.GARBAGE_COLLECTION),
+ MetricDocSection.GARBAGE_COLLECTION),
GC_ERRORS("accumulo.gc.errors", MetricType.GAUGE, "Number of candidate
deletion errors.",
- MetricCategory.GARBAGE_COLLECTION),
+ MetricDocSection.GARBAGE_COLLECTION),
GC_WAL_STARTED("accumulo.gc.wal.started", MetricType.GAUGE,
- "Timestamp GC WAL collection cycle started.",
MetricCategory.GARBAGE_COLLECTION),
+ "Timestamp GC WAL collection cycle started.",
MetricDocSection.GARBAGE_COLLECTION),
GC_WAL_FINISHED("accumulo.gc.wal.finished", MetricType.GAUGE,
- "Timestamp GC WAL collect cycle finished.",
MetricCategory.GARBAGE_COLLECTION),
+ "Timestamp GC WAL collection cycle finished.",
MetricDocSection.GARBAGE_COLLECTION),
GC_WAL_CANDIDATES("accumulo.gc.wal.candidates", MetricType.GAUGE,
- "Number of files that are candidates for deletion.",
MetricCategory.GARBAGE_COLLECTION),
+ "Number of files that are candidates for deletion.",
MetricDocSection.GARBAGE_COLLECTION),
GC_WAL_IN_USE("accumulo.gc.wal.in.use", MetricType.GAUGE,
- "Number of wal file candidates that are still in use.",
MetricCategory.GARBAGE_COLLECTION),
+ "Number of wal file candidates that are still in use.",
MetricDocSection.GARBAGE_COLLECTION),
GC_WAL_DELETED("accumulo.gc.wal.deleted", MetricType.GAUGE,
- "Number of candidate wal files deleted.",
MetricCategory.GARBAGE_COLLECTION),
+ "Number of candidate wal files deleted.",
MetricDocSection.GARBAGE_COLLECTION),
GC_WAL_ERRORS("accumulo.gc.wal.errors", MetricType.GAUGE,
- "Number candidate wal file deletion errors.",
MetricCategory.GARBAGE_COLLECTION),
+ "Number of candidate wal file deletion errors.",
MetricDocSection.GARBAGE_COLLECTION),
GC_POST_OP_DURATION("accumulo.gc.post.op.duration", MetricType.GAUGE,
"GC metadata table post operation duration in milliseconds.",
- MetricCategory.GARBAGE_COLLECTION),
+ MetricDocSection.GARBAGE_COLLECTION),
GC_RUN_CYCLE("accumulo.gc.run.cycle", MetricType.GAUGE,
"Count of gc cycle runs. Value is reset on process start.",
- MetricCategory.GARBAGE_COLLECTION),
+ MetricDocSection.GARBAGE_COLLECTION),
// Tablet Server Metrics
- TSERVER_ENTRIES("accumulo.tserver.entries", MetricType.GAUGE, "Number of
entries.",
- MetricCategory.TABLET_SERVER),
+ TSERVER_ENTRIES("accumulo.tserver.entries", MetricType.GAUGE,
+ "Number of entries assigned to a TabletServer.",
MetricDocSection.TABLET_SERVER),
TSERVER_MEM_ENTRIES("accumulo.tserver.entries.mem", MetricType.GAUGE,
- "Number of entries in memory.", MetricCategory.TABLET_SERVER),
- TSERVER_MINC_QUEUED("accumulo.tserver.minc.queued", MetricType.GAUGE,
- "Number of queued minor compactions.", MetricCategory.COMPACTION),
- TSERVER_MINC_RUNNING("accumulo.tserver.minc.running", MetricType.GAUGE,
- "Number of active minor compactions.", MetricCategory.COMPACTION),
- TSERVER_MINC_TOTAL("accumulo.tserver.minc.total", MetricType.GAUGE,
- "Total number of minor compactions performed.",
MetricCategory.COMPACTION),
- TSERVER_TABLETS_ONLINE("accumulo.tserver.tablets.online", MetricType.GAUGE,
- "Number of online tablets.", MetricCategory.TABLET_SERVER),
-
TSERVER_TABLETS_LONG_ASSIGNMENTS("accumulo.tserver.tablets.assignments.warning",
MetricType.GAUGE,
+ "Number of entries in memory.", MetricDocSection.TABLET_SERVER),
+ TSERVER_MINC_QUEUED("accumulo.minc.queued", MetricType.GAUGE,
+ "Number of queued minor compactions.", MetricDocSection.COMPACTION),
+ TSERVER_MINC_RUNNING("accumulo.minc.running", MetricType.GAUGE,
+ "Number of active minor compactions.", MetricDocSection.COMPACTION),
+ TSERVER_MINC_TOTAL("accumulo.minc.total", MetricType.GAUGE,
+ "Total number of minor compactions performed.",
MetricDocSection.COMPACTION),
+ TSERVER_TABLETS_ONLINE("accumulo.tablets.online", MetricType.GAUGE, "Number
of online tablets.",
+ MetricDocSection.TABLET_SERVER),
+ TSERVER_TABLETS_LONG_ASSIGNMENTS("accumulo.tablets.assignments.warning",
MetricType.GAUGE,
"Number of tablet assignments that are taking longer than the configured
warning duration.",
- MetricCategory.TABLET_SERVER),
- TSERVER_TABLETS_OPENING("accumulo.tserver.tablets.opening", MetricType.GAUGE,
- "Number of opening tablets.", MetricCategory.TABLET_SERVER),
- TSERVER_TABLETS_UNOPENED("accumulo.tserver.tablets.unopened",
MetricType.GAUGE,
- "Number of unopened tablets.", MetricCategory.TABLET_SERVER),
- TSERVER_TABLETS_FILES("accumulo.tserver.tablets.files", MetricType.GAUGE,
- "Number of files per tablet.", MetricCategory.TABLET_SERVER),
- TSERVER_INGEST_ENTRIES("accumulo.tserver.ingest.entries", MetricType.GAUGE,
+ MetricDocSection.TABLET_SERVER),
+ TSERVER_TABLETS_OPENING("accumulo.tablets.opening", MetricType.GAUGE,
+ "Number of opening tablets.", MetricDocSection.TABLET_SERVER),
+ TSERVER_TABLETS_UNOPENED("accumulo.tablets.unopened", MetricType.GAUGE,
+ "Number of unopened tablets.", MetricDocSection.TABLET_SERVER),
+ TSERVER_TABLETS_FILES("accumulo.tablets.files", MetricType.GAUGE, "Number of
files per tablet.",
+ MetricDocSection.TABLET_SERVER),
+ TSERVER_INGEST_ENTRIES("accumulo.ingest.entries", MetricType.GAUGE,
"Ingest entry (a key/value) count. The rate can be derived from this
metric.",
- MetricCategory.TABLET_SERVER),
- TSERVER_INGEST_BYTES("accumulo.tserver.ingest.bytes", MetricType.GAUGE,
- "Ingest byte count. The rate can be derived from this metric.",
MetricCategory.TABLET_SERVER),
- TSERVER_HOLD("accumulo.tserver.hold", MetricType.GAUGE,
- "Duration for which commits have been held in milliseconds.",
MetricCategory.TABLET_SERVER),
- TSERVER_TABLETS_ONLINE_ONDEMAND("accumulo.tserver.tablets.ondemand.online",
MetricType.GAUGE,
- "Number of online on-demand tablets", MetricCategory.TABLET_SERVER),
-
TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM("accumulo.tserver.tablets.ondemand.unloaded.lowmem",
+ MetricDocSection.TABLET_SERVER),
+ TSERVER_INGEST_BYTES("accumulo.ingest.bytes", MetricType.GAUGE,
+ "Ingest byte count. The rate can be derived from this metric.",
+ MetricDocSection.TABLET_SERVER),
+ TSERVER_HOLD("accumulo.ingest.hold", MetricType.GAUGE,
+ "Duration for which commits have been held in milliseconds.",
MetricDocSection.TABLET_SERVER),
+ TSERVER_TABLETS_ONLINE_ONDEMAND("accumulo.tablets.ondemand.online",
MetricType.GAUGE,
+ "Number of online on-demand tablets", MetricDocSection.TABLET_SERVER),
+
TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM("accumulo.tablets.ondemand.unloaded.lowmem",
MetricType.GAUGE, "Number of online on-demand tablets unloaded due to
low memory",
- MetricCategory.TABLET_SERVER),
+ MetricDocSection.TABLET_SERVER),
- // Scan Metrics
+ // Scan Server Metrics
SCAN_RESERVATION_TOTAL_TIMER("accumulo.scan.reservation.total.timer",
MetricType.TIMER,
- "Time to reserve a tablet's files for scan.",
MetricCategory.SCAN_SERVER),
+ "Time to reserve a tablet's files for scan.",
MetricDocSection.SCAN_SERVER),
SCAN_RESERVATION_WRITEOUT_TIMER("accumulo.scan.reservation.writeout.timer",
MetricType.TIMER,
- "Time to write out a tablets file reservations for scan.",
MetricCategory.SCAN_SERVER),
+ "Time to write out a tablet's file reservations for scan.",
MetricDocSection.SCAN_SERVER),
SCAN_RESERVATION_CONFLICT_COUNTER("accumulo.scan.reservation.conflict.count",
MetricType.COUNTER,
"Count of instances where file reservation attempts for scans
encountered conflicts.",
- MetricCategory.SCAN_SERVER),
- SCAN_BUSY_TIMEOUT_COUNT("accumulo.scan.busy.timeout.count",
MetricType.COUNTER,
- "Count of the scans where a busy timeout happened.",
MetricCategory.SCAN_SERVER),
+ MetricDocSection.SCAN_SERVER),
SCAN_TABLET_METADATA_CACHE("accumulo.scan.tablet.metadata.cache",
MetricType.CACHE,
- "Scan server tablet cache metrics.", MetricCategory.SCAN_SERVER),
+ "Scan server tablet cache metrics.", MetricDocSection.SCAN_SERVER),
+
+ // Scan Metrics
+ SCAN_BUSY_TIMEOUT_COUNT("accumulo.scan.busy.timeout.count",
MetricType.COUNTER,
+ "Count of the scans where a busy timeout happened.",
MetricDocSection.SCAN),
SCAN_TIMES("accumulo.scan.times", MetricType.TIMER, "Scan session lifetime
(creation to close).",
- MetricCategory.SCAN),
+ MetricDocSection.SCAN),
SCAN_OPEN_FILES("accumulo.scan.files.open", MetricType.GAUGE, "Number of
files open for scans.",
- MetricCategory.SCAN),
- SCAN_RESULTS("accumulo.scan.result", MetricType.GAUGE, "Results per scan.",
MetricCategory.SCAN),
+ MetricDocSection.SCAN),
+ SCAN_RESULTS("accumulo.scan.result", MetricType.GAUGE, "Results per scan.",
+ MetricDocSection.SCAN),
SCAN_YIELDS("accumulo.scan.yields", MetricType.GAUGE, "Counts scans that
have yielded.",
- MetricCategory.SCAN),
+ MetricDocSection.SCAN),
SCAN_START("accumulo.scan.start", MetricType.COUNTER,
- "Number of calls to start a scan or multiscan.", MetricCategory.SCAN),
+ "Number of calls to start a scan or multiscan.", MetricDocSection.SCAN),
SCAN_CONTINUE("accumulo.scan.continue", MetricType.COUNTER,
- "Number of calls to continue a scan or multiscan.", MetricCategory.SCAN),
+ "Number of calls to continue a scan or multiscan.",
MetricDocSection.SCAN),
SCAN_CLOSE("accumulo.scan.close", MetricType.COUNTER,
- "Number of calls to close a scan or multiscan.", MetricCategory.SCAN),
+ "Number of calls to close a scan or multiscan.", MetricDocSection.SCAN),
SCAN_QUERIES("accumulo.scan.queries", MetricType.GAUGE, "Number of queries
made during scans.",
- MetricCategory.SCAN),
+ MetricDocSection.SCAN),
SCAN_SCANNED_ENTRIES("accumulo.scan.query.scanned.entries", MetricType.GAUGE,
- "Count of scanned entries. The rate can be derived from this metric.",
MetricCategory.SCAN),
+ "Count of scanned entries. The rate can be derived from this metric.",
MetricDocSection.SCAN),
SCAN_QUERY_SCAN_RESULTS("accumulo.scan.query.results", MetricType.GAUGE,
- "Query count. The rate can be derived from this metric.",
MetricCategory.SCAN),
+ "Query count. The rate can be derived from this metric.",
MetricDocSection.SCAN),
SCAN_QUERY_SCAN_RESULTS_BYTES("accumulo.scan.query.results.bytes",
MetricType.GAUGE,
- "Query byte count. The rate can be derived from this metric.",
MetricCategory.SCAN),
+ "Query byte count. The rate can be derived from this metric.",
MetricDocSection.SCAN),
SCAN_PAUSED_FOR_MEM("accumulo.scan.paused.for.memory", MetricType.COUNTER,
- "Count of scans paused due to server being low on memory.",
MetricCategory.SCAN),
+ "Count of scans paused due to server being low on memory.",
MetricDocSection.SCAN),
SCAN_RETURN_FOR_MEM("accumulo.scan.return.early.for.memory",
MetricType.COUNTER,
"Count of scans that returned results early due to server being low on
memory.",
- MetricCategory.SCAN),
+ MetricDocSection.SCAN),
SCAN_ZOMBIE_THREADS("accumulo.scan.zombie.threads", MetricType.GAUGE,
- "Number of scan threads that have no associated client session.",
MetricCategory.SCAN),
+ "Number of scan threads that have no associated client session.",
MetricDocSection.SCAN),
// Major Compaction Metrics
MAJC_PAUSED("accumulo.compaction.majc.paused", MetricType.COUNTER,
- "Number of paused major compactions.", MetricCategory.COMPACTOR),
+ "Number of paused major compactions.", MetricDocSection.COMPACTOR),
// Minor Compaction Metrics
- MINC_QUEUED("accumulo.tserver.compactions.minc.queued", MetricType.TIMER,
- "Queued minor compactions time queued.", MetricCategory.COMPACTION),
- MINC_RUNNING("accumulo.tserver.compactions.minc.running", MetricType.TIMER,
- "Minor compactions time active.", MetricCategory.COMPACTION),
- MINC_PAUSED("accumulo.tserver.compactions.minc.paused", MetricType.COUNTER,
- "Number of paused minor compactions.", MetricCategory.COMPACTION),
+ MINC_QUEUED("accumulo.compaction.minc.queued", MetricType.TIMER,
+ "Queued minor compactions time queued.", MetricDocSection.COMPACTION),
+ MINC_RUNNING("accumulo.compaction.minc.running", MetricType.TIMER,
+ "Minor compactions time active.", MetricDocSection.COMPACTION),
+ MINC_PAUSED("accumulo.compaction.minc.paused", MetricType.COUNTER,
+ "Number of paused minor compactions.", MetricDocSection.COMPACTION),
// Updates (Ingest) Metrics
- UPDATE_ERRORS("accumulo.tserver.updates.error", MetricType.GAUGE,
+ UPDATE_ERRORS("accumulo.updates.error", MetricType.GAUGE,
"Count of errors during tablet updates. Type/reason for error is stored
in the `type` tag (e.g., type=permission, type=unknown.tablet,
type=constraint.violation).",
- MetricCategory.TABLET_SERVER),
- UPDATE_LOCK("accumulo.tserver.updates.lock", MetricType.TIMER,
+ MetricDocSection.TABLET_SERVER),
+ UPDATE_LOCK("accumulo.updates.lock", MetricType.TIMER,
"Average time taken for conditional mutation to get a row lock.",
- MetricCategory.TABLET_SERVER),
- UPDATE_CHECK("accumulo.tserver.updates.check", MetricType.TIMER,
+ MetricDocSection.TABLET_SERVER),
+ UPDATE_CHECK("accumulo.updates.check", MetricType.TIMER,
"Average time taken for conditional mutation to check conditions.",
- MetricCategory.TABLET_SERVER),
- UPDATE_COMMIT("accumulo.tserver.updates.commit", MetricType.TIMER,
- "Average time taken to commit a mutation.",
MetricCategory.TABLET_SERVER),
- UPDATE_COMMIT_PREP("accumulo.tserver.updates.commit.prep", MetricType.TIMER,
- "Average time taken to prepare to commit a single mutation.",
MetricCategory.TABLET_SERVER),
- UPDATE_WALOG_WRITE("accumulo.tserver.updates.walog.write", MetricType.TIMER,
- "Time taken to write a batch of mutations to WAL.",
MetricCategory.TABLET_SERVER),
- UPDATE_MUTATION_ARRAY_SIZE("accumulo.tserver.updates.mutation.arrays.size",
+ MetricDocSection.TABLET_SERVER),
+ UPDATE_COMMIT("accumulo.updates.commit", MetricType.TIMER,
+ "Average time taken to commit a mutation.",
MetricDocSection.TABLET_SERVER),
+ UPDATE_COMMIT_PREP("accumulo.updates.commit.prep", MetricType.TIMER,
+ "Average time taken to prepare to commit a single mutation.",
MetricDocSection.TABLET_SERVER),
+ UPDATE_WALOG_WRITE("accumulo.updates.walog.write", MetricType.TIMER,
+ "Time taken to write a batch of mutations to WAL.",
MetricDocSection.TABLET_SERVER),
+ UPDATE_MUTATION_ARRAY_SIZE("accumulo.updates.mutation.arrays.size",
MetricType.DISTRIBUTION_SUMMARY, "Batch size of mutations from client.",
- MetricCategory.TABLET_SERVER),
-
- // Thrift Metrics
-
- THRIFT_IDLE("accumulo.thrift.idle", MetricType.DISTRIBUTION_SUMMARY,
- "Time waiting to execute an RPC request.", MetricCategory.THRIFT),
- THRIFT_EXECUTE("accumulo.thrift.execute", MetricType.DISTRIBUTION_SUMMARY,
- "Time to execute an RPC request.", MetricCategory.THRIFT),
+ MetricDocSection.TABLET_SERVER),
// Block Cache Metrics
BLOCKCACHE_INDEX_HITCOUNT("accumulo.blockcache.index.hitcount",
MetricType.FUNCTION_COUNTER,
- "Index block cache hit count.", MetricCategory.BLOCK_CACHE),
+ "Index block cache hit count.", MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_INDEX_REQUESTCOUNT("accumulo.blockcache.index.requestcount",
- MetricType.FUNCTION_COUNTER, "Index block cache request count.",
MetricCategory.BLOCK_CACHE),
+ MetricType.FUNCTION_COUNTER, "Index block cache request count.",
+ MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_INDEX_EVICTIONCOUNT("accumulo.blockcache.index.evictioncount",
- MetricType.FUNCTION_COUNTER, "Index block cache eviction count.",
MetricCategory.BLOCK_CACHE),
+ MetricType.FUNCTION_COUNTER, "Index block cache eviction count.",
+ MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_DATA_HITCOUNT("accumulo.blockcache.data.hitcount",
MetricType.FUNCTION_COUNTER,
- "Data block cache hit count.", MetricCategory.BLOCK_CACHE),
+ "Data block cache hit count.", MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_DATA_REQUESTCOUNT("accumulo.blockcache.data.requestcount",
MetricType.FUNCTION_COUNTER,
- "Data block cache request count.", MetricCategory.BLOCK_CACHE),
+ "Data block cache request count.", MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_DATA_EVICTIONCOUNT("accumulo.blockcache.data.evictioncount",
- MetricType.FUNCTION_COUNTER, "Data block cache eviction count.",
MetricCategory.BLOCK_CACHE),
+ MetricType.FUNCTION_COUNTER, "Data block cache eviction count.",
+ MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_SUMMARY_HITCOUNT("accumulo.blockcache.summary.hitcount",
MetricType.FUNCTION_COUNTER,
- "Summary block cache hit count.", MetricCategory.BLOCK_CACHE),
+ "Summary block cache hit count.", MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_SUMMARY_REQUESTCOUNT("accumulo.blockcache.summary.requestcount",
MetricType.FUNCTION_COUNTER, "Summary block cache request count.",
- MetricCategory.BLOCK_CACHE),
+ MetricDocSection.BLOCK_CACHE),
BLOCKCACHE_SUMMARY_EVICTIONCOUNT("accumulo.blockcache.summary.evictioncount",
MetricType.FUNCTION_COUNTER, "Summary block cache eviction count.",
- MetricCategory.BLOCK_CACHE),
+ MetricDocSection.BLOCK_CACHE),
// Manager Metrics
-
MANAGER_BALANCER_MIGRATIONS_NEEDED("accumulo.manager.balancer.migrations.needed",
- MetricType.GAUGE,
+ MANAGER_BALANCER_MIGRATIONS_NEEDED("accumulo.balancer.migrations.needed",
MetricType.GAUGE,
"The number of migrations that need to complete before the system is
balanced.",
- MetricCategory.MANAGER),
- MANAGER_ROOT_TGW_ERRORS("accumulo.manager.tabletmgmt.root.errors",
MetricType.GAUGE,
+ MetricDocSection.MANAGER),
+ MANAGER_ROOT_TGW_ERRORS("accumulo.tabletmgmt.root.errors", MetricType.GAUGE,
"Error count encountered by the TabletGroupWatcher for the ROOT data
level.",
- MetricCategory.MANAGER),
- MANAGER_META_TGW_ERRORS("accumulo.manager.tabletmgmt.meta.errors",
MetricType.GAUGE,
+ MetricDocSection.MANAGER),
+ MANAGER_META_TGW_ERRORS("accumulo.tabletmgmt.meta.errors", MetricType.GAUGE,
"Error count encountered by the TabletGroupWatcher for the META data
level.",
- MetricCategory.MANAGER),
- MANAGER_USER_TGW_ERRORS("accumulo.manager.tabletmgmt.user.errors",
MetricType.GAUGE,
+ MetricDocSection.MANAGER),
+ MANAGER_USER_TGW_ERRORS("accumulo.tabletmgmt.user.errors", MetricType.GAUGE,
"Error count encountered by the TabletGroupWatcher for the USER data
level.",
- MetricCategory.MANAGER),
-
MANAGER_COMPACTION_SVC_ERRORS("accumulo.manager.compaction.svc.misconfigured",
MetricType.GAUGE,
- "A value of 1 indicates a misconfiguration in the compaction service,
while a value of 0 indicates that the configuration is valid.",
- MetricCategory.MANAGER);
+ MetricDocSection.MANAGER);
private final String name;
private final MetricType type;
private final String description;
- private final MetricCategory category;
+ private final MetricDocSection section;
private static final Map<String,Metric> NAME_TO_ENUM_MAP = new HashMap<>();
@@ -285,11 +289,11 @@ public enum Metric {
}
}
- Metric(String name, MetricType type, String description, MetricCategory
category) {
+ Metric(String name, MetricType type, String description, MetricDocSection
section) {
this.name = name;
this.type = type;
this.description = description;
- this.category = category;
+ this.section = section;
}
public String getName() {
@@ -304,36 +308,49 @@ public enum Metric {
return description;
}
- public MetricCategory getCategory() {
- return category;
+ public MetricDocSection getDocSection() {
+ return section;
}
public enum MetricType {
GAUGE, COUNTER, TIMER, LONG_TASK_TIMER, DISTRIBUTION_SUMMARY,
FUNCTION_COUNTER, CACHE
}
- public enum MetricCategory {
- GENERAL_SERVER("General Server Metrics"),
- COMPACTION("Compaction Metrics"),
- COMPACTOR("Compactor Metrics"),
- FATE("Fate Metrics"),
- GARBAGE_COLLECTION("Garbage Collection Metrics"),
- TABLET_SERVER("Tablet Server Metrics"),
- SCAN("Scan Metrics"),
- SCAN_SERVER("Scan Server Metrics"),
- THRIFT("Thrift Metrics"),
- BLOCK_CACHE("Block Cache Metrics"),
- MANAGER("Manager Metrics");
+ public enum MetricDocSection {
+ GENERAL_SERVER("General Server Metrics", "Metrics that are generated
across all server types."),
+ COMPACTION("Compaction Metrics",
+ "Metrics specific to compactions, both minor and major. Metrics for
major compactions"
+ + " will likely have a 'queue.id' tag. The CompactionCoordinator
component in the Manager creates a queue for each CompactionService"
+ + " in the configuration. The 'queue.id' tag may map directly to
the name of a Compactor resource group."),
+ COMPACTOR("Compactor Metrics", "Metrics that are generated by the
Compactor processes."),
+ FATE("Fate Metrics",
+ "Metrics that are generated by the Fate component in the Manager
process."),
+ GARBAGE_COLLECTION("Garbage Collection Metrics",
+ "Metrics that are generated by the Garbage Collector process."),
+ TABLET_SERVER("Tablet Server Metrics",
+ "Metrics that are generated by the TabletServer processes."),
+ SCAN("Scan Metrics",
+ "Metrics specific to scans, which can be executed in the ScanServer or
the TabletServer."),
+ SCAN_SERVER("Scan Server Metrics", "Metrics that are generated by the
ScanServer processes."),
+ BLOCK_CACHE("Block Cache Metrics",
+ "Metrics specific to RFile block cache usage in the ScanServer and
TabletServer processes."),
+ MANAGER("Manager Metrics", "Metrics that are generated by the Manager
process.");
private final String sectionTitle;
+ private final String description;
- MetricCategory(String sectionTitle) {
+ MetricDocSection(String sectionTitle, String description) {
this.sectionTitle = sectionTitle;
+ this.description = description;
}
public String getSectionTitle() {
return sectionTitle;
}
+
+ public String getDescription() {
+ return description;
+ }
}
public static Metric fromName(String name) {
diff --git
a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsDocGen.java
b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsDocGen.java
index 7fd69bbe5d..63679b90fd 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsDocGen.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsDocGen.java
@@ -39,8 +39,8 @@ public class MetricsDocGen {
pageHeader();
generateTableOfContents();
- for (var category : Metric.MetricCategory.values()) {
- generateCategorySection(category, category.getSectionTitle());
+ for (var section : Metric.MetricDocSection.values()) {
+ generateCategorySection(section);
}
}
@@ -52,27 +52,32 @@ public class MetricsDocGen {
doc.println("---\n");
doc.println("<!-- WARNING: Do not edit this file. It is a generated file"
+ " that is copied from Accumulo build (from
core/target/generated-docs) -->\n");
- doc.println("Below are the metrics used to monitor various components of
Accumulo.\n");
+ doc.println(
+ "Below are the metrics used to monitor various components of Accumulo.
Metrics emitted by Accumulo should"
+ + " contain the following tags: 'instance.name', 'resource.group',
'process.name', 'host' and 'port'. Metrics"
+ + " emitted by Accumulo may contain additional tags where we think
it makes sense to capture per-object metrics,"
+ + " for example on a table or tablet basis in the ScanServer and
TabletServer, or on a per-queue basis in the"
+ + " CompactionCoordinator.\n");
}
void generateTableOfContents() {
doc.println("## Table of Contents\n");
- for (var category : Metric.MetricCategory.values()) {
+ for (var category : Metric.MetricDocSection.values()) {
String sectionId = generateSectionId(category.getSectionTitle());
doc.println("- [" + category.getSectionTitle() + "](#" + sectionId +
")");
}
doc.println();
}
- void generateCategorySection(Metric.MetricCategory category, String
sectionTitle) {
- String sectionId = generateSectionId(sectionTitle);
- beginSection(sectionTitle, sectionId);
+ void generateCategorySection(Metric.MetricDocSection section) {
+ String sectionId = generateSectionId(section.getSectionTitle());
+ beginSection(section.getSectionTitle(), sectionId,
section.getDescription());
// Enable block-level HTML parsing
doc.println("{::options parse_block_html=\"true\" /}");
for (Metric metric : sortedMetrics) {
- if (metric.getCategory() == category) {
+ if (metric.getDocSection() == section) {
generateMetricSubsection(metric);
}
}
@@ -85,8 +90,9 @@ public class MetricsDocGen {
* Starts a new section in the documentation. In this case a section is a
category of metrics.
* Adds an anchor to the section title so we can link to it.
*/
- void beginSection(String sectionTitle, String sectionId) {
+ void beginSection(String sectionTitle, String sectionId, String
sectionDescription) {
doc.println("\n## <a id=\"" + sectionId + "\"></a>" + sectionTitle + "\n");
+ doc.println(sectionDescription + "\n");
}
/**
diff --git
a/server/manager/src/main/java/org/apache/accumulo/manager/metrics/ManagerMetrics.java
b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/ManagerMetrics.java
index 356e8429fa..031e2750b6 100644
---
a/server/manager/src/main/java/org/apache/accumulo/manager/metrics/ManagerMetrics.java
+++
b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/ManagerMetrics.java
@@ -19,7 +19,7 @@
package org.apache.accumulo.manager.metrics;
import static java.util.Objects.requireNonNull;
-import static
org.apache.accumulo.core.metrics.Metric.MANAGER_COMPACTION_SVC_ERRORS;
+import static org.apache.accumulo.core.metrics.Metric.COMPACTION_SVC_ERRORS;
import static org.apache.accumulo.core.metrics.Metric.MANAGER_META_TGW_ERRORS;
import static org.apache.accumulo.core.metrics.Metric.MANAGER_ROOT_TGW_ERRORS;
import static org.apache.accumulo.core.metrics.Metric.MANAGER_USER_TGW_ERRORS;
@@ -93,10 +93,8 @@ public class ManagerMetrics implements MetricsProducer {
.description(MANAGER_META_TGW_ERRORS.getDescription()).register(registry);
Gauge.builder(MANAGER_USER_TGW_ERRORS.getName(), userTGWErrorsGauge,
AtomicLong::get)
.description(MANAGER_USER_TGW_ERRORS.getDescription()).register(registry);
- Gauge
- .builder(MANAGER_COMPACTION_SVC_ERRORS.getName(),
compactionConfigurationError,
- AtomicInteger::get)
-
.description(MANAGER_COMPACTION_SVC_ERRORS.getDescription()).register(registry);
+ Gauge.builder(COMPACTION_SVC_ERRORS.getName(),
compactionConfigurationError, AtomicInteger::get)
+
.description(COMPACTION_SVC_ERRORS.getDescription()).register(registry);
}
public List<MetricsProducer> getProducers(AccumuloConfiguration conf,
Manager manager) {
diff --git
a/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java
b/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java
index b29b43aa77..b63f97963f 100644
---
a/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java
+++
b/test/src/main/java/org/apache/accumulo/test/compaction/BadCompactionServiceConfigIT.java
@@ -19,7 +19,7 @@
package org.apache.accumulo.test.compaction;
import static
org.apache.accumulo.core.Constants.DEFAULT_COMPACTION_SERVICE_NAME;
-import static
org.apache.accumulo.core.metrics.Metric.MANAGER_COMPACTION_SVC_ERRORS;
+import static org.apache.accumulo.core.metrics.Metric.COMPACTION_SVC_ERRORS;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.Collections;
@@ -141,7 +141,7 @@ public class BadCompactionServiceConfigIT extends
AccumuloClusterHarness {
if (shutdownTailer.get()) {
break;
}
- if (s.startsWith(MANAGER_COMPACTION_SVC_ERRORS.getName())) {
+ if (s.startsWith(COMPACTION_SVC_ERRORS.getName())) {
Metric m = TestStatsDSink.parseStatsDMetric(s);
Integer value = Integer.parseInt(m.getValue());
if (value == 0) {
@@ -265,7 +265,7 @@ public class BadCompactionServiceConfigIT extends
AccumuloClusterHarness {
if (shutdownTailer.get()) {
break;
}
- if (s.startsWith(MANAGER_COMPACTION_SVC_ERRORS.getName())) {
+ if (s.startsWith(COMPACTION_SVC_ERRORS.getName())) {
Metric m = TestStatsDSink.parseStatsDMetric(s);
Integer value = Integer.parseInt(m.getValue());
if (value == 0) {