[ https://issues.apache.org/jira/browse/HIVE-25429?focusedWorklogId=641044&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-641044 ]
ASF GitHub Bot logged work on HIVE-25429: ----------------------------------------- Author: ASF GitHub Bot Created on: 24/Aug/21 12:02 Start Date: 24/Aug/21 12:02 Worklog Time Spent: 10m Work Description: klcopp commented on a change in pull request #2563: URL: https://github.com/apache/hive/pull/2563#discussion_r694770338 ########## File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetricsOnTez.java ########## @@ -17,42 +17,59 @@ */ package org.apache.hadoop.hive.ql.txn.compactor; -import com.codahale.metrics.Gauge; import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; -import org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.CompactionType; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.metrics.MetricsConstants; import org.apache.hadoop.hive.ql.txn.compactor.metrics.DeltaFilesMetricReporter; +import org.apache.tez.dag.api.TezConfiguration; +import org.junit.After; import org.junit.Assert; import org.junit.Test; import java.text.MessageFormat; import java.util.HashMap; -import java.util.Map; import java.util.concurrent.TimeUnit; -import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactionMetrics.equivalent; -import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactionMetrics.gaugeToMap; +import static org.apache.hadoop.hive.ql.txn.compactor.TestDeltaFilesMetrics.gaugeToMap; +import static org.apache.hadoop.hive.ql.txn.compactor.TestDeltaFilesMetrics.verifyMetricsMatch; import static org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver; public class TestCompactionMetricsOnTez extends CompactorOnTezTest { - @Test - public void testDeltaFilesMetric() throws Exception { - MetricsFactory.close(); - HiveConf conf = driver.getConf(); + /** + * Use {@link CompactorOnTezTest#setupWithConf(org.apache.hadoop.hive.conf.HiveConf)} when HiveConf is + * configured to your liking. + */ + @Override + public void setup() { + } - HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true); - MetricsFactory.init(conf); + @After + public void tearDown() { + DeltaFilesMetricReporter.close(); + } + private void configureMetrics(HiveConf conf) { HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_OBSOLETE_DELTA_NUM_THRESHOLD, 0); HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_NUM_THRESHOLD, 0); HiveConf.setTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_REPORTING_INTERVAL, 1, TimeUnit.SECONDS); HiveConf.setTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_CHECK_THRESHOLD, 0, TimeUnit.SECONDS); HiveConf.setFloatVar(conf, HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_PCT_THRESHOLD, 0.7f); + } + + @Test + public void testDeltaFilesMetric() throws Exception { + HiveConf conf = new HiveConf(); + HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true); + configureMetrics(conf); + setupWithConf(conf); Review comment: We need to be able to set different (conflicting) configs before setupWithConf is called. setup() would need to be parametrized. Do you know how to do that? ########## File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetricsOnTez.java ########## @@ -105,12 +118,100 @@ public void testDeltaFilesMetric() throws Exception { executeStatementOnDriver("select avg(b) from " + tableName, driver); Thread.sleep(1000); - Assert.assertTrue( - equivalent( - new HashMap<String, String>() {{ + verifyMetricsMatch(new HashMap<String, String>() {{ put(tableName + Path.SEPARATOR + partitionToday, "1"); - }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS))); + }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS)); + } - DeltaFilesMetricReporter.close(); + /** + * Queries shouldn't fail, but metrics should be 0, if tez.counters.max limit is passed. + * @throws Exception + */ + @Test + public void testDeltaFilesMetricTezMaxCounters() throws Exception { + HiveConf conf = new HiveConf(); + conf.setInt(TezConfiguration.TEZ_COUNTERS_MAX, 50); + HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true); + configureMetrics(conf); + setupWithConf(conf); + + MetricsFactory.close(); + MetricsFactory.init(conf); + DeltaFilesMetricReporter.init(conf); + + String tableName = "test_metrics"; + CompactorOnTezTest.TestDataProvider testDataProvider = new CompactorOnTezTest.TestDataProvider(); + testDataProvider.createFullAcidTable(tableName, true, false); + // Create 51 partitions + for (int i = 0; i < 51; i++) { + executeStatementOnDriver("insert into " + tableName + " values('1', " + i * i + ", '" + i + "')", driver); + } + + // Touch all partitions + executeStatementOnDriver("select avg(b) from " + tableName, driver); + Thread.sleep(1000); + + Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size()); + Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_OBSOLETE_DELTAS).size()); + Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS).size()); + } + + /** + * Queries should succeed if additional acid metrics are disabled. + * @throws Exception + */ + @Test + public void testDeltaFilesMetricWithMetricsDisabled() throws Exception { + HiveConf conf = new HiveConf(); + HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, false); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON, true); + configureMetrics(conf); + super.setupWithConf(conf); + + MetricsFactory.close(); + MetricsFactory.init(conf); + + String tableName = "test_metrics"; + CompactorOnTezTest.TestDataProvider testDataProvider = new CompactorOnTezTest.TestDataProvider(); + testDataProvider.createFullAcidTable(tableName, true, false); + testDataProvider.insertTestDataPartitioned(tableName); + + executeStatementOnDriver("select avg(b) from " + tableName, driver); + + try { + Assert.assertEquals(0, gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size()); Review comment: Sure ########## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java ########## @@ -272,30 +272,34 @@ private void prepare(InputInitializerContext initializerContext) throws IOExcept String groupName = null; String vertexName = null; if (inputInitializerContext != null) { - tezCounters = new TezCounters(); - groupName = HiveInputCounters.class.getName(); - vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, ""); - counterName = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName); - tezCounters.findCounter(groupName, counterName).increment(splits.length); - final List<Path> paths = Utilities.getInputPathsTez(jobConf, work); - counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName); - tezCounters.findCounter(groupName, counterName).increment(paths.size()); - final Set<String> files = new HashSet<>(); - for (InputSplit inputSplit : splits) { - if (inputSplit instanceof FileSplit) { - final FileSplit fileSplit = (FileSplit) inputSplit; - final Path path = fileSplit.getPath(); - // The assumption here is the path is a file. Only case this is different is ACID deltas. - // The isFile check is avoided here for performance reasons. - final String fileStr = path.toString(); - if (!files.contains(fileStr)) { - files.add(fileStr); + try { + tezCounters = new TezCounters(); + groupName = HiveInputCounters.class.getName(); + vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, ""); + counterName = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName); + tezCounters.findCounter(groupName, counterName).increment(splits.length); + final List<Path> paths = Utilities.getInputPathsTez(jobConf, work); + counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName); + tezCounters.findCounter(groupName, counterName).increment(paths.size()); + final Set<String> files = new HashSet<>(); + for (InputSplit inputSplit : splits) { + if (inputSplit instanceof FileSplit) { + final FileSplit fileSplit = (FileSplit) inputSplit; + final Path path = fileSplit.getPath(); + // The assumption here is the path is a file. Only case this is different is ACID deltas. + // The isFile check is avoided here for performance reasons. + final String fileStr = path.toString(); + if (!files.contains(fileStr)) { + files.add(fileStr); + } } } + counterName = Utilities.getVertexCounterName(HiveInputCounters.INPUT_FILES.name(), vertexName); + tezCounters.findCounter(groupName, counterName).increment(files.size()); + DeltaFilesMetricReporter.createCountersForAcidMetrics(tezCounters, jobConf); + } catch (Exception e) { Review comment: Basically, the idea here is that we don't want the query to fail just because the tez counters aren't updated -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 641044) Time Spent: 1h 20m (was: 1h 10m) > Delta metrics collection may cause number of tez counters to exceed > tez.counters.max limit > ------------------------------------------------------------------------------------------ > > Key: HIVE-25429 > URL: https://issues.apache.org/jira/browse/HIVE-25429 > Project: Hive > Issue Type: Sub-task > Components: Hive > Affects Versions: 4.0.0 > Reporter: Karen Coppage > Assignee: Karen Coppage > Priority: Major > Labels: pull-request-available > Time Spent: 1h 20m > Remaining Estimate: 0h > > There's a limit to the number of tez counters allowed (tez.counters.max). > Delta metrics collection (i.e. DeltaFileMetricsReporter) was creating 3 > counters for each partition touched by a given query, which can result in a > huge number of counters, which is unnecessary because we're only interested > in n number of partitions with the most deltas. This change limits the number > of counters created to hive.txn.acid.metrics.max.cache.size*3. > Also when tez.counters.max is reached a LimitExceededException is thrown but > isn't caught on the Hive side and causes the query to fail. We should catch > this and skip delta metrics collection in this case. > Also make sure that metrics are only collected if > hive.metastore.acidmetrics.ext.on=true -- This message was sent by Atlassian Jira (v8.3.4#803005)