This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 6d715fe7d IMPALA-13596: Add warnings and exceptions to reading of
fair-scheduler file.
6d715fe7d is described below
commit 6d715fe7dc2ff2fa549952d5eac384a77bc0357b
Author: Andrew Sherman <[email protected]>
AuthorDate: Wed Dec 11 17:36:47 2024 -0800
IMPALA-13596: Add warnings and exceptions to reading of fair-scheduler file.
The fair-scheduler file contains part of the configuration for Admission
Control. This change adds some better error handling to the parsing of
this file. Where it is safe to do so, new exceptions are thrown; this
will cause Impala to refuse to start. This is consistent with other
serious configuration errors. Where new exceptions might cause problems
with existing configurations, or for less dangerous faults, new warnings
are written to the server log.
For the recently added User Quota configuration (IMPALA-12345) throw an
exception when a duplicate snippet of configuration is found.
New warning log messages are added for these cases:
- when a user quota at the leaf level is completely ignored because of
a user quota at the root level
- when there is no user ACL on a leaf level queue. This prevents any
queries from being submitted to the queue.
Change-Id: Idcd50442ce16e7c4346c6da1624216d694f6f44d
Reviewed-on: http://gerrit.cloudera.org:8080/22209
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../org/apache/impala/util/RequestPoolService.java | 5 +-
.../apache/impala/util/TestRequestPoolService.java | 116 +++++++++++++++++++--
.../duplicate_group_limit_at_leaf.xml | 15 +++
.../duplicate_group_limit_at_root.xml | 13 +++
.../duplicate_user_limit_at_leaf.xml | 15 +++
.../duplicate_user_limit_at_root.xml | 13 +++
.../warnings-fair-scheduler-test.xml | 32 ++++++
.../fair/AllocationFileLoaderService.java | 80 +++++++++++++-
8 files changed, 277 insertions(+), 12 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/util/RequestPoolService.java
b/fe/src/main/java/org/apache/impala/util/RequestPoolService.java
index 4b1ad45a9..291c14243 100644
--- a/fe/src/main/java/org/apache/impala/util/RequestPoolService.java
+++ b/fe/src/main/java/org/apache/impala/util/RequestPoolService.java
@@ -34,12 +34,9 @@ import org.slf4j.LoggerFactory;
import org.apache.impala.authorization.User;
import org.apache.impala.common.ByteUnits;
-import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.InternalException;
-import org.apache.impala.common.JniUtil;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.thrift.TErrorCode;
-import org.apache.impala.thrift.TPoolConfigParams;
import org.apache.impala.thrift.TPoolConfig;
import org.apache.impala.thrift.TResolveRequestPoolParams;
import org.apache.impala.thrift.TResolveRequestPoolResult;
@@ -293,6 +290,8 @@ public class RequestPoolService {
stopInternal();
}
+ public boolean isRunning() { return running_.get(); }
+
/**
* Stops the RequestPoolService instance without checking the running state.
Only
* called by stop() (which is only used in tests) or by start() if a failure
occurs.
diff --git
a/fe/src/test/java/org/apache/impala/util/TestRequestPoolService.java
b/fe/src/test/java/org/apache/impala/util/TestRequestPoolService.java
index 85f356236..ca748a3d0 100644
--- a/fe/src/test/java/org/apache/impala/util/TestRequestPoolService.java
+++ b/fe/src/test/java/org/apache/impala/util/TestRequestPoolService.java
@@ -26,12 +26,16 @@ import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.log4j.AppenderSkeleton;
+import org.apache.log4j.spi.LoggingEvent;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
@@ -47,7 +51,6 @@ import static
org.apache.impala.yarn.server.resourcemanager.scheduler.fair.
AllocationFileLoaderService.addQueryLimits;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.util.StringUtils;
import
org.apache.impala.yarn.server.resourcemanager.scheduler.fair.AllocationFileLoaderService;
@@ -120,12 +123,12 @@ public class TestRequestPoolService {
*/
private void createPoolService(String allocationFile, String llamaConfFile)
throws Exception {
- allocationConfFile_ = tempFolder.newFile("fair-scheduler-temp-file.xml");
+ allocationConfFile_ = tempFolder.newFile();
Files.copy(getClasspathFile(allocationFile), allocationConfFile_);
String llamaConfPath = null;
if (llamaConfFile != null) {
- llamaConfFile_ = tempFolder.newFile("llama-conf-temp-file.xml");
+ llamaConfFile_ = tempFolder.newFile();
Files.copy(getClasspathFile(llamaConfFile), llamaConfFile_);
llamaConfPath = llamaConfFile_.getAbsolutePath();
}
@@ -162,7 +165,7 @@ public class TestRequestPoolService {
@After
public void cleanUp() throws Exception {
- if (poolService_ != null) poolService_.stop();
+ if (poolService_ != null && poolService_.isRunning()) poolService_.stop();
}
/**
@@ -221,6 +224,108 @@ public class TestRequestPoolService {
checkPoolAcls("root.queueD", asList("userB", "userA"), asList("userZ"));
}
+ /**
+ * @return true if any string in the list contains the given substring.
+ */
+ private static boolean containsSubstring(List<String> list, String
substring) {
+ for (String str : list) {
+ if (str.contains(substring)) { return true; }
+ }
+ return false;
+ }
+
+ /**
+ * Test that the correct exceptions that are thrown when the fair-scheduler
+ * configuration file contains errors.
+ */
+ @Test
+ public void testBadConfiguration() {
+ List<String> bad_configs = Arrays.asList(
+ "duplicate_user_limit_at_leaf.xml",
+ "duplicate_user_limit_at_root.xml",
+ "duplicate_group_limit_at_leaf.xml",
+ "duplicate_group_limit_at_root.xml"
+ );
+ // The expected errors from the config files in bad_configs, above.
+ List<String> expected_errors = Arrays.asList(
+ "Duplicate entry for user 'alice' in pool 'root.group-set-small' has
multiple "
+ + "values 4 and 5",
+ "Duplicate entry for user 'alice' in pool 'root' has multiple values 4
and 5",
+ "Duplicate entry for group 'it' in pool 'root.group-set-small' has
multiple "
+ + "values 2 and 3",
+ "Duplicate entry for group 'it' in pool 'root' has multiple values 2
and 3");
+ for (int i = 0; i < bad_configs.size(); i++) {
+ String config = bad_configs.get(i);
+ String expected_error = expected_errors.get(i);
+ try {
+ createPoolService("bad_configurations/" + config, LLAMA_CONFIG_FILE);
+ Assert.fail("should have got exception");
+ } catch (Exception e) {
+ Assert.assertTrue(e.getMessage().contains(expected_error));
+ }
+ }
+ }
+
+ /**
+ * A log appender that stores log messages, and allows them to be read.
+ */
+ private static class ReadableAppender extends AppenderSkeleton {
+ List<String> messages = new ArrayList<>();
+ @Override
+ protected void append(LoggingEvent loggingEvent) {
+ messages.add(loggingEvent.getMessage().toString());
+ }
+
+ public List<String> getMessages() { return messages; }
+
+ @Override
+ public void close() {}
+
+ @Override
+ public boolean requiresLayout() {
+ return false;
+ }
+ }
+
+ /**
+ * Test the warnings that are produced after the fair-scheduler file has
been read.
+ */
+ @Test
+ public void testVerifyConfiguration() throws Exception {
+ // Capture log messages
+ Log log = LogFactory.getLog(AllocationFileLoaderService.class.getName());
+ Log4JLogger log4JLogger = (Log4JLogger) log;
+ ReadableAppender logAppender = new ReadableAppender();
+ log4JLogger.getLogger().addAppender(logAppender);
+
+ try {
+ List<String> expected_messages = Arrays.asList(
+ "Completed loading allocation file",
+ "Potential misconfiguration in queue 'root.group-set-small': the
user limit " +
+ "for 'howard' of 100 is greater than the root limit 4 and so
will have " +
+ "no effect",
+ "Potential misconfiguration in queue 'root.group-set-small': the
user limit " +
+ "for '*' of 12 is greater than the root limit 8 and so will have
no effect",
+ "Potential misconfiguration in queue 'root.group-set-small': the
group " +
+ "limit for 'support' of 10 is greater than the root limit 6 and
so will " +
+ "have no effect",
+ "Potential misconfiguration in queue 'root.group-set-small': no " +
+ "aclSubmitApps permissions were found, this will prevent query "
+
+ "submission on this queue");
+
+ createPoolService(
+ "bad_configurations/warnings-fair-scheduler-test.xml",
LLAMA_CONFIG_FILE);
+
+ List<String> messages = logAppender.getMessages();
+
+ // Check for expected warnings
+ for (String expected_warning : expected_messages) {
+ Assert.assertTrue("missing message: " + expected_warning + " in " +
messages,
+ containsSubstring(messages, expected_warning));
+ }
+ } finally { log4JLogger.getLogger().removeAppender(logAppender); }
+ }
+
/**
* Check that the access to the pool is as expected.
* @param queueName name of queue.
@@ -497,7 +602,6 @@ public class TestRequestPoolService {
Assert.assertEquals(expected3, parsed3);
}
-
/**
* Unit test for doQueryLimitParsing() error cases.
*/
diff --git
a/fe/src/test/resources/bad_configurations/duplicate_group_limit_at_leaf.xml
b/fe/src/test/resources/bad_configurations/duplicate_group_limit_at_leaf.xml
new file mode 100644
index 000000000..d61da1ee5
--- /dev/null
+++ b/fe/src/test/resources/bad_configurations/duplicate_group_limit_at_leaf.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<allocations>
+ <queue name="root">
+ <queue name="group-set-small">
+ <groupQueryLimit>
+ <group>it</group>
+ <totalCount>2</totalCount>
+ </groupQueryLimit>
+ <groupQueryLimit>
+ <group>it</group>
+ <totalCount>3</totalCount>
+ </groupQueryLimit>
+ </queue>
+ </queue>
+</allocations>
diff --git
a/fe/src/test/resources/bad_configurations/duplicate_group_limit_at_root.xml
b/fe/src/test/resources/bad_configurations/duplicate_group_limit_at_root.xml
new file mode 100644
index 000000000..afbb5fcd8
--- /dev/null
+++ b/fe/src/test/resources/bad_configurations/duplicate_group_limit_at_root.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<allocations>
+ <queue name="root">
+ <groupQueryLimit>
+ <group>it</group>
+ <totalCount>2</totalCount>
+ </groupQueryLimit>
+ <groupQueryLimit>
+ <group>it</group>
+ <totalCount>3</totalCount>
+ </groupQueryLimit>
+ </queue>
+</allocations>
diff --git
a/fe/src/test/resources/bad_configurations/duplicate_user_limit_at_leaf.xml
b/fe/src/test/resources/bad_configurations/duplicate_user_limit_at_leaf.xml
new file mode 100644
index 000000000..4c9256d18
--- /dev/null
+++ b/fe/src/test/resources/bad_configurations/duplicate_user_limit_at_leaf.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<allocations>
+ <queue name="root">
+ <queue name="group-set-small">
+ <userQueryLimit>
+ <user>alice</user>
+ <totalCount>4</totalCount>
+ </userQueryLimit>
+ <userQueryLimit>
+ <user>alice</user>
+ <totalCount>5</totalCount>
+ </userQueryLimit>
+ </queue>
+ </queue>
+</allocations>
diff --git
a/fe/src/test/resources/bad_configurations/duplicate_user_limit_at_root.xml
b/fe/src/test/resources/bad_configurations/duplicate_user_limit_at_root.xml
new file mode 100644
index 000000000..0e202dbc0
--- /dev/null
+++ b/fe/src/test/resources/bad_configurations/duplicate_user_limit_at_root.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<allocations>
+ <queue name="root">
+ <userQueryLimit>
+ <user>alice</user>
+ <totalCount>4</totalCount>
+ </userQueryLimit>
+ <userQueryLimit>
+ <user>alice</user>
+ <totalCount>5</totalCount>
+ </userQueryLimit>
+ </queue>
+</allocations>
diff --git
a/fe/src/test/resources/bad_configurations/warnings-fair-scheduler-test.xml
b/fe/src/test/resources/bad_configurations/warnings-fair-scheduler-test.xml
new file mode 100644
index 000000000..68e57e9e3
--- /dev/null
+++ b/fe/src/test/resources/bad_configurations/warnings-fair-scheduler-test.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<allocations>
+ <queue name="root">
+ <groupQueryLimit>
+ <group>support</group>
+ <totalCount>6</totalCount>
+ </groupQueryLimit>
+ <userQueryLimit>
+ <user>howard</user>
+ <totalCount>4</totalCount>
+ </userQueryLimit>
+ <userQueryLimit>
+ <user>*</user>
+ <totalCount>8</totalCount>
+ </userQueryLimit>
+
+ <queue name="group-set-small">
+ <userQueryLimit>
+ <user>howard</user>
+ <totalCount>100</totalCount>
+ </userQueryLimit>
+ <userQueryLimit>
+ <user>*</user>
+ <totalCount>12</totalCount>
+ </userQueryLimit>
+ <groupQueryLimit>
+ <group>support</group>
+ <totalCount>10</totalCount>
+ </groupQueryLimit>
+ </queue>
+ </queue>
+</allocations>
diff --git
a/java/yarn-extras/src/main/java/org/apache/impala/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java
b/java/yarn-extras/src/main/java/org/apache/impala/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java
index 49b1fa230..cd1bb30eb 100644
---
a/java/yarn-extras/src/main/java/org/apache/impala/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java
+++
b/java/yarn-extras/src/main/java/org/apache/impala/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -245,7 +246,14 @@ public class AllocationFileLoaderService extends
AbstractService {
if (number == -1) {
throw new AllocationConfigurationException("No totalCount for " +
parentName);
}
- for (String name : nameList) { limits.put(name, number); }
+ for (String name : nameList) {
+ Integer oldVal = limits.put(name, number);
+ if (oldVal != null) {
+ throw new AllocationConfigurationException("Duplicate entry for " +
tagName + " '"
+ + name + "' in pool '" + queueName + "' has multiple values " +
oldVal
+ + " and " + number);
+ }
+ }
}
/**
@@ -447,6 +455,9 @@ public class AllocationFileLoaderService extends
AbstractService {
lastSuccessfulReload = clock.getTime();
lastReloadAttemptFailed = false;
+ verifyConfiguration(info);
+ LOG.info("Completed loading allocation file " + allocFile);
+
reloadListener.onReload(info);
}
@@ -591,7 +602,70 @@ public class AllocationFileLoaderService extends
AbstractService {
}
}
- public interface Listener {
- public void onReload(AllocationConfiguration info);
+ /**
+ * After the AllocationConfiguration has been loaded, run some global
+ * consistency checks
+ */
+ public void verifyConfiguration(AllocationConfiguration
allocationConfiguration) {
+ Map<FSQueueType, Set<String>> configuredQueues =
+ allocationConfiguration.getConfiguredQueues();
+ Set<String> parentQueues = configuredQueues.get(FSQueueType.PARENT);
+ Set<String> leafQueues = configuredQueues.get(FSQueueType.LEAF);
+ String root = "root";
+ if (parentQueues.size() == 1 && parentQueues.contains(root)) {
+ Map<String, Integer> rootUserQueryLimits =
+ allocationConfiguration.getUserQueryLimits(root);
+ Map<String, Integer> rootGroupQueryLimits =
+ allocationConfiguration.getGroupQueryLimits(root);
+ for (String leafQueue : leafQueues) {
+ if (leafQueue.startsWith(root)) {
+ Map<String, Integer> groupQueryLimits =
+ allocationConfiguration.getGroupQueryLimits(leafQueue);
+ Map<String, Integer> userQueryLimits =
+ allocationConfiguration.getUserQueryLimits(leafQueue);
+ verifyQueryLimits(leafQueue, "user", rootUserQueryLimits,
userQueryLimits);
+ verifyQueryLimits(leafQueue, "group", rootGroupQueryLimits,
groupQueryLimits);
+
+ verifyLeafAcls(allocationConfiguration, leafQueue);
+ }
+ }
+ }
+ }
+
+ /**
+ * Look for the case where a leaf level User Quota will have no effect
+ * because of a root level quota. Log warnings when this case is found.
+ */
+ private void verifyQueryLimits(String leafQueue, String type,
+ Map<String, Integer> rootQueryLimits, Map<String, Integer> queryLimits) {
+ for (Map.Entry<String, Integer> stringIntegerEntry :
rootQueryLimits.entrySet()) {
+ String key = stringIntegerEntry.getKey();
+ int rootLimit = stringIntegerEntry.getValue();
+ Integer leafLimit = queryLimits.get(key);
+ if (leafLimit != null && leafLimit > rootLimit) {
+ LOG.warn("Potential misconfiguration in queue '" + leafQueue + "': the
" + type
+ + " limit for '" + key + "' of " + leafLimit
+ + " is greater than the root limit " + rootLimit
+ + " and so will have no effect");
+ }
+ }
+ }
+
+ /**
+ * Look for the case where the queue has no SUBMIT acl and log a Warning.
+ */
+ private static void verifyLeafAcls(
+ AllocationConfiguration allocationConfiguration, String leafQueue) {
+ AccessControlList queueAcl =
+ allocationConfiguration.getQueueAcl(leafQueue,
QueueACL.SUBMIT_APPLICATIONS);
+ Collection<String> users = queueAcl.getUsers();
+ Collection<String> groups = queueAcl.getGroups();
+ if (!queueAcl.isAllAllowed() && users.isEmpty() && groups.isEmpty()) {
+ LOG.warn("Potential misconfiguration in queue '" + leafQueue
+ + "': no aclSubmitApps permissions were "
+ + "found, this will prevent query submission on this queue");
+ }
}
+
+ public interface Listener { public void onReload(AllocationConfiguration
info); }
}