This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch 3.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/3.1 by this push:
new 3e85f58ab9 Do not fall back to tablet servers when no scan servers by
default (#4814)
3e85f58ab9 is described below
commit 3e85f58ab923727ecfc21db1c696de2352da54e2
Author: Keith Turner <[email protected]>
AuthorDate: Tue Aug 27 14:24:36 2024 -0700
Do not fall back to tablet servers when no scan servers by default (#4814)
By default when using eventual scans and there are no scan servers present
then the scan will fall back to tablet servers. This could be very
surprising
and disruptive when tablet servers do not have much resources allocated for
scanning and scan servers do. In the case were scan servers are temporarily
down it could cause a thundering herd that overwhelms tablet servers. This
commit changes the default behavior to not fall back to tablet servers.
This change is targeting 3.1 in order to avoid changing default behavior
in 2.1.X. The capability to not fall back to tablet servers was introduced
in 2.1.3 via #4671 with a default behavior to fallback.
---
.../spi/scan/ConfigurableScanServerSelector.java | 13 +++---
.../scan/ConfigurableScanServerSelectorTest.java | 34 +++++++++++-----
.../test/ScanServerGroupConfigurationIT.java | 2 +
.../accumulo/test/ScanServerIT_NoServers.java | 46 +++++++++++-----------
4 files changed, 54 insertions(+), 41 deletions(-)
diff --git
a/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java
b/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java
index a7900645ed..129209d9da 100644
---
a/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java
+++
b/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java
@@ -97,12 +97,11 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
* <li>"s" for seconds</li>
* <li>"ms" for milliseconds</li>
* </ul>
- * If duration is not specified this setting defaults to 0s, and will disable
the wait for scan
- * servers and will fall back to tablet servers immediately. When set to a
large value, the selector
- * will effectively wait for scan servers to become available before falling
back to tablet servers.
- * To ensure the selector never falls back scanning tablet servers an
unrealistic wait time can be
- * set. For instanced should be sufficient. Setting Waiting for scan servers
is done via
- * {@link
org.apache.accumulo.core.spi.scan.ScanServerSelector.SelectorParameters#waitUntil(Supplier,
Duration, String)}</li>
+ * If duration is not specified this setting defaults to 100 years, which for
all practical purposes
+ * will never fall back to tablet servers. Waiting for scan servers is done via
+ * {@link
org.apache.accumulo.core.spi.scan.ScanServerSelector.SelectorParameters#waitUntil(Supplier,
Duration, String)}.
+ * To immediately fall back to tablet servers when no scan servers are present
set this to
+ * zero.</li>
* <li><b>attemptPlans : </b> A list of configuration to use for each scan
attempt. Each list object
* has the following fields:
* <ul>
@@ -249,7 +248,7 @@ public class ConfigurableScanServerSelector implements
ScanServerSelector {
int busyTimeoutMultiplier;
String maxBusyTimeout;
String group = ScanServerSelector.DEFAULT_SCAN_SERVER_GROUP_NAME;
- String timeToWaitForScanServers = "0s";
+ String timeToWaitForScanServers = 100 * 365 + "d";
transient boolean parsed = false;
transient long parsedMaxBusyTimeout;
diff --git
a/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java
b/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java
index 54de138520..bb396889c1 100644
---
a/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java
+++
b/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java
@@ -37,6 +37,7 @@ import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.accumulo.core.client.TimedOutException;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.data.TabletId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
@@ -46,6 +47,8 @@ import org.apache.accumulo.core.util.UtilWaitThread;
import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.Test;
+import com.google.common.base.Preconditions;
+
public class ConfigurableScanServerSelectorTest {
static class InitParams implements ScanServerSelector.InitParameters {
@@ -140,7 +143,8 @@ public class ConfigurableScanServerSelectorTest {
@Override
public <T> Optional<T> waitUntil(Supplier<Optional<T>> condition, Duration
maxWaitTime,
String description) {
- throw new UnsupportedOperationException();
+ Preconditions.checkArgument(maxWaitTime.compareTo(Duration.ZERO) > 0);
+ throw new TimedOutException("Timed out w/o checking condition");
}
}
@@ -313,7 +317,7 @@ public class ConfigurableScanServerSelectorTest {
// Intentionally put the default profile in 2nd position. There was a bug
where config parsing
// would fail if the default did not come first.
var opts = Map.of("profiles",
- "[" + profile1 + ", " + defaultProfile + "," + profile2 +
"]".replace('\'', '"'));
+ ("[" + profile1 + ", " + defaultProfile + "," + profile2 +
"]").replace('\'', '"'));
runBusyTest(1000, 0, 5, 5, opts);
runBusyTest(1000, 1, 20, 33, opts);
@@ -372,14 +376,14 @@ public class ConfigurableScanServerSelectorTest {
+ "'attemptPlans':[{'servers':'100%', 'busyTimeout':'10m'}]}";
var opts1 = Map.of("profiles",
- "[" + defaultProfile + ", " + profile1 + "," + profile2 +
"]".replace('\'', '"'));
+ ("[" + defaultProfile + ", " + profile1 + "," + profile2 +
"]").replace('\'', '"'));
// two profiles activate on the scan type "mega", so should fail
var exception =
assertThrows(IllegalArgumentException.class, () -> runBusyTest(1000,
0, 5, 66, opts1));
assertTrue(exception.getMessage().contains("mega"));
- var opts2 = Map.of("profiles", "[" + profile1 + "]".replace('\'', '"'));
+ var opts2 = Map.of("profiles", ("[" + profile1 + "]").replace('\'', '"'));
// missing a default profile, so should fail
exception =
@@ -391,11 +395,21 @@ public class ConfigurableScanServerSelectorTest {
@Test
public void testNoScanServers() {
- ConfigurableScanServerSelector selector = new
ConfigurableScanServerSelector();
- selector.init(new InitParams(Set.of()));
-
+ // run a 1st test assuming default config does not fallback to tservers,
so should timeout
+ var selector1 = new ConfigurableScanServerSelector();
+ selector1.init(new InitParams(Set.of()));
var tabletId = nti("1", "m");
- ScanServerSelections actions = selector.selectServers(new
SelectorParams(tabletId));
+ assertThrows(TimedOutException.class,
+ () -> selector1.selectServers(new SelectorParams(tabletId)));
+
+ // run a 2nd test with config that does fall back to tablet servers
+ String profile =
"{'isDefault':'true','maxBusyTimeout':'60m','busyTimeoutMultiplier':2,"
+ + " 'timeToWaitForScanServers': '0s',"
+ + "'attemptPlans':[{'servers':'100%', 'busyTimeout':'10m'}]}";
+ var opts1 = Map.of("profiles", ("[" + profile + "]").replace('\'', '"'));
+ var selector2 = new ConfigurableScanServerSelector();
+ selector2.init(new InitParams(Set.of(), opts1));
+ ScanServerSelections actions = selector2.selectServers(new
SelectorParams(tabletId));
assertNull(actions.getScanServer(tabletId));
assertEquals(Duration.ZERO, actions.getDelay());
assertEquals(Duration.ZERO, actions.getBusyTimeout());
@@ -416,7 +430,7 @@ public class ConfigurableScanServerSelectorTest {
+ "'attemptPlans':[{'servers':'100%', 'busyTimeout':'10m'}]}";
var opts = Map.of("profiles",
- "[" + defaultProfile + ", " + profile1 + "," + profile2 +
"]".replace('\'', '"'));
+ ("[" + defaultProfile + ", " + profile1 + "," + profile2 +
"]").replace('\'', '"'));
ConfigurableScanServerSelector selector = new
ConfigurableScanServerSelector();
var dg = ScanServerSelector.DEFAULT_SCAN_SERVER_GROUP_NAME;
@@ -493,7 +507,7 @@ public class ConfigurableScanServerSelectorTest {
"{'isDefault':true,'maxBusyTimeout':'5m','busyTimeoutMultiplier':4,'timeToWaitForScanServers':'120s',"
+ "'attemptPlans':[{'servers':'100%', 'busyTimeout':'60s'}]}";
- var opts = Map.of("profiles", "[" + defaultProfile + "]".replace('\'',
'"'));
+ var opts = Map.of("profiles", ("[" + defaultProfile + "]").replace('\'',
'"'));
ConfigurableScanServerSelector selector = new
ConfigurableScanServerSelector();
diff --git
a/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java
b/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java
index 6773251223..0e2aa02e8d 100644
---
a/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java
+++
b/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java
@@ -57,6 +57,7 @@ public class ScanServerGroupConfigurationIT extends
SharedMiniClusterBase {
" \"maxBusyTimeout\": \"5m\","+
" \"busyTimeoutMultiplier\": 8,"+
" \"scanTypeActivations\": [],"+
+ " \"timeToWaitForScanServers\":\"0s\","+
" \"attemptPlans\": ["+
" {"+
" \"servers\": \"3\","+
@@ -80,6 +81,7 @@ public class ScanServerGroupConfigurationIT extends
SharedMiniClusterBase {
" \"busyTimeoutMultiplier\": 8,"+
" \"group\": \"GROUP1\","+
" \"scanTypeActivations\": [\"use_group1\"],"+
+ " \"timeToWaitForScanServers\":\"0s\","+
" \"attemptPlans\": ["+
" {"+
" \"servers\": \"3\","+
diff --git
a/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java
b/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java
index f8dde71bbf..6868d91a9c 100644
--- a/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java
+++ b/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java
@@ -85,7 +85,16 @@ public class ScanServerIT_NoServers extends
SharedMiniClusterBase {
@Test
public void testScan() throws Exception {
- try (AccumuloClient client =
Accumulo.newClient().from(getClientProps()).build()) {
+ var clientProps = new Properties();
+ clientProps.putAll(getClientProps());
+ String scanServerSelectorProfiles =
"[{'isDefault':true,'maxBusyTimeout':'5m',"
+ + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[],
'timeToWaitForScanServers':'0s',"
+ + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]";
+ clientProps.put("scan.server.selector.impl",
ConfigurableScanServerSelector.class.getName());
+ clientProps.put("scan.server.selector.opts.profiles",
+ scanServerSelectorProfiles.replace("'", "\""));
+
+ try (AccumuloClient client =
Accumulo.newClient().from(clientProps).build()) {
String tableName = getUniqueNames(1)[0];
final int ingestedEntryCount = createTableAndIngest(client, tableName,
null, 10, 10, "colf");
@@ -107,7 +116,16 @@ public class ScanServerIT_NoServers extends
SharedMiniClusterBase {
@Test
public void testBatchScan() throws Exception {
- try (AccumuloClient client =
Accumulo.newClient().from(getClientProps()).build()) {
+ var clientProps = new Properties();
+ clientProps.putAll(getClientProps());
+ String scanServerSelectorProfiles =
"[{'isDefault':true,'maxBusyTimeout':'5m',"
+ + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[],
'timeToWaitForScanServers':'0s',"
+ + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]";
+ clientProps.put("scan.server.selector.impl",
ConfigurableScanServerSelector.class.getName());
+ clientProps.put("scan.server.selector.opts.profiles",
+ scanServerSelectorProfiles.replace("'", "\""));
+
+ try (AccumuloClient client =
Accumulo.newClient().from(clientProps).build()) {
String tableName = getUniqueNames(1)[0];
final int ingestedEntryCount = createTableAndIngest(client, tableName,
null, 10, 10, "colf");
@@ -128,17 +146,7 @@ public class ScanServerIT_NoServers extends
SharedMiniClusterBase {
@Test
public void testScanWithNoTserverFallback() throws Exception {
-
- var clientProps = new Properties();
- clientProps.putAll(getClientProps());
- String scanServerSelectorProfiles =
"[{'isDefault':true,'maxBusyTimeout':'5m',"
- + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[],
'timeToWaitForScanServers':'120s',"
- + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]";
- clientProps.put("scan.server.selector.impl",
ConfigurableScanServerSelector.class.getName());
- clientProps.put("scan.server.selector.opts.profiles",
- scanServerSelectorProfiles.replace("'", "\""));
-
- try (AccumuloClient client =
Accumulo.newClient().from(clientProps).build()) {
+ try (AccumuloClient client =
Accumulo.newClient().from(getClientProps()).build()) {
String tableName = getUniqueNames(1)[0];
createTableAndIngest(client, tableName, null, 10, 10, "colf");
@@ -156,17 +164,7 @@ public class ScanServerIT_NoServers extends
SharedMiniClusterBase {
@Test
public void testBatchScanWithNoTserverFallback() throws Exception {
-
- var clientProps = new Properties();
- clientProps.putAll(getClientProps());
- String scanServerSelectorProfiles =
"[{'isDefault':true,'maxBusyTimeout':'5m',"
- + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[],
'timeToWaitForScanServers':'120s',"
- + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]";
- clientProps.put("scan.server.selector.impl",
ConfigurableScanServerSelector.class.getName());
- clientProps.put("scan.server.selector.opts.profiles",
- scanServerSelectorProfiles.replace("'", "\""));
-
- try (AccumuloClient client =
Accumulo.newClient().from(clientProps).build()) {
+ try (AccumuloClient client =
Accumulo.newClient().from(getClientProps()).build()) {
String tableName = getUniqueNames(1)[0];
createTableAndIngest(client, tableName, null, 10, 10, "colf");