This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 2956e98e2d5 migrate hdfs integration tests to embedded-tests (#19158)
2956e98e2d5 is described below
commit 2956e98e2d5bcc6e780c7bc313cca4ebffa6a51f
Author: Clint Wylie <[email protected]>
AuthorDate: Mon Mar 16 23:28:26 2026 -0700
migrate hdfs integration tests to embedded-tests (#19158)
---
embedded-tests/pom.xml | 224 +++++++++++++++++++++
.../AbstractAzureInputSourceParallelIndexTest.java | 13 --
.../AbstractGcsInputSourceParallelIndexTest.java | 16 --
.../AbstractHdfsInputSourceParallelIndexTest.java | 170 ++++++++++++++++
.../hdfs/AzureToHdfsParallelIndexTest.java | 58 ++++++
.../embedded/hdfs/GcsToHdfsParallelIndexTest.java | 59 ++++++
.../testing/embedded/hdfs/HdfsStorageResource.java | 134 ++++++++++++
.../druid/testing/embedded/hdfs/HdfsTestUtil.java | 83 ++++++++
.../hdfs/HdfsToAzureParallelIndexTest.java | 82 ++++++++
.../embedded/hdfs/HdfsToGcsParallelIndexTest.java | 66 ++++++
.../embedded/hdfs/HdfsToHdfsParallelIndexTest.java | 53 +++++
.../embedded/hdfs/HdfsToS3ParallelIndexTest.java | 70 +++++++
.../embedded/hdfs/S3ToHdfsParallelIndexTest.java | 58 ++++++
.../java/org/apache/druid/tests/TestNGGroup.java | 23 ---
.../AbstractAzureInputSourceParallelIndexTest.java | 136 -------------
.../AbstractGcsInputSourceParallelIndexTest.java | 136 -------------
.../AbstractHdfsInputSourceParallelIndexTest.java | 127 ------------
.../AbstractS3InputSourceParallelIndexTest.java | 136 -------------
.../indexer/ITAzureToHdfsParallelIndexTest.java | 51 -----
.../indexer/ITGcsToHdfsParallelIndexTest.java | 52 -----
.../indexer/ITHdfsToAzureParallelIndexTest.java | 47 -----
.../indexer/ITHdfsToGcsParallelIndexTest.java | 48 -----
.../indexer/ITHdfsToHdfsParallelIndexTest.java | 58 ------
.../tests/indexer/ITHdfsToS3ParallelIndexTest.java | 47 -----
.../tests/indexer/ITS3ToHdfsParallelIndexTest.java | 51 -----
25 files changed, 1057 insertions(+), 941 deletions(-)
diff --git a/embedded-tests/pom.xml b/embedded-tests/pom.xml
index f92d9bb3af5..1cc344d4ca5 100644
--- a/embedded-tests/pom.xml
+++ b/embedded-tests/pom.xml
@@ -274,6 +274,12 @@
<artifactId>druid-iceberg-extensions</artifactId>
<version>${project.parent.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
@@ -348,6 +354,18 @@
<version>${project.parent.version}</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.druid.extensions</groupId>
+ <artifactId>druid-hdfs-storage</artifactId>
+ <version>${project.parent.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client-minicluster</artifactId>
+ <version>${hadoop.compile.version}</version>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.apache.druid.extensions</groupId>
<artifactId>druid-parquet-extensions</artifactId>
@@ -666,6 +684,212 @@
<version>0.13.0</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>com.google.api</groupId>
+ <artifactId>gax</artifactId>
+ <version>2.37.0</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client-api</artifactId>
+ <version>${hadoop.compile.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- needed by iceberg extension, this is copied from iceberg pom -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-buffer</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-net</groupId>
+ <artifactId>commons-net</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpcore</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.ws.rs</groupId>
+ <artifactId>jsr311-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-servlet</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-webapp</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.protobuf</groupId>
+ <artifactId>protobuf-java</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.curator</groupId>
+ <artifactId>curator-client</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.curator</groupId>
+ <artifactId>curator-recipes</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math3</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>net.java.dev.jets3t</groupId>
+ <artifactId>jets3t</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.github.pjfanning</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.jcraft</groupId>
+ <artifactId>jsch</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <!-- Following are excluded to remove security vulnerabilities: -->
+ <exclusion>
+ <groupId>commons-beanutils</groupId>
+ <artifactId>commons-beanutils-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-beanutils</groupId>
+ <artifactId>commons-beanutils</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>ch.qos.reload4j</groupId>
+ <artifactId>reload4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-servlet</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-reload4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.nimbusds</groupId>
+ <artifactId>nimbus-jose-jwt</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.github.stephenc.jcip</groupId>
+ <artifactId>jcip-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.curator</groupId>
+ <artifactId>curator-framework</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.dropwizard.metrics</groupId>
+ <artifactId>metrics-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>dnsjava</groupId>
+ <artifactId>dnsjava</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>com.aliyun.oss</groupId>
<artifactId>aliyun-sdk-oss</artifactId>
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/azure/AbstractAzureInputSourceParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/azure/AbstractAzureInputSourceParallelIndexTest.java
index 61a4b160bae..80e7e72decc 100644
---
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/azure/AbstractAzureInputSourceParallelIndexTest.java
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/azure/AbstractAzureInputSourceParallelIndexTest.java
@@ -25,7 +25,6 @@ import
org.apache.druid.storage.azure.output.AzureStorageConnectorModule;
import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
import
org.apache.druid.testing.embedded.indexer.AbstractCloudInputSourceParallelIndexTest;
import org.junit.Assert;
-import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
@@ -70,18 +69,6 @@ public abstract class
AbstractAzureInputSourceParallelIndexTest extends Abstract
}
}
- @AfterAll
- public void deleteDataFilesFromAzure()
- {
- try {
- // Deleting uploaded data files
- azure.deleteStorageContainer();
- }
- catch (Exception e) {
- LOG.warn(e, "Unable to delete container in azure");
- }
- }
-
public void validateAzureSegmentFilesDeleted(String path)
{
List<URI> segmentFiles = ImmutableList.of();
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/gcs/AbstractGcsInputSourceParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/gcs/AbstractGcsInputSourceParallelIndexTest.java
index 8634c2e08ec..2b39b05ff95 100644
---
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/gcs/AbstractGcsInputSourceParallelIndexTest.java
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/gcs/AbstractGcsInputSourceParallelIndexTest.java
@@ -23,7 +23,6 @@ import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.storage.google.output.GoogleStorageConnectorModule;
import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
import
org.apache.druid.testing.embedded.indexer.AbstractCloudInputSourceParallelIndexTest;
-import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -71,19 +70,4 @@ public class AbstractGcsInputSourceParallelIndexTest extends
AbstractCloudInputS
LOG.warn(e, "Unable to delete segments from GCS");
}
}
-
- @AfterAll
- public void deleteDataFilesFromGcs()
- {
- LOG.info("Deleting data files from GCS");
- try {
- for (String file : fileList()) {
- // Deleting uploaded data files
- gcs.deleteFileFromGcs(file);
- }
- }
- catch (Exception e) {
- LOG.warn(e, "Unable to delete files in GCS");
- }
- }
}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/AbstractHdfsInputSourceParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/AbstractHdfsInputSourceParallelIndexTest.java
new file mode 100644
index 00000000000..93f7c6580aa
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/AbstractHdfsInputSourceParallelIndexTest.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.testing.embedded.indexer.AbstractITBatchIndexTest;
+import org.junit.jupiter.api.BeforeAll;
+
+import java.io.Closeable;
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Function;
+
+/**
+ * Abstract base class for embedded parallel index tests where HDFS is the
input source.
+ * Subclasses supply their own {@link HdfsStorageResource} and register
whatever deep-storage
+ * resource they need (e.g. another {@link HdfsStorageResource} in
deep-storage mode, or an
+ * Azure / GCS resource).
+ *
+ * <p>Test data ({@code tiny_wiki_*.json}) is uploaded to HDFS once before any test runs. This
+ * class performs no explicit cleanup; the data lasts until the HDFS resource is stopped.
+ */
+public abstract class AbstractHdfsInputSourceParallelIndexTest extends
AbstractITBatchIndexTest
+{
+ private static final Logger LOG = new
Logger(AbstractHdfsInputSourceParallelIndexTest.class);
+ private static final String HDFS_INDEX_TASK =
"/indexer/wikipedia_cloud_index_task.json";
+ private static final String INDEX_QUERIES_RESOURCE =
"/indexer/wikipedia_index_queries.json";
+ private static final String HDFS_DATA_PATH = "/data/json";
+
+ private static final String DATA_1 = "tiny_wiki_1.json";
+ private static final String DATA_2 = "tiny_wiki_2.json";
+ private static final String DATA_3 = "tiny_wiki_3.json";
+
+ /**
+ * Placeholder replaced with the actual {@code hdfs://localhost:PORT} URL at
test time.
+ */
+ private static final String HDFS_URL_PLACEHOLDER = "%%HDFS_URL%%";
+
+ private HdfsTestUtil hdfs;
+
+ /**
+ * Returns the {@link HdfsStorageResource} that holds the HDFS input data.
+ * This instance must have been registered with the embedded cluster in
+ * {@link #addResources}.
+ */
+ protected abstract HdfsStorageResource getHdfsResource();
+
+ /**
+ * Data provider: returns (inputSourcePropertyKey, inputSourcePropertyValue)
pairs that exercise
+ * different forms of the HDFS input source spec.
+ * <ul>
+ * <li>A single directory path (HDFS reads all files in the directory)</li>
+ * <li>A list of explicit file paths</li>
+ * </ul>
+ * The {@value HDFS_URL_PLACEHOLDER} token is replaced with the real
NameNode URL when the
+ * test task is submitted.
+ */
+ public static Object[][] resources()
+ {
+ final String dataBase = HDFS_URL_PLACEHOLDER + HDFS_DATA_PATH;
+ return new Object[][]{
+ {new Pair<>("paths", dataBase)},
+ {new Pair<>("paths", List.of(
+ dataBase + "/" + DATA_1,
+ dataBase + "/" + DATA_2,
+ dataBase + "/" + DATA_3
+ ))}
+ };
+ }
+
+ public static List<String> fileList()
+ {
+ return Arrays.asList(DATA_1, DATA_2, DATA_3);
+ }
+
+ /**
+  * Copies the three {@code tiny_wiki_*.json} classpath resources into {@code /data/json} on the
+  * HDFS instance supplied by {@link #getHdfsResource()}. Runs once, before the tests in the class.
+  *
+  * @throws Exception if the HDFS file system cannot be obtained or an upload fails
+  */
+ @BeforeAll
+ public void setupHdfs() throws Exception
+ {
+   LOG.info("Uploading test data files to HDFS");
+   hdfs = new HdfsTestUtil(getHdfsResource().getFileSystem(), HDFS_DATA_PATH);
+
+   // Resource-relative locations of the input files on the test classpath.
+   final String resourceDir = "data/json/";
+   final List<String> resourcePaths = List.of(
+       resourceDir + DATA_1,
+       resourceDir + DATA_2,
+       resourceDir + DATA_3
+   );
+   hdfs.uploadDataFilesToHdfs(resourcePaths);
+ }
+
+ /**
+ * Runs a parallel-index test reading from HDFS.
+ *
+ * @param hdfsInputSource {@code lhs} is the input-source property key
({@code "paths"}),
+ * {@code rhs} is the value (String or {@link List})
+ * with {@value HDFS_URL_PLACEHOLDER} placeholders
+ * @param segmentAvailabilityConfirmationPair segment-availability check
flags
+ */
+ protected String doHdfsTest(
+ Pair<String, Object> hdfsInputSource,
+ Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair
+ ) throws Exception
+ {
+ final String indexDatasource = dataSource;
+ try (final Closeable ignored = unloader(indexDatasource)) {
+ final String hdfsUrl = getHdfsResource().getHdfsUrl();
+
+ final Function<String, String> transform = spec -> {
+ try {
+ // Replace the HDFS URL placeholder in the serialised input-source
value.
+ String inputSourceValue =
jsonMapper.writeValueAsString(hdfsInputSource.rhs);
+ inputSourceValue = StringUtils.replace(inputSourceValue,
HDFS_URL_PLACEHOLDER, hdfsUrl);
+
+ spec = StringUtils.replace(
+ spec,
+ "%%INPUT_FORMAT_TYPE%%",
+ InputFormatDetails.JSON.getInputFormatType()
+ );
+ spec = StringUtils.replace(
+ spec,
+ "%%PARTITIONS_SPEC%%",
+ jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null,
null))
+ );
+ spec = StringUtils.replace(spec, "%%INPUT_SOURCE_TYPE%%", "hdfs");
+ // sharing index_task.json with cloud which has this one
+ spec = StringUtils.replace(
+ spec,
+ "%%INPUT_SOURCE_PROPERTIES%%",
+ "null"
+ );
+ spec = StringUtils.replace(spec, "%%INPUT_SOURCE_PROPERTY_KEY%%",
hdfsInputSource.lhs);
+ spec = StringUtils.replace(spec, "%%INPUT_SOURCE_PROPERTY_VALUE%%",
inputSourceValue);
+ return spec;
+ }
+ catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ };
+
+ doIndexTest(
+ indexDatasource,
+ HDFS_INDEX_TASK,
+ transform,
+ INDEX_QUERIES_RESOURCE,
+ false,
+ true,
+ true,
+ segmentAvailabilityConfirmationPair
+ );
+ return indexDatasource;
+ }
+ }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/AzureToHdfsParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/AzureToHdfsParallelIndexTest.java
new file mode 100644
index 00000000000..9bd9a899978
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/AzureToHdfsParallelIndexTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import
org.apache.druid.testing.embedded.azure.AbstractAzureInputSourceParallelIndexTest;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.List;
+
+/**
+ * Embedded parallel index test that reads input data from Azure Blob Storage
(Azurite
+ * testcontainer) and stores segments in HDFS (in-process MiniDFSCluster).
+ *
+ * <p>The HDFS resource is registered after the Azure resource so that the
HDFS deep-storage
+ * configuration ({@code druid.storage.type=hdfs}) overrides the Azure
configuration set by
+ * the Azurite resource. The Azure input-source connection properties remain
active and are used
+ * by the indexer to read data from Azurite.
+ */
+public class AzureToHdfsParallelIndexTest extends
AbstractAzureInputSourceParallelIndexTest
+{
+ private final HdfsStorageResource hdfsResource = new
HdfsStorageResource(true);
+
+ @Override
+ protected void addResources(EmbeddedDruidCluster cluster)
+ {
+ // Azure/Azurite resource first: uploads data to Azurite and sets Azure
connection properties.
+ super.addResources(cluster);
+ // HDFS resource second: overrides druid.storage.type to "hdfs" for deep
storage.
+ cluster.addResource(hdfsResource);
+ }
+
+ @ParameterizedTest
+ @MethodSource("resources")
+ public void testAzureIndexData(Pair<String, List<?>> azureInputSource)
throws Exception
+ {
+ doTest(azureInputSource, new Pair<>(false, false), "azure", null);
+ }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/GcsToHdfsParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/GcsToHdfsParallelIndexTest.java
new file mode 100644
index 00000000000..f8cd6c21a98
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/GcsToHdfsParallelIndexTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import
org.apache.druid.testing.embedded.gcs.AbstractGcsInputSourceParallelIndexTest;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.List;
+
+/**
+ * Embedded parallel index test that reads input data from Google Cloud
Storage (FakeGcsServer
+ * testcontainer) and stores segments in HDFS (in-process MiniDFSCluster).
+ *
+ * <p>The HDFS resource is registered after the GCS resource so that the HDFS
deep-storage
+ * configuration ({@code druid.storage.type=hdfs}) overrides the GCS
configuration set by the
+ * FakeGcsServer resource. The GCS input-source connection properties remain
active and are used
+ * by the indexer to read data from FakeGcsServer.
+ */
+public class GcsToHdfsParallelIndexTest extends
AbstractGcsInputSourceParallelIndexTest
+{
+ private final HdfsStorageResource hdfsResource = new
HdfsStorageResource(true);
+
+ @Override
+ protected void addResources(EmbeddedDruidCluster cluster)
+ {
+ // GCS/FakeGcsServer resource first: uploads data to FakeGcsServer and
sets GCS connection
+ // properties.
+ super.addResources(cluster);
+ // HDFS resource second: overrides druid.storage.type to "hdfs" for deep
storage.
+ cluster.addResource(hdfsResource);
+ }
+
+ @ParameterizedTest
+ @MethodSource("resources")
+ public void testGcsIndexData(Pair<String, List<?>> gcsInputSource) throws
Exception
+ {
+ doTest(gcsInputSource, new Pair<>(false, false), "google", null);
+ }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsStorageResource.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsStorageResource.java
new file mode 100644
index 00000000000..36b1252cf2e
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsStorageResource.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.FileUtils;
+import org.apache.druid.storage.hdfs.HdfsStorageDruidModule;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import org.apache.druid.testing.embedded.EmbeddedResource;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * An {@link EmbeddedResource} that starts an in-process HDFS cluster using
{@link MiniDFSCluster}.
+ * No Docker container is needed; MiniDFSCluster runs entirely within the test
JVM.
+ *
+ * <p>When {@code configureAsDeepStorage} is {@code true}, this resource also
+ * configures the embedded Druid cluster to use HDFS for segment deep storage and task logs.
+ * When {@code false}, it only provides HDFS connectivity (useful when HDFS is the input source
+ * but a different system is the deep storage). The flag has no default; callers must pass it.
+ */
+public class HdfsStorageResource implements EmbeddedResource
+{
+ private final boolean configureAsDeepStorage;
+ private MiniDFSCluster miniDFSCluster;
+
+ /**
+ * Creates a resource with explicit control over whether HDFS is configured
as deep storage.
+ *
+ * @param configureAsDeepStorage if {@code true}, sets {@code
druid.storage.type=hdfs} and
+ * configures task logs on HDFS; if {@code
false}, only provides
+ * HDFS connectivity for use as an input source
+ */
+ public HdfsStorageResource(boolean configureAsDeepStorage)
+ {
+ this.configureAsDeepStorage = configureAsDeepStorage;
+ }
+
+ /**
+  * Starts the in-process {@link MiniDFSCluster} on a random available port and waits for it to
+  * come up.
+  *
+  * <p>The cluster's base directory is pointed at a freshly created temp directory so that its
+  * on-disk state is not shared with anything else using the MiniDFSCluster default location.
+  *
+  * @throws RuntimeException wrapping the {@link IOException} if the cluster fails to start
+  */
+ @Override
+ public void start()
+ {
+   try {
+     final File tempDir = FileUtils.createTempDir("mini-dfs-");
+     // Fallback only: deleteOnExit() cannot remove a directory that still has contents.
+     tempDir.deleteOnExit();
+
+     final Configuration conf = new Configuration();
+     // MiniDFSCluster places its name/data directories under HDFS_MINIDFS_BASEDIR; setting
+     // hadoop.tmp.dir alone does not move them, so set both to keep everything in our temp dir.
+     conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tempDir.getAbsolutePath());
+     conf.set("hadoop.tmp.dir", tempDir.getAbsolutePath());
+     conf.set("dfs.replication", "1");
+     // Disable permissions checks to simplify test setup
+     conf.set("dfs.permissions.enabled", "false");
+
+     miniDFSCluster = new MiniDFSCluster.Builder(conf)
+         .nameNodePort(0) // use a random available port
+         .build();
+     miniDFSCluster.waitClusterUp();
+   }
+   catch (IOException e) {
+     throw new RuntimeException("Failed to start MiniDFSCluster", e);
+   }
+ }
+
+ @Override
+ public void stop()
+ {
+ if (miniDFSCluster != null) {
+ miniDFSCluster.shutdown(true);
+ miniDFSCluster = null;
+ }
+ }
+
+ @Override
+ public void onStarted(EmbeddedDruidCluster cluster)
+ {
+ cluster.addExtension(HdfsStorageDruidModule.class);
+
+ // Set fs.defaultFS so that the Hadoop client and Druid's
HdfsStorageDruidModule
+ // can connect to this MiniDFSCluster.
+ cluster.addCommonProperty("hadoop.fs.defaultFS", getHdfsUrl());
+
+ if (configureAsDeepStorage) {
+ cluster.addCommonProperty("druid.storage.type", "hdfs");
+ cluster.addCommonProperty("druid.storage.storageDirectory",
"/druid/segments");
+ cluster.addCommonProperty("druid.indexer.logs.type", "hdfs");
+ cluster.addCommonProperty("druid.indexer.logs.directory",
"/druid/indexing-logs");
+ }
+ }
+
+ /**
+ * Returns the HDFS NameNode URL, e.g. {@code hdfs://localhost:12345}.
+ */
+ public String getHdfsUrl()
+ {
+ ensureRunning();
+ return "hdfs://localhost:" + miniDFSCluster.getNameNodePort();
+ }
+
+ /**
+ * Returns the {@link FileSystem} connected to this MiniDFSCluster, for use
in test setup and
+ * teardown (uploading data, deleting folders, etc.).
+ */
+ public FileSystem getFileSystem() throws IOException
+ {
+ ensureRunning();
+ return miniDFSCluster.getFileSystem();
+ }
+
+ private void ensureRunning()
+ {
+ if (miniDFSCluster == null || !miniDFSCluster.isClusterUp()) {
+ throw new IllegalStateException("MiniDFSCluster is not running");
+ }
+ }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsTestUtil.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsTestUtil.java
new file mode 100644
index 00000000000..dbafd479247
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsTestUtil.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.testing.embedded.indexing.Resources;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.util.List;
+
+/**
+ * Test utility for managing files on HDFS during embedded cluster tests.
+ */
+public class HdfsTestUtil
+{
+ private static final Logger LOG = new Logger(HdfsTestUtil.class);
+
+ private final FileSystem fileSystem;
+ private final String basePath;
+
+ /**
+ * @param fileSystem the HDFS {@link FileSystem} (typically from {@link
HdfsStorageResource#getFileSystem()})
+ * @param basePath the HDFS base path under which test files are stored
(e.g. {@code /data/json})
+ */
+ public HdfsTestUtil(FileSystem fileSystem, String basePath)
+ {
+ this.fileSystem = fileSystem;
+ this.basePath = basePath;
+ }
+
+ /**
+ * Uploads resource-relative files to HDFS under the configured base path.
+ *
+ * @param localFiles resource-relative paths to upload (e.g. {@code
"data/json/tiny_wiki_1.json"})
+ */
+ public void uploadDataFilesToHdfs(List<String> localFiles) throws Exception
+ {
+ fileSystem.mkdirs(new Path(basePath));
+ for (String localFile : localFiles) {
+ final String fileName = localFile.substring(localFile.lastIndexOf('/') +
1);
+ final Path hdfsPath = new Path(basePath + "/" + fileName);
+ final Path localPath = new
Path(Resources.getFileForResource(localFile).getAbsolutePath());
+ fileSystem.copyFromLocalFile(localPath, hdfsPath);
+ LOG.info("Uploaded [%s] to HDFS at [%s]", localFile, hdfsPath);
+ }
+ }
+
+ /**
+  * Deletes the given bare file names from the configured base path.
+  *
+  * <p>Deletion is best-effort: a failure for one file is logged as a warning and the remaining
+  * files are still attempted; nothing is propagated to the caller.
+  *
+  * @param fileNames bare file names (without path prefix) to delete
+  */
+ public void deleteFilesFromHdfs(List<String> fileNames)
+ {
+   for (String fileName : fileNames) {
+     try {
+       final Path target = new Path(basePath + "/" + fileName);
+       // delete() signals "nothing was removed" through its return value, not an exception,
+       // so surface that case instead of dropping it silently.
+       if (!fileSystem.delete(target, false)) {
+         LOG.warn("File [%s] was not deleted from HDFS at [%s]", fileName, target);
+       }
+     }
+     catch (Exception e) {
+       LOG.warn(e, "Unable to delete file [%s] from HDFS", fileName);
+     }
+   }
+ }
+
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToAzureParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToAzureParallelIndexTest.java
new file mode 100644
index 00000000000..9ba4547099b
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToAzureParallelIndexTest.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import org.apache.druid.testing.embedded.azure.AzureStorageResource;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Embedded parallel index test that reads input data from HDFS (in-process
MiniDFSCluster)
+ * and stores segments in Azure Blob Storage (Azurite testcontainer).
+ */
+public class HdfsToAzureParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
+{
+  private static final Logger LOG = new Logger(HdfsToAzureParallelIndexTest.class);
+
+  // Azure is the deep storage for this test.
+  private final AzureStorageResource azureResource = new AzureStorageResource();
+  // Built with 'false': HDFS only supplies the input data here.
+  private final HdfsStorageResource hdfsResource = new HdfsStorageResource(false);
+
+  @Override
+  protected HdfsStorageResource getHdfsResource()
+  {
+    return hdfsResource;
+  }
+
+  @Override
+  protected void addResources(EmbeddedDruidCluster cluster)
+  {
+    // Registration order matters: HDFS goes first. It brings up the MiniDFSCluster and
+    // sets hadoop.fs.defaultFS so the indexer can read from HDFS, without setting any
+    // deep-storage properties that would compete with Azure.
+    cluster.addResource(hdfsResource);
+
+    // Azure goes second and configures deep storage (druid.storage.type=azure, etc.),
+    // so nothing the HDFS resource might set can override those settings.
+    cluster.addResource(azureResource);
+  }
+
+  @ParameterizedTest
+  @MethodSource("resources")
+  public void testHdfsIndexData(Pair<String, Object> hdfsInputSource) throws Exception
+  {
+    doHdfsTest(hdfsInputSource, Pair.of(false, false));
+  }
+
+  @AfterAll
+  public void deleteSegmentsFromAzure()
+  {
+    try {
+      // The container was created by the AzureStorageResource; dropping it removes
+      // every segment written during the test run in one step.
+      azureResource.getStorageClient()
+                   .getContainerReference(azureResource.getAzureContainerName())
+                   .deleteIfExists();
+    }
+    catch (Exception e) {
+      // Cleanup is best-effort; a failure here is only logged.
+      LOG.warn(e, "Unable to delete Azure container after test");
+    }
+  }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToGcsParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToGcsParallelIndexTest.java
new file mode 100644
index 00000000000..ed2da3df764
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToGcsParallelIndexTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import org.apache.druid.testing.embedded.gcs.GoogleCloudStorageResource;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Embedded parallel index test that reads input data from HDFS (in-process
MiniDFSCluster)
+ * and stores segments in Google Cloud Storage (FakeGcsServer testcontainer).
+ */
+public class HdfsToGcsParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
+{
+  private static final Logger LOG = new Logger(HdfsToGcsParallelIndexTest.class);
+
+  // GCS is the deep storage for this test.
+  private final GoogleCloudStorageResource gcsResource = new GoogleCloudStorageResource();
+  // Built with 'false': HDFS only supplies the input data here.
+  private final HdfsStorageResource hdfsResource = new HdfsStorageResource(false);
+
+  @Override
+  protected HdfsStorageResource getHdfsResource()
+  {
+    return hdfsResource;
+  }
+
+  @Override
+  protected void addResources(EmbeddedDruidCluster cluster)
+  {
+    // HDFS first: brings up the MiniDFSCluster and sets hadoop.fs.defaultFS.
+    // It is not configured as deep storage; GCS takes that role.
+    cluster.addResource(hdfsResource);
+
+    // GCS second: configures deep storage (druid.storage.type=google, etc.) and creates
+    // the bucket. GoogleCloudStorageResource.onStarted() takes the storageUrl from the
+    // running FakeGcsServer container.
+    cluster.addResource(gcsResource);
+  }
+
+  @ParameterizedTest
+  @MethodSource("resources")
+  public void testHdfsIndexData(Pair<String, Object> hdfsInputSource) throws Exception
+  {
+    doHdfsTest(hdfsInputSource, Pair.of(false, false));
+  }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToHdfsParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToHdfsParallelIndexTest.java
new file mode 100644
index 00000000000..91ea6041b07
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToHdfsParallelIndexTest.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Embedded parallel index test that reads from and writes to HDFS using an
in-process
+ * {@link org.apache.hadoop.hdfs.MiniDFSCluster}.
+ */
+public class HdfsToHdfsParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
+{
+  // Built with 'true', unlike the sibling Hdfs-to-cloud tests, which pass 'false'
+  // because HDFS is only their input source.
+  private final HdfsStorageResource hdfsResource = new HdfsStorageResource(true);
+
+  @Override
+  protected void addResources(EmbeddedDruidCluster cluster)
+  {
+    // HDFS is the only storage resource this test registers.
+    cluster.addResource(hdfsResource);
+  }
+
+  @Override
+  protected HdfsStorageResource getHdfsResource()
+  {
+    return hdfsResource;
+  }
+
+  @ParameterizedTest
+  @MethodSource("resources")
+  public void testHdfsIndexData(Pair<String, Object> hdfsInputSource) throws Exception
+  {
+    doHdfsTest(hdfsInputSource, Pair.of(false, false));
+  }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToS3ParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToS3ParallelIndexTest.java
new file mode 100644
index 00000000000..2c57f70dc46
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/HdfsToS3ParallelIndexTest.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import org.apache.druid.testing.embedded.minio.MinIOStorageResource;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Embedded parallel index test that reads input data from HDFS (in-process
MiniDFSCluster)
+ * and stores segments in S3 (MinIO testcontainer).
+ *
+ * <p>The MinIO resource is registered after the HDFS resource so that the S3
deep-storage
+ * configuration ({@code druid.storage.type=s3}) is applied last. The HDFS
input-source
+ * connection properties ({@code hadoop.fs.defaultFS}) remain active and are
used by the
+ * indexer to read data from HDFS.
+ */
+public class HdfsToS3ParallelIndexTest extends AbstractHdfsInputSourceParallelIndexTest
+{
+  private static final Logger LOG = new Logger(HdfsToS3ParallelIndexTest.class);
+
+  // S3 (served by MinIO) is the deep storage for this test.
+  private final MinIOStorageResource minIOResource = new MinIOStorageResource();
+  // Built with 'false': HDFS only supplies the input data here.
+  private final HdfsStorageResource hdfsResource = new HdfsStorageResource(false);
+
+  @Override
+  protected HdfsStorageResource getHdfsResource()
+  {
+    return hdfsResource;
+  }
+
+  @Override
+  protected void addResources(EmbeddedDruidCluster cluster)
+  {
+    // HDFS first: brings up the MiniDFSCluster and sets hadoop.fs.defaultFS so the
+    // indexer can read from HDFS. With configureAsDeepStorage=false it leaves
+    // druid.storage.type untouched.
+    cluster.addResource(hdfsResource);
+
+    // MinIO second: configures S3/MinIO as deep storage (druid.storage.type=s3, etc.).
+    // Registering it last lets the S3 deep-storage settings win.
+    cluster.addResource(minIOResource);
+  }
+
+  @ParameterizedTest
+  @MethodSource("resources")
+  public void testHdfsIndexData(Pair<String, Object> hdfsInputSource) throws Exception
+  {
+    doHdfsTest(hdfsInputSource, Pair.of(false, false));
+  }
+}
diff --git
a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/S3ToHdfsParallelIndexTest.java
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/S3ToHdfsParallelIndexTest.java
new file mode 100644
index 00000000000..23063af355d
--- /dev/null
+++
b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/hdfs/S3ToHdfsParallelIndexTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.testing.embedded.hdfs;
+
+import org.apache.druid.java.util.common.Pair;
+import org.apache.druid.testing.embedded.EmbeddedDruidCluster;
+import
org.apache.druid.testing.embedded.indexer.AbstractS3InputSourceParallelIndexTest;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.List;
+
+/**
+ * Embedded parallel index test that reads input data from S3 (MinIO
testcontainer) and stores
+ * segments in HDFS (in-process MiniDFSCluster).
+ *
+ * <p>The HDFS resource is registered after the MinIO resource so that the
HDFS deep-storage
+ * configuration ({@code druid.storage.type=hdfs}) overrides the S3
configuration set by MinIO.
+ * The S3 input-source connection properties remain active and are used by the
indexer to read
+ * data from MinIO.
+ */
+public class S3ToHdfsParallelIndexTest extends AbstractS3InputSourceParallelIndexTest
+{
+  // Built with 'true': HDFS is the deep storage for this test.
+  private final HdfsStorageResource hdfsResource = new HdfsStorageResource(true);
+
+  @Override
+  protected void addResources(EmbeddedDruidCluster cluster)
+  {
+    // Registration order matters. The parent registers the S3/MinIO side first
+    // (data upload to MinIO and the S3 connection properties).
+    super.addResources(cluster);
+    // HDFS is registered afterwards so that it overrides druid.storage.type
+    // to "hdfs" for deep storage.
+    cluster.addResource(hdfsResource);
+  }
+
+  @ParameterizedTest
+  @MethodSource("resources")
+  public void testS3IndexData(Pair<String, List<?>> s3InputSource) throws Exception
+  {
+    doTest(s3InputSource, Pair.of(false, false), "s3", null);
+  }
+}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java
b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java
index b9f52e3375d..fbbd966a334 100644
--- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java
+++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java
@@ -26,29 +26,6 @@ package org.apache.druid.tests;
public class TestNGGroup
{
- /**
- * This group is not part of CI. To run this group, s3 configs/credentials
for your s3 must be provided in a file.
- * The path of the file must then be pass to mvn with
-Doverride.config.path=<PATH_TO_FILE>
- * See integration-tests/docker/environment-configs/override-examples/s3 for
env vars to provide.
- */
- public static final String S3_DEEP_STORAGE = "s3-deep-storage";
-
- /**
- * This group is not part of CI. To run this group, gcs configs/credentials
for your gcs must be provided in a file.
- * The path of the file must then be pass to mvn with
-Doverride.config.path=<PATH_TO_FILE>
- * See integration-tests/docker/environment-configs/override-examples/gcs
for env vars to provide.
- * The path to the folder that contains your GOOGLE_APPLICATION_CREDENTIALS
file must also be pass
- * to mvn with -Dresource.file.dir.path=<PATH_TO_FOLDER>
- */
- public static final String GCS_DEEP_STORAGE = "gcs-deep-storage";
-
- /**
- * This group is not part of CI. To run this group, azure
configs/credentials for your azure must be provided in a file.
- * The path of the file must then be pass to mvn with
-Doverride.config.path=<PATH_TO_FILE>
- * See integration-tests/docker/environment-configs/override-examples/azures
for env vars to provide.
- */
- public static final String AZURE_DEEP_STORAGE = "azure-deep-storage";
-
/**
* This group is not part of CI. To run this group, hadoop configs must be
provided in a file. The path of the file
* must then be pass to mvn with -Doverride.config.path=<PATH_TO_FILE>
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java
deleted file mode 100644
index 9ce161f6ccd..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractAzureInputSourceParallelIndexTest.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.java.util.common.StringUtils;
-import org.testng.annotations.DataProvider;
-
-import java.io.Closeable;
-import java.util.List;
-import java.util.UUID;
-import java.util.function.Function;
-
-public abstract class AbstractAzureInputSourceParallelIndexTest extends
AbstractITBatchIndexTest
-{
- private static final String INDEX_TASK =
"/indexer/wikipedia_cloud_index_task.json";
- private static final String INDEX_QUERIES_RESOURCE =
"/indexer/wikipedia_index_queries.json";
- private static final String INPUT_SOURCE_URIS_KEY = "uris";
- private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
- private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
- private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
- private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
- private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
-
- @DataProvider
- public static Object[][] resources()
- {
- return new Object[][]{
- {new Pair<>(INPUT_SOURCE_URIS_KEY,
- ImmutableList.of(
- "azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
- "azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
- "azure://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
- )
- )},
- {new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
- ImmutableList.of(
- "azure://%%BUCKET%%/%%PATH%%"
- )
- )},
- {new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
- ImmutableList.of(
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_1),
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_2),
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_3)
- )
- )}
- };
- }
-
- void doTest(
- Pair<String, List> azureInputSource,
- Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair
- ) throws Exception
- {
- final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
- try (
- final Closeable ignored1 = unloader(indexDatasource +
config.getExtraDatasourceNameSuffix());
- ) {
- final Function<String, String> azurePropsTransform = spec -> {
- try {
- String inputSourceValue =
jsonMapper.writeValueAsString(azureInputSource.rhs);
- inputSourceValue = StringUtils.replace(
- inputSourceValue,
- "%%BUCKET%%",
- config.getCloudBucket()
- );
- inputSourceValue = StringUtils.replace(
- inputSourceValue,
- "%%PATH%%",
- config.getCloudPath()
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_FORMAT_TYPE%%",
- InputFormatDetails.JSON.getInputFormatType()
- );
- spec = StringUtils.replace(
- spec,
- "%%PARTITIONS_SPEC%%",
- jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null,
null))
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_TYPE%%",
- "azure"
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_KEY%%",
- azureInputSource.lhs
- );
- return StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_VALUE%%",
- inputSourceValue
- );
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- };
-
- doIndexTest(
- indexDatasource,
- INDEX_TASK,
- azurePropsTransform,
- INDEX_QUERIES_RESOURCE,
- false,
- true,
- true,
- segmentAvailabilityConfirmationPair
- );
- }
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java
deleted file mode 100644
index f720a5d5943..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractGcsInputSourceParallelIndexTest.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.java.util.common.StringUtils;
-import org.testng.annotations.DataProvider;
-
-import java.io.Closeable;
-import java.util.List;
-import java.util.UUID;
-import java.util.function.Function;
-
-public abstract class AbstractGcsInputSourceParallelIndexTest extends
AbstractITBatchIndexTest
-{
- private static final String INDEX_TASK =
"/indexer/wikipedia_cloud_index_task.json";
- private static final String INDEX_QUERIES_RESOURCE =
"/indexer/wikipedia_index_queries.json";
- private static final String INPUT_SOURCE_URIS_KEY = "uris";
- private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
- private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
- private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
- private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
- private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
-
- @DataProvider
- public static Object[][] resources()
- {
- return new Object[][]{
- {new Pair<>(INPUT_SOURCE_URIS_KEY,
- ImmutableList.of(
- "gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
- "gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
- "gs://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
- )
- )},
- {new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
- ImmutableList.of(
- "gs://%%BUCKET%%/%%PATH%%"
- )
- )},
- {new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
- ImmutableList.of(
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_1),
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_2),
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_3)
- )
- )}
- };
- }
-
- void doTest(
- Pair<String, List> gcsInputSource,
- Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair
- ) throws Exception
- {
- final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
- try (
- final Closeable ignored1 = unloader(indexDatasource +
config.getExtraDatasourceNameSuffix());
- ) {
- final Function<String, String> gcsPropsTransform = spec -> {
- try {
- String inputSourceValue =
jsonMapper.writeValueAsString(gcsInputSource.rhs);
- inputSourceValue = StringUtils.replace(
- inputSourceValue,
- "%%BUCKET%%",
- config.getCloudBucket()
- );
- inputSourceValue = StringUtils.replace(
- inputSourceValue,
- "%%PATH%%",
- config.getCloudPath()
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_FORMAT_TYPE%%",
- InputFormatDetails.JSON.getInputFormatType()
- );
- spec = StringUtils.replace(
- spec,
- "%%PARTITIONS_SPEC%%",
- jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null,
null))
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_TYPE%%",
- "google"
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_KEY%%",
- gcsInputSource.lhs
- );
- return StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_VALUE%%",
- inputSourceValue
- );
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- };
-
- doIndexTest(
- indexDatasource,
- INDEX_TASK,
- gcsPropsTransform,
- INDEX_QUERIES_RESOURCE,
- false,
- true,
- true,
- segmentAvailabilityConfirmationPair
- );
- }
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java
deleted file mode 100644
index eaedde10bbf..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractHdfsInputSourceParallelIndexTest.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import com.google.common.collect.ImmutableList;
-import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.java.util.common.StringUtils;
-import org.testng.annotations.DataProvider;
-
-import java.io.Closeable;
-import java.util.List;
-import java.util.UUID;
-import java.util.function.Function;
-
-public abstract class AbstractHdfsInputSourceParallelIndexTest extends
AbstractITBatchIndexTest
-{
- private static final String INDEX_TASK =
"/indexer/wikipedia_cloud_index_task.json";
- private static final String INDEX_QUERIES_RESOURCE =
"/indexer/wikipedia_index_queries.json";
- private static final String INPUT_SOURCE_PATHS_KEY = "paths";
-
- @DataProvider
- public static Object[][] resources()
- {
- return new Object[][]{
- {new Pair<>(INPUT_SOURCE_PATHS_KEY,
- "hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
- )},
- {new Pair<>(INPUT_SOURCE_PATHS_KEY,
- ImmutableList.of(
-
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%"
- )
- )},
- {new Pair<>(INPUT_SOURCE_PATHS_KEY,
- ImmutableList.of(
-
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data1%%FILE_EXTENSION%%",
-
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data2%%FILE_EXTENSION%%",
-
"hdfs://druid-it-hadoop:9000/batch_index%%FOLDER_SUFFIX%%/wikipedia_index_data3%%FILE_EXTENSION%%"
- )
- )}
- };
- }
-
- void doTest(
- Pair<String, List> hdfsInputSource,
- InputFormatDetails inputFormatDetails,
- Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair
- ) throws Exception
- {
- final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
- try (
- final Closeable ignored1 = unloader(indexDatasource +
config.getExtraDatasourceNameSuffix());
- ) {
- final Function<String, String> hdfsPropsTransform = spec -> {
- try {
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_TYPE%%",
- "hdfs"
- );
- spec = StringUtils.replace(
- spec,
- "%%PARTITIONS_SPEC%%",
- jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null,
null))
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_KEY%%",
- hdfsInputSource.lhs
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_FORMAT_TYPE%%",
- inputFormatDetails.getInputFormatType()
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_VALUE%%",
- jsonMapper.writeValueAsString(hdfsInputSource.rhs)
- );
- spec = StringUtils.replace(
- spec,
- "%%FOLDER_SUFFIX%%",
- inputFormatDetails.getFolderSuffix()
- );
- spec = StringUtils.replace(
- spec,
- "%%FILE_EXTENSION%%",
- inputFormatDetails.getFileExtension()
- );
- return spec;
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- };
-
- doIndexTest(
- indexDatasource,
- INDEX_TASK,
- hdfsPropsTransform,
- INDEX_QUERIES_RESOURCE,
- false,
- true,
- true,
- segmentAvailabilityConfirmationPair
- );
- }
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java
deleted file mode 100644
index 1d378d5dee6..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractS3InputSourceParallelIndexTest.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.java.util.common.StringUtils;
-import org.testng.annotations.DataProvider;
-
-import java.io.Closeable;
-import java.util.List;
-import java.util.UUID;
-import java.util.function.Function;
-
-public abstract class AbstractS3InputSourceParallelIndexTest extends
AbstractITBatchIndexTest
-{
- private static final String INDEX_TASK =
"/indexer/wikipedia_cloud_index_task.json";
- private static final String INDEX_QUERIES_RESOURCE =
"/indexer/wikipedia_index_queries.json";
- private static final String INPUT_SOURCE_URIS_KEY = "uris";
- private static final String INPUT_SOURCE_PREFIXES_KEY = "prefixes";
- private static final String INPUT_SOURCE_OBJECTS_KEY = "objects";
- private static final String WIKIPEDIA_DATA_1 = "wikipedia_index_data1.json";
- private static final String WIKIPEDIA_DATA_2 = "wikipedia_index_data2.json";
- private static final String WIKIPEDIA_DATA_3 = "wikipedia_index_data3.json";
-
- @DataProvider
- public static Object[][] resources()
- {
- return new Object[][]{
- {new Pair<>(INPUT_SOURCE_URIS_KEY,
- ImmutableList.of(
- "s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_1,
- "s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_2,
- "s3://%%BUCKET%%/%%PATH%%" + WIKIPEDIA_DATA_3
- )
- )},
- {new Pair<>(INPUT_SOURCE_PREFIXES_KEY,
- ImmutableList.of(
- "s3://%%BUCKET%%/%%PATH%%"
- )
- )},
- {new Pair<>(INPUT_SOURCE_OBJECTS_KEY,
- ImmutableList.of(
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_1),
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_2),
- ImmutableMap.of("bucket", "%%BUCKET%%", "path",
"%%PATH%%" + WIKIPEDIA_DATA_3)
- )
- )}
- };
- }
-
- void doTest(
- Pair<String, List> s3InputSource,
- Pair<Boolean, Boolean> segmentAvailabilityConfirmationPair
- ) throws Exception
- {
- final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
- try (
- final Closeable ignored1 = unloader(indexDatasource +
config.getExtraDatasourceNameSuffix());
- ) {
- final Function<String, String> s3PropsTransform = spec -> {
- try {
- String inputSourceValue =
jsonMapper.writeValueAsString(s3InputSource.rhs);
- inputSourceValue = StringUtils.replace(
- inputSourceValue,
- "%%BUCKET%%",
- config.getCloudBucket()
- );
- inputSourceValue = StringUtils.replace(
- inputSourceValue,
- "%%PATH%%",
- config.getCloudPath()
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_FORMAT_TYPE%%",
- InputFormatDetails.JSON.getInputFormatType()
- );
- spec = StringUtils.replace(
- spec,
- "%%PARTITIONS_SPEC%%",
- jsonMapper.writeValueAsString(new DynamicPartitionsSpec(null,
null))
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_TYPE%%",
- "s3"
- );
- spec = StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_KEY%%",
- s3InputSource.lhs
- );
- return StringUtils.replace(
- spec,
- "%%INPUT_SOURCE_PROPERTY_VALUE%%",
- inputSourceValue
- );
- }
- catch (Exception e) {
- throw new RuntimeException(e);
- }
- };
-
- doIndexTest(
- indexDatasource,
- INDEX_TASK,
- s3PropsTransform,
- INDEX_QUERIES_RESOURCE,
- false,
- true,
- true,
- segmentAvailabilityConfirmationPair
- );
- }
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java
deleted file mode 100644
index ca7ee72e02f..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITAzureToHdfsParallelIndexTest.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.tests.TestNGGroup;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
-
-import java.util.List;
-
-/**
- * IMPORTANT:
- * To run this test, you must:
- * 1) Set the bucket and path for your data. This can be done by setting
-Ddruid.test.config.cloudBucket and
- * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path"
in the config file.
- * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and
wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index/json to
your Azure at the location set in step 1.
- * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure credentials and
hdfs deep storage configs set. See
- * integration-tests/docker/environment-configs/override-examples/azure and
- * integration-tests/docker/environment-configs/override-examples/hdfs for
env vars to provide.
- * 4) Run the test with -Dstart.hadoop.docker=true in the mvn command
- */
-@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITAzureToHdfsParallelIndexTest extends
AbstractAzureInputSourceParallelIndexTest
-{
- @Test(dataProvider = "resources")
- public void testAzureIndexData(Pair<String, List> azureInputSource) throws
Exception
- {
- doTest(azureInputSource, new Pair<>(false, false));
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java
deleted file mode 100644
index 2c9a42e1a6a..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITGcsToHdfsParallelIndexTest.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.tests.TestNGGroup;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
-
-import java.util.List;
-
-/**
- * IMPORTANT:
- * To run this test, you must:
- * 1) Set the bucket and path for your data. This can be done by setting
-Ddruid.test.config.cloudBucket and
- * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path"
in the config file.
- * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and
wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index/json to
your GCS at the location set in step 1.
- * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs and hdfs
deep storage configs set. See
- * integration-tests/docker/environment-configs/override-examples/gcs and
- * integration-tests/docker/environment-configs/override-examples/hdfs for
env vars to provide.
- * 4) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that
contains GOOGLE_APPLICATION_CREDENTIALS file
- * 5) Run the test with -Dstart.hadoop.docker=true in the mvn command
- */
-@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITGcsToHdfsParallelIndexTest extends
AbstractGcsInputSourceParallelIndexTest
-{
- @Test(dataProvider = "resources")
- public void testGcsIndexData(Pair<String, List> gcsInputSource) throws
Exception
- {
- doTest(gcsInputSource, new Pair<>(false, false));
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java
deleted file mode 100644
index e0cee7f8a55..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToAzureParallelIndexTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.tests.TestNGGroup;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
-
-import java.util.List;
-
-/**
- * IMPORTANT:
- * To run this test, you must:
- * 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
- * 2) Provide -Doverride.config.path=<PATH_TO_FILE> with Azure
credentials/configs set. See
- * integration-tests/docker/environment-configs/override-examples/azure for
env vars to provide.
- * You will also need to include "druid-hdfs-storage" to
druid_extensions_loadList in this file.
- */
-@Test(groups = TestNGGroup.AZURE_DEEP_STORAGE)
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITHdfsToAzureParallelIndexTest extends
AbstractHdfsInputSourceParallelIndexTest
-{
- @Test(dataProvider = "resources")
- public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws
Exception
- {
- doTest(hdfsInputSource, InputFormatDetails.JSON, new Pair<>(false, false));
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java
deleted file mode 100644
index 5e302a14f77..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToGcsParallelIndexTest.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.tests.TestNGGroup;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
-
-import java.util.List;
-
-/**
- * IMPORTANT:
- * To run this test, you must:
- * 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
- * 2) Provide -Doverride.config.path=<PATH_TO_FILE> with gcs configs set. See
- * integration-tests/docker/environment-configs/override-examples/gcs for
env vars to provide.
- * You will also need to include "druid-hdfs-storage" to
druid_extensions_loadList in this file.
- * 3) Provide -Dresource.file.dir.path=<PATH_TO_FOLDER> with folder that
contains GOOGLE_APPLICATION_CREDENTIALS file
- */
-@Test(groups = TestNGGroup.GCS_DEEP_STORAGE)
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITHdfsToGcsParallelIndexTest extends
AbstractHdfsInputSourceParallelIndexTest
-{
- @Test(dataProvider = "resources")
- public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws
Exception
- {
- doTest(hdfsInputSource, InputFormatDetails.JSON, new Pair<>(false, false));
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java
deleted file mode 100644
index 42073544d14..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToHdfsParallelIndexTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.tests.TestNGGroup;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
-
-import java.util.List;
-
-/**
- * IMPORTANT:
- * To run this test, you must:
- * 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
- * 2) Provide -Doverride.config.path=<PATH_TO_FILE> with hdfs configs set. See
- * integration-tests/docker/environment-configs/override-examples/hdfs for
env vars to provide.
- */
-@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITHdfsToHdfsParallelIndexTest extends
AbstractHdfsInputSourceParallelIndexTest
-{
- @Test(dataProvider = "resources")
- public void testHdfsIndexJsonData(Pair<String, List> hdfsInputSource) throws
Exception
- {
- doTest(hdfsInputSource, InputFormatDetails.JSON, new Pair<>(false, false));
- }
-
- @Test(dataProvider = "resources")
- public void testHdfsIndexOrcData(Pair<String, List> hdfsInputSource) throws
Exception
- {
- doTest(hdfsInputSource, InputFormatDetails.ORC, new Pair<>(false, false));
- }
-
- @Test(dataProvider = "resources")
- public void testHdfsIndexParquetData(Pair<String, List> hdfsInputSource)
throws Exception
- {
- doTest(hdfsInputSource, InputFormatDetails.PARQUET, new Pair<>(false,
false));
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java
deleted file mode 100644
index 5e674fda106..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITHdfsToS3ParallelIndexTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.tests.TestNGGroup;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
-
-import java.util.List;
-
-/**
- * IMPORTANT:
- * To run this test, you must:
- * 1) Run the test with -Dstart.hadoop.docker=true in the mvn command
- * 2) Provide -Doverride.config.path=<PATH_TO_FILE> with s3
credentials/configs set. See
- * integration-tests/docker/environment-configs/override-examples/s3 for
env vars to provide.
- * You will also need to include "druid-hdfs-storage" to
druid_extensions_loadList in this file.
- */
-@Test(groups = TestNGGroup.S3_DEEP_STORAGE)
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITHdfsToS3ParallelIndexTest extends
AbstractHdfsInputSourceParallelIndexTest
-{
- @Test(dataProvider = "resources")
- public void testHdfsIndexData(Pair<String, List> hdfsInputSource) throws
Exception
- {
- doTest(hdfsInputSource, InputFormatDetails.JSON, new Pair<>(false, false));
- }
-}
diff --git
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java
deleted file mode 100644
index a5869447156..00000000000
---
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITS3ToHdfsParallelIndexTest.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.tests.indexer;
-
-import org.apache.druid.java.util.common.Pair;
-import org.apache.druid.testing.guice.DruidTestModuleFactory;
-import org.apache.druid.tests.TestNGGroup;
-import org.testng.annotations.Guice;
-import org.testng.annotations.Test;
-
-import java.util.List;
-
-/**
- * IMPORTANT:
- * To run this test, you must:
- * 1) Set the bucket and path for your data. This can be done by setting
-Ddruid.test.config.cloudBucket and
- * -Ddruid.test.config.cloudPath or setting "cloud_bucket" and "cloud_path"
in the config file.
- * 2) Copy wikipedia_index_data1.json, wikipedia_index_data2.json, and
wikipedia_index_data3.json
- * located in integration-tests/src/test/resources/data/batch_index/json to
your S3 at the location set in step 1.
- * 3) Provide -Doverride.config.path=<PATH_TO_FILE> with s3 credentials and
hdfs deep storage configs set. See
- * integration-tests/docker/environment-configs/override-examples/s3 and
- * integration-tests/docker/environment-configs/override-examples/hdfs for
env vars to provide.
- * 4) Run the test with -Dstart.hadoop.docker=true in the mvn command
- */
-@Test(groups = TestNGGroup.HDFS_DEEP_STORAGE)
-@Guice(moduleFactory = DruidTestModuleFactory.class)
-public class ITS3ToHdfsParallelIndexTest extends
AbstractS3InputSourceParallelIndexTest
-{
- @Test(dataProvider = "resources")
- public void testS3IndexData(Pair<String, List> s3InputSource) throws
Exception
- {
- doTest(s3InputSource, new Pair<>(false, false));
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]