This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch branch-0.x
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/branch-0.x by this push:
new 42b23a61da19 [HUDI-9168] Refactor hudi-client-common to not import
hudi-aws (#13701)
42b23a61da19 is described below
commit 42b23a61da19b03df53f47df70d70abadef6df8d
Author: Alex R <[email protected]>
AuthorDate: Mon Aug 11 17:59:46 2025 -0700
[HUDI-9168] Refactor hudi-client-common to not import hudi-aws (#13701)
---
hudi-aws/pom.xml | 5 ++++
.../cloudwatch/CloudWatchMetricsReporter.java | 3 +-
.../cloudwatch/CloudWatchReporter.java | 5 +++-
.../hudi/aws/sync/AWSGlueCatalogSyncClient.java | 2 +-
.../java/org/apache/hudi/aws/utils/S3Utils.java | 27 ------------------
.../cloudwatch/TestCloudWatchMetricsReporter.java | 25 +++++++++++++++--
.../cloudwatch/TestCloudWatchReporter.java | 10 +++----
hudi-client/hudi-client-common/pom.xml | 17 ++++--------
.../java/org/apache/hudi/common/fs/FSUtils.java | 5 ++++
.../hudi/metrics/MetricsReporterFactory.java | 2 +-
.../hudi/metrics/TestMetricsReporterFactory.java | 1 -
.../org/apache/hudi/common/fs/TestFSUtils.java | 32 ++++++++++++++++++++++
12 files changed, 82 insertions(+), 52 deletions(-)
diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml
index 4e0c89efeb23..7dc22525a8af 100644
--- a/hudi-aws/pom.xml
+++ b/hudi-aws/pom.xml
@@ -42,6 +42,11 @@
</dependency>
<!-- Hoodie -->
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-client-common</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/cloudwatch/CloudWatchMetricsReporter.java
b/hudi-aws/src/main/java/org/apache/hudi/aws/metrics/cloudwatch/CloudWatchMetricsReporter.java
similarity index 96%
rename from
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/cloudwatch/CloudWatchMetricsReporter.java
rename to
hudi-aws/src/main/java/org/apache/hudi/aws/metrics/cloudwatch/CloudWatchMetricsReporter.java
index 68e4951f74fd..03f71410b36d 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/cloudwatch/CloudWatchMetricsReporter.java
+++
b/hudi-aws/src/main/java/org/apache/hudi/aws/metrics/cloudwatch/CloudWatchMetricsReporter.java
@@ -16,9 +16,8 @@
* limitations under the License.
*/
-package org.apache.hudi.metrics.cloudwatch;
+package org.apache.hudi.aws.metrics.cloudwatch;
-import org.apache.hudi.aws.cloudwatch.CloudWatchReporter;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.metrics.MetricsReporter;
diff --git
a/hudi-aws/src/main/java/org/apache/hudi/aws/cloudwatch/CloudWatchReporter.java
b/hudi-aws/src/main/java/org/apache/hudi/aws/metrics/cloudwatch/CloudWatchReporter.java
similarity index 98%
rename from
hudi-aws/src/main/java/org/apache/hudi/aws/cloudwatch/CloudWatchReporter.java
rename to
hudi-aws/src/main/java/org/apache/hudi/aws/metrics/cloudwatch/CloudWatchReporter.java
index f8fefbc69348..c440159b4fe5 100644
---
a/hudi-aws/src/main/java/org/apache/hudi/aws/cloudwatch/CloudWatchReporter.java
+++
b/hudi-aws/src/main/java/org/apache/hudi/aws/metrics/cloudwatch/CloudWatchReporter.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.hudi.aws.cloudwatch;
+package org.apache.hudi.aws.metrics.cloudwatch;
import org.apache.hudi.aws.credentials.HoodieAWSCredentialsProviderFactory;
import org.apache.hudi.common.util.Option;
@@ -235,6 +235,9 @@ public class CloudWatchReporter extends ScheduledReporter {
} catch (final Exception ex) {
LOG.error("Error reporting metrics to CloudWatch. The data in this
CloudWatch request "
+ "may have been discarded, and not made it to CloudWatch.", ex);
+ if (ex instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
+ }
}
}
}
diff --git
a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
index d379109a6243..681607117c65 100644
---
a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
+++
b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
@@ -91,7 +91,7 @@ import java.util.concurrent.Future;
import java.util.function.Consumer;
import java.util.stream.Collectors;
-import static org.apache.hudi.aws.utils.S3Utils.s3aToS3;
+import static org.apache.hudi.common.fs.FSUtils.s3aToS3;
import static org.apache.hudi.common.util.MapUtils.containsAll;
import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty;
import static
org.apache.hudi.config.GlueCatalogSyncClientConfig.ALL_PARTITIONS_READ_PARALLELISM;
diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/utils/S3Utils.java
b/hudi-aws/src/main/java/org/apache/hudi/aws/utils/S3Utils.java
deleted file mode 100644
index bfb208ee1505..000000000000
--- a/hudi-aws/src/main/java/org/apache/hudi/aws/utils/S3Utils.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hudi.aws.utils;
-
-public final class S3Utils {
-
- public static String s3aToS3(String s3aUrl) {
- return s3aUrl.replaceFirst("(?i)^s3a://", "s3://");
- }
-}
diff --git
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/cloudwatch/TestCloudWatchMetricsReporter.java
b/hudi-aws/src/test/java/org/apache/hudi/aws/metrics/cloudwatch/TestCloudWatchMetricsReporter.java
similarity index 77%
rename from
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/cloudwatch/TestCloudWatchMetricsReporter.java
rename to
hudi-aws/src/test/java/org/apache/hudi/aws/metrics/cloudwatch/TestCloudWatchMetricsReporter.java
index 4b1aaffbf86d..d0785bcb32fe 100644
---
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/cloudwatch/TestCloudWatchMetricsReporter.java
+++
b/hudi-aws/src/test/java/org/apache/hudi/aws/metrics/cloudwatch/TestCloudWatchMetricsReporter.java
@@ -16,9 +16,9 @@
* limitations under the License.
*/
-package org.apache.hudi.metrics.cloudwatch;
+package org.apache.hudi.aws.metrics.cloudwatch;
-import org.apache.hudi.aws.cloudwatch.CloudWatchReporter;
+import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
import org.apache.hudi.metrics.MetricsReporterFactory;
import org.apache.hudi.metrics.MetricsReporterType;
@@ -41,6 +41,9 @@ import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
public class TestCloudWatchMetricsReporter {
+ @Mock
+ private HoodieWriteConfig writeConfig;
+
@Mock
private HoodieMetricsConfig metricsConfig;
@@ -65,6 +68,22 @@ public class TestCloudWatchMetricsReporter {
verify(reporter, times(1)).stop();
}
+ @Test
+ public void testReporterUsingMetricsConfig() {
+ when(writeConfig.getMetricsConfig()).thenReturn(metricsConfig);
+ when(metricsConfig.getCloudWatchReportPeriodSeconds()).thenReturn(30);
+ CloudWatchMetricsReporter metricsReporter = new
CloudWatchMetricsReporter(writeConfig, registry, reporter);
+
+ metricsReporter.start();
+ verify(reporter, times(1)).start(30, TimeUnit.SECONDS);
+
+ metricsReporter.report();
+ verify(reporter, times(1)).report();
+
+ metricsReporter.stop();
+ verify(reporter, times(1)).stop();
+ }
+
@Test
public void testReporterViaReporterFactory() {
try {
@@ -75,7 +94,7 @@ public class TestCloudWatchMetricsReporter {
} catch (Exception e) {
assertTrue(e.getCause() instanceof InvocationTargetException);
assertTrue(Arrays.stream(((InvocationTargetException)
e.getCause()).getTargetException().getStackTrace()).anyMatch(
- ste ->
ste.toString().contains("org.apache.hudi.aws.cloudwatch.CloudWatchReporter.getAmazonCloudWatchClient")));
+ ste ->
ste.toString().contains("org.apache.hudi.aws.metrics.cloudwatch.CloudWatchReporter.getAmazonCloudWatchClient")));
}
}
}
diff --git
a/hudi-aws/src/test/java/org/apache/hudi/aws/cloudwatch/TestCloudWatchReporter.java
b/hudi-aws/src/test/java/org/apache/hudi/aws/metrics/cloudwatch/TestCloudWatchReporter.java
similarity index 94%
rename from
hudi-aws/src/test/java/org/apache/hudi/aws/cloudwatch/TestCloudWatchReporter.java
rename to
hudi-aws/src/test/java/org/apache/hudi/aws/metrics/cloudwatch/TestCloudWatchReporter.java
index 600dd5786959..dc3a4f0493c4 100644
---
a/hudi-aws/src/test/java/org/apache/hudi/aws/cloudwatch/TestCloudWatchReporter.java
+++
b/hudi-aws/src/test/java/org/apache/hudi/aws/metrics/cloudwatch/TestCloudWatchReporter.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.hudi.aws.cloudwatch;
+package org.apache.hudi.aws.metrics.cloudwatch;
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
import software.amazon.awssdk.services.cloudwatch.model.Dimension;
@@ -49,10 +49,10 @@ import java.util.TreeMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
-import static
org.apache.hudi.aws.cloudwatch.CloudWatchReporter.DIMENSION_COUNT_TYPE_VALUE;
-import static
org.apache.hudi.aws.cloudwatch.CloudWatchReporter.DIMENSION_GAUGE_TYPE_VALUE;
-import static
org.apache.hudi.aws.cloudwatch.CloudWatchReporter.DIMENSION_METRIC_TYPE_KEY;
-import static
org.apache.hudi.aws.cloudwatch.CloudWatchReporter.DIMENSION_TABLE_NAME_KEY;
+import static
org.apache.hudi.aws.metrics.cloudwatch.CloudWatchReporter.DIMENSION_COUNT_TYPE_VALUE;
+import static
org.apache.hudi.aws.metrics.cloudwatch.CloudWatchReporter.DIMENSION_GAUGE_TYPE_VALUE;
+import static
org.apache.hudi.aws.metrics.cloudwatch.CloudWatchReporter.DIMENSION_METRIC_TYPE_KEY;
+import static
org.apache.hudi.aws.metrics.cloudwatch.CloudWatchReporter.DIMENSION_TABLE_NAME_KEY;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ExtendWith(MockitoExtension.class)
diff --git a/hudi-client/hudi-client-common/pom.xml
b/hudi-client/hudi-client-common/pom.xml
index 274f1310116b..4a903c8ab9ae 100644
--- a/hudi-client/hudi-client-common/pom.xml
+++ b/hudi-client/hudi-client-common/pom.xml
@@ -48,17 +48,6 @@
<artifactId>hudi-io</artifactId>
<version>${project.version}</version>
</dependency>
- <dependency>
- <groupId>org.apache.hudi</groupId>
- <artifactId>hudi-hadoop-common</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hudi</groupId>
- <artifactId>hudi-aws</artifactId>
- <version>${project.version}</version>
- <scope>provided</scope>
- </dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-timeline-service</artifactId>
@@ -136,6 +125,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-hadoop-mr</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-tests-common</artifactId>
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index 05877a34604e..f415488775b5 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -747,6 +747,11 @@ public class FSUtils {
return pathInfoList;
}
+ // Converts a leading s3a:// scheme (matched case-insensitively) to s3://
+ public static String s3aToS3(String s3aUrl) {
+ return s3aUrl.replaceFirst("(?i)^s3a://", "s3://");
+ }
+
/**
* Serializable function interface.
*
diff --git
a/hudi-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java
b/hudi-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java
index 455cf8de1c54..0b86007fb616 100644
---
a/hudi-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java
+++
b/hudi-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java
@@ -86,7 +86,7 @@ public class MetricsReporterFactory {
reporter = new ConsoleMetricsReporter(registry);
break;
case CLOUDWATCH:
- reporter = (MetricsReporter)
ReflectionUtils.loadClass("org.apache.hudi.metrics.cloudwatch.CloudWatchMetricsReporter",
+ reporter = (MetricsReporter)
ReflectionUtils.loadClass("org.apache.hudi.aws.metrics.cloudwatch.CloudWatchMetricsReporter",
new Class[]{HoodieMetricsConfig.class, MetricRegistry.class},
metricsConfig, registry);
break;
case M3:
diff --git
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java
b/hudi-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java
similarity index 99%
rename from
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java
rename to
hudi-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java
index dd0ada876932..6e633bd41820 100644
---
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java
+++
b/hudi-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java
@@ -96,4 +96,3 @@ public class TestMetricsReporterFactory {
}
}
-
diff --git
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
index 076cef090747..76a24ad8c03a 100644
---
a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
+++
b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java
@@ -48,6 +48,7 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
import java.io.IOException;
import java.nio.file.Files;
@@ -581,6 +582,37 @@ public class TestFSUtils extends HoodieCommonTestHarness {
false));
}
+ @Test
+ void testS3aToS3_AWS() {
+ // Test cases for AWS S3 URLs
+ assertEquals("s3://my-bucket/path/to/object",
FSUtils.s3aToS3("s3a://my-bucket/path/to/object"));
+ assertEquals("s3://my-bucket", FSUtils.s3aToS3("s3a://my-bucket"));
+ assertEquals("s3://MY-BUCKET/PATH/TO/OBJECT",
FSUtils.s3aToS3("s3a://MY-BUCKET/PATH/TO/OBJECT"));
+ assertEquals("s3://my-bucket/path/to/object",
FSUtils.s3aToS3("S3a://my-bucket/path/to/object"));
+ assertEquals("s3://my-bucket/path/to/object",
FSUtils.s3aToS3("s3A://my-bucket/path/to/object"));
+ assertEquals("s3://my-bucket/path/to/object",
FSUtils.s3aToS3("S3A://my-bucket/path/to/object"));
+ assertEquals("s3://my-bucket/s3a://another-bucket/another/path",
FSUtils.s3aToS3("s3a://my-bucket/s3a://another-bucket/another/path"));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {
+ "gs://my-bucket/path/to/object",
+ "gs://my-bucket",
+ "gs://MY-BUCKET/PATH/TO/OBJECT",
+ "https://myaccount.blob.core.windows.net/mycontainer/path/to/blob",
+ "https://myaccount.blob.core.windows.net/MYCONTAINER/PATH/TO/BLOB",
+ "https://example.com/path/to/resource",
+ "http://example.com",
+ "ftp://example.com/resource",
+ "",
+ "gs://my-bucket/path/to/s3a://object",
+ "gs://my-bucket s3a://my-object",
+ })
+
+ void testUriDoesNotChange(String uri) {
+ assertEquals(uri, FSUtils.s3aToS3(uri));
+ }
+
private StoragePath getHoodieTempDir() {
return new StoragePath(baseUri.toString(), ".hoodie/.temp");
}