This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-c108335-hive-sql in repository https://gitbox.apache.org/repos/asf/doris.git
commit 15498d56c91fb19e81b5766a91d7ae89b8908838 Author: Mingyu Chen (Rayner) <morning...@163.com> AuthorDate: Thu Apr 3 14:55:28 2025 +0800 [fix](oss) the write to hive table on oss-hdfs may fail (#49754) ### What problem does this PR solve? Problem Summary: When insert data to a hive table on oss-hdfs, it may fail with following error: ``` Failed to delete directories for files: [oss://xxx] ``` This is because for oss-hdfs, we should use hadoop filesystem to do the operation. This PR fix it. When calling `getFSIdentity()`, we should pass the properties so that `LocationPath` can identify the right fs type. --- .../org/apache/doris/common/util/LocationPath.java | 5 ++-- .../doris/datasource/hive/HiveMetaStoreCache.java | 10 ++++--- .../doris/fs/remote/SwitchingFileSystem.java | 2 +- .../apache/doris/common/util/LocationPathTest.java | 33 +++++++++++++++------- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java index cd57fc645cc..1a9491af68d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java @@ -208,8 +208,9 @@ public class LocationPath { // Return the file system type and the file system identity. // The file system identity is the scheme and authority of the URI, eg. "hdfs://host:port" or "s3://bucket". - public static Pair<FileSystemType, String> getFSIdentity(String location, String bindBrokerName) { - LocationPath locationPath = new LocationPath(location, Collections.emptyMap(), true); + public static Pair<FileSystemType, String> getFSIdentity(String location, + Map<String, String> properties, String bindBrokerName) { + LocationPath locationPath = new LocationPath(location, properties, true); FileSystemType fsType = (bindBrokerName != null) ? FileSystemType.BROKER : locationPath.getFileSystemType(); URI uri = locationPath.getPath().toUri(); String fsIdent = Strings.nullToEmpty(uri.getScheme()) + "://" + Strings.nullToEmpty(uri.getAuthority()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 9bf63df6970..7d5feeb3685 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -346,10 +346,11 @@ public class HiveMetaStoreCache { DirectoryLister directoryLister, TableIf table) throws UserException { FileCacheValue result = new FileCacheValue(); + Map<String, String> properties = catalog.getCatalogProperty().getProperties(); RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey(LocationPath.getFSIdentity( - location, bindBrokerName), - catalog.getCatalogProperty().getProperties(), + location, properties, bindBrokerName), + properties, bindBrokerName, jobConf)); result.setSplittable(HiveUtil.isSplittable(fs, inputFormat, location)); // For Tez engine, it may generate subdirectoies for "union" query. @@ -754,12 +755,13 @@ public class HiveMetaStoreCache { return fileCacheValues; } + Map<String, String> properties = catalog.getCatalogProperty().getProperties(); for (HivePartition partition : partitions) { //Get filesystem multiple times, Reason: https://github.com/apache/doris/pull/23409. RemoteFileSystem fileSystem = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey( - LocationPath.getFSIdentity(partition.getPath(), bindBrokerName), - catalog.getCatalogProperty().getProperties(), bindBrokerName, jobConf)); + LocationPath.getFSIdentity(partition.getPath(), properties, bindBrokerName), + properties, bindBrokerName, jobConf)); AuthenticationConfig authenticationConfig = AuthenticationConfig.getKerberosConfig(jobConf); HadoopAuthenticator hadoopAuthenticator = diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java index 00802922ef3..ab7c91d693a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/SwitchingFileSystem.java @@ -125,7 +125,7 @@ public class SwitchingFileSystem implements FileSystem { public FileSystem fileSystem(String location) { return extMetaCacheMgr.getFsCache().getRemoteFileSystem( new FileSystemCache.FileSystemCacheKey( - LocationPath.getFSIdentity(location, + LocationPath.getFSIdentity(location, properties, bindBrokerName), properties, bindBrokerName)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java index 93a46a2dfa0..9c8866c4a86 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java @@ -25,6 +25,7 @@ import org.apache.doris.fs.FileSystemType; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -39,7 +40,8 @@ public class LocationPathTest { String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("hdfs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.DFS); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.DFS); // HA props Map<String, String> props = new HashMap<>(); @@ -92,7 +94,8 @@ public class LocationPathTest { // BE loc = locationPath.toStorageLocation().toString(); Assertions.assertTrue(loc.startsWith("jfs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(loc, null).first, FileSystemType.JFS); + Assertions.assertEquals(LocationPath.getFSIdentity(loc, Collections.emptyMap(), null).first, + FileSystemType.JFS); } @Test @@ -106,7 +109,8 @@ public class LocationPathTest { // BE String beLoc = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLoc.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLoc, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLoc, Collections.emptyMap(), null).first, + FileSystemType.S3); } @Test @@ -118,17 +122,21 @@ public class LocationPathTest { // BE String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); + // test oss-hdfs rangeProps.put(OssProperties.ENDPOINT, "oss-dls.aliyuncs.com"); locationPath = new LocationPath("oss://test.oss-dls.aliyuncs.com/path", rangeProps); + Assertions.assertEquals("oss://test.oss-dls.aliyuncs.com/path", locationPath.get()); + Assertions.assertEquals(LocationPath.getFSIdentity(locationPath.get(), rangeProps, null).first, + FileSystemType.DFS); // FE Assertions.assertTrue(locationPath.get().startsWith("oss://test.oss-dls.aliyuncs")); // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("oss://test.oss-dls.aliyuncs")); Assertions.assertEquals(locationPath.getFileSystemType(), FileSystemType.DFS); - } @Test @@ -140,7 +148,8 @@ public class LocationPathTest { String beLocation = locationPath.toStorageLocation().toString(); // BE Assertions.assertTrue(beLocation.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); locationPath = new LocationPath("cosn://test.com", rangeProps); // FE @@ -148,7 +157,8 @@ public class LocationPathTest { // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); locationPath = new LocationPath("ofs://test.com", rangeProps); // FE @@ -156,7 +166,8 @@ public class LocationPathTest { // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("ofs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.OFS); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.OFS); // GFS is now equals to DFS locationPath = new LocationPath("gfs://test.com", rangeProps); @@ -165,7 +176,8 @@ public class LocationPathTest { // BE beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("gfs://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.DFS); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.DFS); } @Test @@ -177,7 +189,8 @@ public class LocationPathTest { // BE String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("s3://")); - Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, null).first, FileSystemType.S3); + Assertions.assertEquals(LocationPath.getFSIdentity(beLocation, Collections.emptyMap(), null).first, + FileSystemType.S3); } @Test --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org