This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 24668b842ac [fix](hdfs)read 'fs.defaultFS' from core-site.xml for hdfs load which has no default fs (#34217)
24668b842ac is described below

commit 24668b842ac93cf8b57d511057299846af1606a7
Author: slothever <18522955+w...@users.noreply.github.com>
AuthorDate: Wed May 1 00:24:22 2024 +0800

    [fix](hdfs)read 'fs.defaultFS' from core-site.xml for hdfs load which has no default fs (#34217)
    
    read 'fs.defaultFS' from hdfs-site.xml for hdfs load which has no default fs
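
A minimal sketch (assuming a standard Hadoop client on the classpath) of the derivation the fix relies on: Path#toUri splits a fully qualified HDFS path into scheme and authority, and joining them yields the fs.defaultFS-style prefix handed to setFsName(). The class name and sample path below are illustrative, not from the commit.

    import org.apache.hadoop.fs.Path;
    import java.net.URI;

    public class FsNameSketch {
        public static void main(String[] args) {
            // An illustrative fully qualified HDFS path, as a load request might carry.
            URI fileUri = new Path("hdfs://nameservice1/user/doris/data/file.json").toUri();
            // scheme "hdfs" plus authority "nameservice1" form the fs.defaultFS-style
            // prefix that the patched FileGroupInfo passes to rangeDesc.setFsName().
            System.out.println(fileUri.getScheme() + "://" + fileUri.getAuthority());
            // prints: hdfs://nameservice1
        }
    }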
---
 .../org/apache/doris/datasource/FileGroupInfo.java |  6 +++
 .../load_p0/stream_load/test_hdfs_json_load.out    |  5 +++
 .../load_p0/stream_load/test_hdfs_json_load.groovy | 51 ++++++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileGroupInfo.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileGroupInfo.java
index 2a2f9f8e563..4cea2e7e883 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileGroupInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileGroupInfo.java
@@ -46,9 +46,11 @@ import org.apache.doris.thrift.TUniqueId;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import org.apache.hadoop.fs.Path;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -316,6 +318,10 @@ public class FileGroupInfo {
             rangeDesc.setSize(rangeBytes);
             rangeDesc.setFileSize(fileStatus.size);
             rangeDesc.setColumnsFromPath(columnsFromPath);
+            if (getFileType() == TFileType.FILE_HDFS) {
+                URI fileUri = new Path(fileStatus.path).toUri();
+            rangeDesc.setFsName(fileUri.getScheme() + "://" + fileUri.getAuthority());
+            }
         } else {
             // for stream load
             if (getFileType() == TFileType.FILE_LOCAL) {
diff --git a/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out b/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out
index 7649ddbf0b8..a050b95f9ae 100644
--- a/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_hdfs_json_load.out
@@ -311,3 +311,8 @@
 50     guangzhou       2345675
 200    changsha        3456789
 
+-- !select15 --
+30     hangzhou        2345673
+40     shenzhen        2345674
+50     guangzhou       2345675
+200    changsha        3456789
diff --git a/regression-test/suites/load_p0/stream_load/test_hdfs_json_load.groovy b/regression-test/suites/load_p0/stream_load/test_hdfs_json_load.groovy
index 3fe315c48a9..af7d347b71e 100644
--- a/regression-test/suites/load_p0/stream_load/test_hdfs_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_hdfs_json_load.groovy
@@ -77,6 +77,39 @@ suite("test_hdfs_json_load", "p0,external,external_docker,external_docker_hive,h
         assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
     }
 
+    def load_from_hdfs2 = {new_json_reader_flag, strip_flag, fuzzy_flag, testTablex, label, fileName,
+                           fsPath, hdfsUser, exprs, jsonpaths, json_root, columns_parameter, where ->
+        def hdfsFilePath = "${fsPath}/user/doris/preinstalled_data/json_format_test/${fileName}"
+        def result1 = sql """
+                        LOAD LABEL ${label} (
+                            DATA INFILE("${hdfsFilePath}")
+                            INTO TABLE ${testTablex} 
+                            FORMAT as "json"
+                            ${columns_parameter}
+                            ${exprs}
+                            ${where}
+                            properties(
+                                "json_root" = "${json_root}",
+                                "jsonpaths" = "${jsonpaths}",
+                                "strip_outer_array" = "${strip_flag}",
+                                "fuzzy_parse" = "${fuzzy_flag}"
+                                )
+                            )
+                        with HDFS (
+                            "hadoop.username" = "${hdfsUser}"
+                            )
+                        PROPERTIES (
+                            "timeout"="1200"
+                            );
+                        """
+
+        println "${result1}"
+
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+    }
+
     def check_load_result = {checklabel, testTablex ->
         def max_try_milli_secs = 30000
         while(max_try_milli_secs) {
@@ -514,7 +547,23 @@ suite("test_hdfs_json_load", "p0,external,external_docker,external_docker_hive,h
         }
     }
 
+    // case15: verify hdfs load succeeds when no default fs is set in the load properties
+    def q15 = {
+        try {
+            def test_load_label1 = UUID.randomUUID().toString().replaceAll("-", "")
+            sql "DROP TABLE IF EXISTS ${testTable}"
+            create_test_table1.call(testTable)
+            load_from_hdfs2.call("false", "false", "false", testTable, test_load_label1, "nest_json.json", fsPath, hdfsUser,
+                    "SET(id = id * 10)", """[\\"\$.id\\", \\"\$.code\\",\\"\$.city\\"]""", '$.item',
+                    '(id, code, city)', 'WHERE id>20')
 
+            check_load_result(test_load_label1, testTable)
+            sql "sync"
+            qt_select15 "select * from ${testTable} order by id"
+        } finally {
+            try_sql("DROP TABLE IF EXISTS ${testTable}")
+        }
+    }
 
     
     String enabled = context.config.otherConfigs.get("enableHiveTest")
@@ -547,5 +596,7 @@ suite("test_hdfs_json_load", "p0,external,external_docker,external_docker_hive,h
         q13()
         log.info("Begin Test q14:")
         q14()
+        log.info("Begin Test q15:")
+        q15()
     }
 }


