This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 896fd7d6bc7 branch-2.1: [Opt](test) export/outfile case supports multi-kerberos env #47189 (#47239) 896fd7d6bc7 is described below commit 896fd7d6bc73c2b7a7601c36bf50d65dc8388639 Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Thu Jan 23 15:19:44 2025 +0800 branch-2.1: [Opt](test) export/outfile case supports multi-kerberos env #47189 (#47239) Cherry-picked from #47189 Co-authored-by: zgxme <zhenggaoxi...@selectdb.com> --- regression-test/conf/regression-conf.groovy | 1 + .../export_p2/test_export_max_file_size.groovy | 269 +++++++++++---------- .../test_outfile_orc_max_file_size.groovy | 218 +++++++++-------- .../suites/export_p2/test_outfile_p2.groovy | 95 ++++---- 4 files changed, 301 insertions(+), 282 deletions(-) diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index 87f56ae5e75..f275dc63deb 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -177,6 +177,7 @@ dfsNamenodeRpcAddress3="" hadoopSecurityAuthentication = "" hadoopKerberosKeytabPath = "" hadoopKerberosPrincipal = "" +hadoopSecurityAutoToLocal = "" //paimon catalog test config for bigdata diff --git a/regression-test/suites/export_p2/test_export_max_file_size.groovy b/regression-test/suites/export_p2/test_export_max_file_size.groovy index a9e41ab94ea..ebe89ed3ef5 100644 --- a/regression-test/suites/export_p2/test_export_max_file_size.groovy +++ b/regression-test/suites/export_p2/test_export_max_file_size.groovy @@ -15,142 +15,75 @@ // specific language governing permissions and limitations // under the License. -suite("test_export_max_file_size", "p2") { - // open nereids - sql """ set enable_nereids_planner=true """ - sql """ set enable_fallback_to_original_planner=false """ +suite("test_export_max_file_size", "p2,external") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + // open nereids + sql """ set enable_nereids_planner=true """ + sql """ set enable_fallback_to_original_planner=false """ - String dfsNameservices=context.config.otherConfigs.get("dfsNameservices") - String dfsHaNamenodesHdfsCluster=context.config.otherConfigs.get("dfsHaNamenodesHdfsCluster") - String dfsNamenodeRpcAddress1=context.config.otherConfigs.get("dfsNamenodeRpcAddress1") - String dfsNamenodeRpcAddress2=context.config.otherConfigs.get("dfsNamenodeRpcAddress2") - String dfsNamenodeRpcAddress3=context.config.otherConfigs.get("dfsNamenodeRpcAddress3") - String dfsNameservicesPort=context.config.otherConfigs.get("dfsNameservicesPort") - String hadoopSecurityAuthentication =context.config.otherConfigs.get("hadoopSecurityAuthentication") - String hadoopKerberosKeytabPath =context.config.otherConfigs.get("hadoopKerberosKeytabPath") - String hadoopKerberosPrincipal =context.config.otherConfigs.get("hadoopKerberosPrincipal") + String dfsNameservices=context.config.otherConfigs.get("dfsNameservices") + String dfsHaNamenodesHdfsCluster=context.config.otherConfigs.get("dfsHaNamenodesHdfsCluster") + String dfsNamenodeRpcAddress1=context.config.otherConfigs.get("dfsNamenodeRpcAddress1") + String dfsNamenodeRpcAddress2=context.config.otherConfigs.get("dfsNamenodeRpcAddress2") + String dfsNamenodeRpcAddress3=context.config.otherConfigs.get("dfsNamenodeRpcAddress3") + String 
dfsNameservicesPort=context.config.otherConfigs.get("dfsNameservicesPort") + String hadoopSecurityAuthentication =context.config.otherConfigs.get("hadoopSecurityAuthentication") + String hadoopKerberosKeytabPath =context.config.otherConfigs.get("hadoopKerberosKeytabPath") + String hadoopKerberosPrincipal =context.config.otherConfigs.get("hadoopKerberosPrincipal") + String hadoopSecurityAutoToLocal =context.config.otherConfigs.get("hadoopSecurityAutoToLocal") - - def table_export_name = "test_export_max_file_size" - // create table and insert - sql """ DROP TABLE IF EXISTS ${table_export_name} """ - sql """ - CREATE TABLE IF NOT EXISTS ${table_export_name} ( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `date` DATE NOT NULL COMMENT "数据灌入日期时间", - `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间", - `city` VARCHAR(20) COMMENT "用户所在城市", - `age` INT COMMENT "用户年龄", - `sex` INT COMMENT "用户性别", - `bool_col` boolean COMMENT "", - `int_col` int COMMENT "", - `bigint_col` bigint COMMENT "", - `largeint_col` largeint COMMENT "", - `float_col` float COMMENT "", - `double_col` double COMMENT "", - `char_col` CHAR(10) COMMENT "", - `decimal_col` decimal COMMENT "" - ) - DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); - """ - - // Used to store the data exported before - def table_load_name = "test_load" - sql """ DROP TABLE IF EXISTS ${table_load_name} """ - sql """ - CREATE TABLE IF NOT EXISTS ${table_load_name} ( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `date` DATE NOT NULL COMMENT "数据灌入日期时间", - `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间", - `city` VARCHAR(20) COMMENT "用户所在城市", - `age` INT COMMENT "用户年龄", - `sex` INT COMMENT "用户性别", - `bool_col` boolean COMMENT "", - `int_col` int COMMENT "", - `bigint_col` bigint COMMENT "", - `largeint_col` largeint COMMENT "", - `float_col` float COMMENT "", - `double_col` double COMMENT "", - `char_col` CHAR(10) COMMENT "", - `decimal_col` decimal COMMENT "" - ) - DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); - """ - - def load_data_path = "/user/export_test/exp_max_file_size.csv" - sql """ - insert into ${table_export_name} - select * from hdfs( - "uri" = "hdfs://${dfsNameservices}${load_data_path}", - "format" = "csv", - "dfs.data.transfer.protection" = "integrity", - 'dfs.nameservices'="${dfsNameservices}", - 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", - 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", - 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", - 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", - 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); + def table_export_name = "test_export_max_file_size" + // create table and insert + sql """ DROP TABLE IF EXISTS ${table_export_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_export_name} ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `date` DATE NOT NULL COMMENT "数据灌入日期时间", + `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间", + `city` VARCHAR(20) COMMENT "用户所在城市", + `age` INT COMMENT "用户年龄", + `sex` INT COMMENT "用户性别", + `bool_col` boolean COMMENT "", + `int_col` int COMMENT "", + `bigint_col` bigint COMMENT "", + `largeint_col` 
largeint COMMENT "", + `float_col` float COMMENT "", + `double_col` double COMMENT "", + `char_col` CHAR(10) COMMENT "", + `decimal_col` decimal COMMENT "" + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); """ - - def waiting_export = { export_label -> - while (true) { - def res = sql """ show export where label = "${export_label}" """ - logger.info("export state: " + res[0][2]) - if (res[0][2] == "FINISHED") { - return res[0][11] - } else if (res[0][2] == "CANCELLED") { - throw new IllegalStateException("""export failed: ${res[0][10]}""") - } else { - sleep(5000) - } - } - } - - def outFilePath = """/user/export_test/test_max_file_size/exp_""" - - // 1. csv test - def test_export = {format, file_suffix, isDelete -> - def uuid = UUID.randomUUID().toString() - // exec export + // Used to store the data exported before + def table_load_name = "test_load" + sql """ DROP TABLE IF EXISTS ${table_load_name} """ sql """ - EXPORT TABLE ${table_export_name} TO "hdfs://${dfsNameservices}${outFilePath}" - PROPERTIES( - "label" = "${uuid}", - "format" = "${format}", - "max_file_size" = "5MB", - "delete_existing_files"="${isDelete}" + CREATE TABLE IF NOT EXISTS ${table_load_name} ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `date` DATE NOT NULL COMMENT "数据灌入日期时间", + `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间", + `city` VARCHAR(20) COMMENT "用户所在城市", + `age` INT COMMENT "用户年龄", + `sex` INT COMMENT "用户性别", + `bool_col` boolean COMMENT "", + `int_col` int COMMENT "", + `bigint_col` bigint COMMENT "", + `largeint_col` largeint COMMENT "", + `float_col` float COMMENT "", + `double_col` double COMMENT "", + `char_col` CHAR(10) COMMENT "", + `decimal_col` decimal COMMENT "" ) - with HDFS ( - "dfs.data.transfer.protection" = "integrity", - 'dfs.nameservices'="${dfsNameservices}", - 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", - 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", - 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", - 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", - 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); """ - def outfile_info = waiting_export.call(uuid) - def json = parseJson(outfile_info) - assert json instanceof List - assertEquals("3", json.fileNumber[0][0]) - def outfile_url = json.url[0][0] - - for (int j = 0; j < json.fileNumber[0][0].toInteger(); ++j ) { - // check data correctness - sql """ - insert into ${table_load_name} + def load_data_path = "/user/export_test/exp_max_file_size.csv" + sql """ + insert into ${table_export_name} select * from hdfs( - "uri" = "${outfile_url}${j}.csv", + "uri" = "hdfs://${dfsNameservices}${load_data_path}", "format" = "csv", "dfs.data.transfer.protection" = "integrity", 'dfs.nameservices'="${dfsNameservices}", @@ -161,15 +94,87 @@ suite("test_export_max_file_size", "p2") { 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", + 'hadoop.security.auth_to_local' = "${hadoopSecurityAutoToLocal}", 
'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" ); """ + + + def waiting_export = { export_label -> + while (true) { + def res = sql """ show export where label = "${export_label}" """ + logger.info("export state: " + res[0][2]) + if (res[0][2] == "FINISHED") { + return res[0][11] + } else if (res[0][2] == "CANCELLED") { + throw new IllegalStateException("""export failed: ${res[0][10]}""") + } else { + sleep(5000) + } + } } - } - // begin test - test_export('csv', 'csv', true); - order_qt_select """ select * from ${table_load_name} order by user_id limit 1000 """ - order_qt_select_cnt """ select count(*) from ${table_load_name} """ + def outFilePath = """/user/export_test/test_max_file_size/exp_""" + + // 1. csv test + def test_export = {format, file_suffix, isDelete -> + def uuid = UUID.randomUUID().toString() + // exec export + sql """ + EXPORT TABLE ${table_export_name} TO "hdfs://${dfsNameservices}${outFilePath}" + PROPERTIES( + "label" = "${uuid}", + "format" = "${format}", + "max_file_size" = "5MB", + "delete_existing_files"="${isDelete}" + ) + with HDFS ( + "dfs.data.transfer.protection" = "integrity", + 'dfs.nameservices'="${dfsNameservices}", + 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", + 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", + 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", + 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", + 'hadoop.security.auth_to_local' = "${hadoopSecurityAutoToLocal}", + 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" + ); + """ + + def outfile_info = waiting_export.call(uuid) + def json = parseJson(outfile_info) + assert json instanceof List + assertEquals("3", json.fileNumber[0][0]) + def outfile_url = json.url[0][0] + for (int j = 0; j < json.fileNumber[0][0].toInteger(); ++j ) { + // check data correctness + sql """ + insert into ${table_load_name} + select * from hdfs( + "uri" = "${outfile_url}${j}.csv", + "format" = "csv", + "dfs.data.transfer.protection" = "integrity", + 'dfs.nameservices'="${dfsNameservices}", + 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", + 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", + 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", + 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", + 'hadoop.security.auth_to_local' = "${hadoopSecurityAutoToLocal}", + 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" + ); + """ + } + } + + // begin test + test_export('csv', 'csv', true); + order_qt_select """ select * from ${table_load_name} order by user_id limit 1000 """ + order_qt_select_cnt """ select count(*) from ${table_load_name} """ + } } diff --git 
a/regression-test/suites/export_p2/test_outfile_orc_max_file_size.groovy b/regression-test/suites/export_p2/test_outfile_orc_max_file_size.groovy index 1c8b3660ed5..f29e05a1867 100644 --- a/regression-test/suites/export_p2/test_outfile_orc_max_file_size.groovy +++ b/regression-test/suites/export_p2/test_outfile_orc_max_file_size.groovy @@ -15,105 +15,79 @@ // specific language governing permissions and limitations // under the License. -suite("test_outfile_orc_max_file_size", "p2") { - // open nereids - sql """ set enable_nereids_planner=true """ - sql """ set enable_fallback_to_original_planner=false """ +suite("test_outfile_orc_max_file_size", "p2,external") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + // open nereids + sql """ set enable_nereids_planner=true """ + sql """ set enable_fallback_to_original_planner=false """ - - String dfsNameservices=context.config.otherConfigs.get("dfsNameservices") - String dfsHaNamenodesHdfsCluster=context.config.otherConfigs.get("dfsHaNamenodesHdfsCluster") - String dfsNamenodeRpcAddress1=context.config.otherConfigs.get("dfsNamenodeRpcAddress1") - String dfsNamenodeRpcAddress2=context.config.otherConfigs.get("dfsNamenodeRpcAddress2") - String dfsNamenodeRpcAddress3=context.config.otherConfigs.get("dfsNamenodeRpcAddress3") - String dfsNameservicesPort=context.config.otherConfigs.get("dfsNameservicesPort") - String hadoopSecurityAuthentication =context.config.otherConfigs.get("hadoopSecurityAuthentication") - String hadoopKerberosKeytabPath =context.config.otherConfigs.get("hadoopKerberosKeytabPath") - String hadoopKerberosPrincipal =context.config.otherConfigs.get("hadoopKerberosPrincipal") - - // the path used to load data - def load_data_path = "/user/export_test/test_orc_max_file_size.orc" - // the path used to export data - def outFilePath = """/user/export_test/test_max_file_size/test_orc/exp_""" - - def create_table = {table_name -> - sql """ DROP TABLE IF EXISTS ${table_name} """ - sql """ - CREATE TABLE IF NOT EXISTS ${table_name} ( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `date` DATE NOT NULL COMMENT "数据灌入日期时间", - `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间", - `city` VARCHAR(20) COMMENT "用户所在城市", - `age` INT COMMENT "用户年龄", - `sex` INT COMMENT "用户性别", - `bool_col` boolean COMMENT "", - `int_col` int COMMENT "", - `bigint_col` bigint COMMENT "", - `largeint_col` largeint COMMENT "", - `float_col` float COMMENT "", - `double_col` double COMMENT "", - `char_col` CHAR(10) COMMENT "", - `decimal_col` decimal COMMENT "" - ) - DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); - """ - } + + String dfsNameservices=context.config.otherConfigs.get("dfsNameservices") + String dfsHaNamenodesHdfsCluster=context.config.otherConfigs.get("dfsHaNamenodesHdfsCluster") + String dfsNamenodeRpcAddress1=context.config.otherConfigs.get("dfsNamenodeRpcAddress1") + String dfsNamenodeRpcAddress2=context.config.otherConfigs.get("dfsNamenodeRpcAddress2") + String dfsNamenodeRpcAddress3=context.config.otherConfigs.get("dfsNamenodeRpcAddress3") + String dfsNameservicesPort=context.config.otherConfigs.get("dfsNameservicesPort") + String hadoopSecurityAuthentication =context.config.otherConfigs.get("hadoopSecurityAuthentication") + String hadoopKerberosKeytabPath =context.config.otherConfigs.get("hadoopKerberosKeytabPath") + String hadoopKerberosPrincipal =context.config.otherConfigs.get("hadoopKerberosPrincipal") + String hadoopSecurityAutoToLocal 
=context.config.otherConfigs.get("hadoopSecurityAutoToLocal") - def table_export_name = "test_outfile_orc_max_file_size" - - create_table(table_export_name) + // the path used to load data + def load_data_path = "/user/export_test/test_orc_max_file_size.orc" + // the path used to export data + def outFilePath = """/user/export_test/test_max_file_size/test_orc/exp_""" + + def create_table = {table_name -> + sql """ DROP TABLE IF EXISTS ${table_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `date` DATE NOT NULL COMMENT "数据灌入日期时间", + `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间", + `city` VARCHAR(20) COMMENT "用户所在城市", + `age` INT COMMENT "用户年龄", + `sex` INT COMMENT "用户性别", + `bool_col` boolean COMMENT "", + `int_col` int COMMENT "", + `bigint_col` bigint COMMENT "", + `largeint_col` largeint COMMENT "", + `float_col` float COMMENT "", + `double_col` double COMMENT "", + `char_col` CHAR(10) COMMENT "", + `decimal_col` decimal COMMENT "" + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); + """ + } - // load data - sql """ - insert into ${table_export_name} - select * from hdfs( - "uri" = "hdfs://${dfsNameservices}${load_data_path}", - "format" = "orc", - "dfs.data.transfer.protection" = "integrity", - 'dfs.nameservices'="${dfsNameservices}", - 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", - 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", - 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", - 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", - 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); - """ + def table_export_name = "test_outfile_orc_max_file_size" - def test_outfile_orc_success = {maxFileSize, isDelete, fileNumber, totalRows -> - def table = sql """ - select * from ${table_export_name} - into outfile "hdfs://${dfsNameservices}${outFilePath}" - FORMAT AS ORC - PROPERTIES( - "max_file_size" = "${maxFileSize}", - "delete_existing_files"="${isDelete}", - "dfs.data.transfer.protection" = "integrity", - 'dfs.nameservices'="${dfsNameservices}", - 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", - 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", - 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", - 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", - 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); - """ + create_table(table_export_name) - log.info("table = " + table); - // assertTrue(table.size() == 1) - // assertTrue(table[0].size() == 4) - log.info("outfile result = " + table[0]) - assertEquals(table[0][0], fileNumber) - assertEquals(table[0][1], totalRows) - } + // load data + sql """ + insert into ${table_export_name} + select * from hdfs( + "uri" = 
"hdfs://${dfsNameservices}${load_data_path}", + "format" = "orc", + "dfs.data.transfer.protection" = "integrity", + 'dfs.nameservices'="${dfsNameservices}", + 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", + 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", + 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", + 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", + 'hadoop.security.auth_to_local' = "${hadoopSecurityAutoToLocal}", + 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" + ); + """ - def test_outfile_orc_fail = {maxFileSize, isDelete -> - test { - sql """ + def test_outfile_orc_success = {maxFileSize, isDelete, fileNumber, totalRows -> + def table = sql """ select * from ${table_export_name} into outfile "hdfs://${dfsNameservices}${outFilePath}" FORMAT AS ORC @@ -129,24 +103,58 @@ suite("test_outfile_orc_max_file_size", "p2") { 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", + 'hadoop.security.auth_to_local' = "${hadoopSecurityAutoToLocal}", 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" ); """ - // other check will not work because already declared a check callback - exception "max file size should between 5MB and 2GB" + log.info("table = " + table); + // assertTrue(table.size() == 1) + // assertTrue(table[0].size() == 4) + log.info("outfile result = " + table[0]) + assertEquals(table[0][0], fileNumber) + assertEquals(table[0][1], totalRows) + } + + def test_outfile_orc_fail = {maxFileSize, isDelete -> + test { + sql """ + select * from ${table_export_name} + into outfile "hdfs://${dfsNameservices}${outFilePath}" + FORMAT AS ORC + PROPERTIES( + "max_file_size" = "${maxFileSize}", + "delete_existing_files"="${isDelete}", + "dfs.data.transfer.protection" = "integrity", + 'dfs.nameservices'="${dfsNameservices}", + 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", + 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", + 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", + 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", + 'hadoop.security.auth_to_local' = "${hadoopSecurityAutoToLocal}", + 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" + ); + """ + + // other check will not work because already declared a check callback + exception "max file size should between 5MB and 2GB" - // callback - check { result, exception, startTime, endTime -> - assertTrue(exception != null) + // callback + check { result, exception, startTime, endTime -> + assertTrue(exception != null) + } } } - } - test_outfile_orc_fail('3MB', true) - test_outfile_orc_fail('2.1GB', true) - 
test_outfile_orc_success('5MB', true, 3, 2000000) - test_outfile_orc_success('63MB', true, 3, 2000000) - test_outfile_orc_success('64MB', true, 3, 2000000) - test_outfile_orc_success('80MB', true, 2, 2000000) + test_outfile_orc_fail('3MB', true) + test_outfile_orc_fail('2.1GB', true) + test_outfile_orc_success('5MB', true, 3, 2000000) + test_outfile_orc_success('63MB', true, 3, 2000000) + test_outfile_orc_success('64MB', true, 3, 2000000) + test_outfile_orc_success('80MB', true, 2, 2000000) + } + } diff --git a/regression-test/suites/export_p2/test_outfile_p2.groovy b/regression-test/suites/export_p2/test_outfile_p2.groovy index 5472273b3eb..a10db954c0e 100644 --- a/regression-test/suites/export_p2/test_outfile_p2.groovy +++ b/regression-test/suites/export_p2/test_outfile_p2.groovy @@ -15,53 +15,58 @@ // specific language governing permissions and limitations // under the License. -suite("test_outfile_p2", "p2") { +suite("test_outfile_p2", "p2,external") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + // open nereids + sql """ set enable_nereids_planner=true """ + sql """ set enable_fallback_to_original_planner=false """ + String dfsNameservices=context.config.otherConfigs.get("dfsNameservices") + String dfsHaNamenodesHdfsCluster=context.config.otherConfigs.get("dfsHaNamenodesHdfsCluster") + String dfsNamenodeRpcAddress1=context.config.otherConfigs.get("dfsNamenodeRpcAddress1") + String dfsNamenodeRpcAddress2=context.config.otherConfigs.get("dfsNamenodeRpcAddress2") + String dfsNamenodeRpcAddress3=context.config.otherConfigs.get("dfsNamenodeRpcAddress3") + String dfsNameservicesPort=context.config.otherConfigs.get("dfsNameservicesPort") + String hadoopSecurityAuthentication =context.config.otherConfigs.get("hadoopSecurityAuthentication") + String hadoopKerberosKeytabPath =context.config.otherConfigs.get("hadoopKerberosKeytabPath") + String hadoopKerberosPrincipal =context.config.otherConfigs.get("hadoopKerberosPrincipal") + String hadoopSecurityAutoToLocal =context.config.otherConfigs.get("hadoopSecurityAutoToLocal") - String dfsNameservices=context.config.otherConfigs.get("dfsNameservices") - String dfsHaNamenodesHdfsCluster=context.config.otherConfigs.get("dfsHaNamenodesHdfsCluster") - String dfsNamenodeRpcAddress1=context.config.otherConfigs.get("dfsNamenodeRpcAddress1") - String dfsNamenodeRpcAddress2=context.config.otherConfigs.get("dfsNamenodeRpcAddress2") - String dfsNamenodeRpcAddress3=context.config.otherConfigs.get("dfsNamenodeRpcAddress3") - String dfsNameservicesPort=context.config.otherConfigs.get("dfsNameservicesPort") - String hadoopSecurityAuthentication =context.config.otherConfigs.get("hadoopSecurityAuthentication") - String hadoopKerberosKeytabPath =context.config.otherConfigs.get("hadoopKerberosKeytabPath") - String hadoopKerberosPrincipal =context.config.otherConfigs.get("hadoopKerberosPrincipal") + def table_outfile_name = "test_outfile_hdfs" + // create table and insert + sql """ DROP TABLE IF EXISTS ${table_outfile_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_outfile_name} ( + `id` int(11) NULL, + `name` string NULL + ) + DISTRIBUTED BY HASH(id) PROPERTIES("replication_num" = "1"); + """ + sql """insert into ${table_outfile_name} values(1, 'abc');""" - def table_outfile_name = "test_outfile_hdfs" - // create table and insert - sql """ DROP TABLE IF EXISTS ${table_outfile_name} """ - sql """ - CREATE TABLE IF NOT EXISTS ${table_outfile_name} ( - `id` int(11) 
NULL, - `name` string NULL - ) - DISTRIBUTED BY HASH(id) PROPERTIES("replication_num" = "1"); - """ + qt_sql_1 """select * from ${table_outfile_name} order by id""" - sql """insert into ${table_outfile_name} values(1, 'abc');""" - - qt_sql_1 """select * from ${table_outfile_name} order by id""" - - // use a simple sql to make sure there is only one fragment - // #21343 - sql """ - SELECT * FROM ${table_outfile_name} - INTO OUTFILE "hdfs://${dfsNameservices}/user/outfile_test/" - FORMAT AS parquet - PROPERTIES - ( - "dfs.data.transfer.protection" = "integrity", - 'dfs.nameservices'="${dfsNameservices}", - 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", - 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", - 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", - 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", - 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", - 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); - """ - + // use a simple sql to make sure there is only one fragment + // #21343 + sql """ + SELECT * FROM ${table_outfile_name} + INTO OUTFILE "hdfs://${dfsNameservices}/user/outfile_test/" + FORMAT AS parquet + PROPERTIES + ( + "dfs.data.transfer.protection" = "integrity", + 'dfs.nameservices'="${dfsNameservices}", + 'dfs.ha.namenodes.hdfs-cluster'="${dfsHaNamenodesHdfsCluster}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn1'="${dfsNamenodeRpcAddress1}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn2'="${dfsNamenodeRpcAddress2}:${dfsNameservicesPort}", + 'dfs.namenode.rpc-address.hdfs-cluster.nn3'="${dfsNamenodeRpcAddress3}:${dfsNameservicesPort}", + 'hadoop.security.authentication'="${hadoopSecurityAuthentication}", + 'hadoop.kerberos.keytab'="${hadoopKerberosKeytabPath}", + 'hadoop.kerberos.principal'="${hadoopKerberosPrincipal}", + 'hadoop.security.auth_to_local' = "${hadoopSecurityAutoToLocal}", + 'dfs.client.failover.proxy.provider.hdfs-cluster'="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" + ); + """ + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org