This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new a1ff02288f2 branch-2.1: [fix](hive) support query hive view created by spark (#43553) a1ff02288f2 is described below commit a1ff02288f20e140b219b4ed87a822da29c077e5 Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Mon Nov 11 23:28:53 2024 +0800 branch-2.1: [fix](hive) support query hive view created by spark (#43553) Cherry-picked from #43530 Co-authored-by: Mingyu Chen (Rayner) <morning...@163.com> Co-authored-by: morningman <yun...@selectdb.com> --- .../scripts/create_view_scripts/create_view.hql | 6 + .../docker-compose/hive/scripts/hive-metastore.sh | 7 ++ .../doris/datasource/hive/HMSExternalTable.java | 4 + .../doris/nereids/rules/analysis/BindRelation.java | 19 ++- .../data/external_table_p0/hive/test_hive_view.out | 133 +++++++++++++++++++++ .../external_table_p0/hive/test_hive_view.groovy | 66 ++++++++++ 6 files changed, 229 insertions(+), 6 deletions(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql new file mode 100644 index 00000000000..221b536e263 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql @@ -0,0 +1,6 @@ +use default; +create view test_view1 as select * from sale_table; +create view test_view2 as select * from default.sale_table; +create view test_view3 as select * from sale_table where bill_code="bill_code1"; +create view test_view4 as select parquet_zstd_all_types.t_int, parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = parquet_all_types.t_varchar order by t_int limit 10; +create view unsupported_view as select bill_code from sale_table union all select t_varchar from multi_catalog.parquet_all_types order by bill_code limit 10; diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh index 46d1437db29..422e3451f5c 100755 --- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh +++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh @@ -114,6 +114,13 @@ ls /mnt/scripts/create_preinstalled_scripts/*.hql | xargs -n 1 -P 10 -I {} bash echo "Script: {} executed in $EXECUTION_TIME seconds" ' +# create view +START_TIME=$(date +%s) +hive -f /mnt/scripts/create_view_scripts/create_view.hql +END_TIME=$(date +%s) +EXECUTION_TIME=$((END_TIME - START_TIME)) +echo "Script: create_view.hql executed in $EXECUTION_TIME seconds" + touch /mnt/SUCCESS # Avoid container exit diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index aaa59d44144..5f2c8cbddf3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -253,6 +253,10 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI if (remoteTable.getSd() == null) { throw new NotSupportedException("remote table's storage descriptor is null"); } + // If this is hive view, no need to check file format. + if (remoteTable.isSetViewExpandedText() || remoteTable.isSetViewOriginalText()) { + return true; + } String inputFileFormat = remoteTable.getSd().getInputFormat(); if (inputFileFormat == null) { throw new NotSupportedException("remote table's storage input format is null"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index fee0789bfe5..5e049a9d9b3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -406,8 +406,10 @@ public class BindRelation extends OneAnalysisRuleFactory { if (Config.enable_query_hive_views && hmsTable.isView()) { isView = true; String hiveCatalog = hmsTable.getCatalog().getName(); + String hiveDb = hmsTable.getDatabase().getFullName(); String ddlSql = hmsTable.getViewText(); - Plan hiveViewPlan = parseAndAnalyzeHiveView(hmsTable, hiveCatalog, ddlSql, cascadesContext); + Plan hiveViewPlan = parseAndAnalyzeHiveView( + hmsTable, hiveCatalog, hiveDb, ddlSql, cascadesContext); return new LogicalSubQueryAlias<>(qualifiedTableName, hiveViewPlan); } if (hmsTable.getDlaType() == DLAType.HUDI) { @@ -459,15 +461,20 @@ public class BindRelation extends OneAnalysisRuleFactory { } private Plan parseAndAnalyzeHiveView( - HMSExternalTable table, String hiveCatalog, String ddlSql, CascadesContext cascadesContext) { + HMSExternalTable table, String hiveCatalog, String hiveDb, String ddlSql, CascadesContext cascadesContext) { ConnectContext ctx = cascadesContext.getConnectContext(); String previousCatalog = ctx.getCurrentCatalog().getName(); String previousDb = ctx.getDatabase(); + // change catalog and db to hive catalog and db, so that we can parse and analyze the view sql in hive context. ctx.changeDefaultCatalog(hiveCatalog); - Plan hiveViewPlan = parseAndAnalyzeView(table, ddlSql, cascadesContext); - ctx.changeDefaultCatalog(previousCatalog); - ctx.setDatabase(previousDb); - return hiveViewPlan; + ctx.setDatabase(hiveDb); + try { + return parseAndAnalyzeView(table, ddlSql, cascadesContext); + } finally { + // restore catalog and db in connect context + ctx.changeDefaultCatalog(previousCatalog); + ctx.setDatabase(previousDb); + } } private Plan parseAndAnalyzeView(TableIf view, String ddlSql, CascadesContext parentContext) { diff --git a/regression-test/data/external_table_p0/hive/test_hive_view.out b/regression-test/data/external_table_p0/hive/test_hive_view.out new file mode 100644 index 00000000000..de207860d6d --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_view.out @@ -0,0 +1,133 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !desc1 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc2 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc3 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc4 -- +t_int int Yes true \N +t_varchar varchar(65535) Yes true \N + +-- !sql1 -- +bill_code dates ord_year ord_month ord_quarter on_time +bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6 + +-- !sql2 -- +dates +dates2 + +-- !sql3 -- +1 + +-- !sql4 -- +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test + +-- !sql5 -- +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 + +-- !desc5 -- +bill_code varchar(65535) Yes true \N + +-- !desc1 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc2 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc3 -- +bill_code varchar(500) Yes true \N +dates varchar(500) Yes true \N +ord_year varchar(500) Yes true \N +ord_month varchar(500) Yes true \N +ord_quarter varchar(500) Yes true \N +on_time varchar(500) Yes true \N + +-- !desc4 -- +t_int int Yes true \N +t_varchar varchar(65535) Yes true \N + +-- !sql1 -- +bill_code dates ord_year ord_month ord_quarter on_time +bill_code1 dates2 ord_year3 ord_month4 ord_quarter5 on_time6 + +-- !sql2 -- +dates +dates2 + +-- !sql3 -- +1 + +-- !sql4 -- +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test +3 test + +-- !sql5 -- +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 + +-- !desc5 -- +bill_code varchar(65535) Yes true \N + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_view.groovy b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy new file mode 100644 index 00000000000..f344888019f --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_view", "external_docker,hive,external_docker_hive,p0,external") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + setHivePrefix(hivePrefix) + String catalog_name = "test_${hivePrefix}_view" + String ex_db_name = "`default`" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + + sql """drop catalog if exists ${catalog_name} """ + + sql """CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'hadoop.username' = 'hive' + );""" + + sql """use ${catalog_name}.${ex_db_name}""" + qt_desc1 """desc test_view1""" + qt_desc2 """desc test_view2""" + qt_desc3 """desc test_view3""" + qt_desc4 """desc test_view4""" + // create view test_view1 as select * from sale_table; + order_qt_sql1 """ select * from test_view1 """ + // create view test_view2 as select * from default.sale_table; + order_qt_sql2 """ select dates from test_view2 """ + // create view test_view3 as select * from sale_table where bill_code="bill_code1"; + order_qt_sql3 """ select count(*) from test_view3 """ + // create view test_view4 as select parquet_zstd_all_types.t_int, parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = parquet_all_types.t_varchar order by t_int limit 10; + order_qt_sql4 """ select * from test_view4 """ + order_qt_sql5 """ select test_view4.t_int from test_view4 join multi_catalog.parquet_all_types on test_view4.t_varchar = parquet_all_types.t_varchar order by test_view4.t_int limit 10; """ + + // check unsupported view + sql """set enable_fallback_to_original_planner=false;""" + test { + sql """select * from unsupported_view;""" + exception """Unknown column 'bill_code' in '_u2' in SORT clause""" + } + // pr #43530 + qt_desc5 """desc unsupported_view;""" + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org