This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new a1ff02288f2 branch-2.1: [fix](hive) support query hive view created by 
spark (#43553)
a1ff02288f2 is described below

commit a1ff02288f20e140b219b4ed87a822da29c077e5
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Nov 11 23:28:53 2024 +0800

    branch-2.1: [fix](hive) support query hive view created by spark (#43553)
    
    Cherry-picked from #43530
    
    Co-authored-by: Mingyu Chen (Rayner) <morning...@163.com>
    Co-authored-by: morningman <yun...@selectdb.com>
---
 .../scripts/create_view_scripts/create_view.hql    |   6 +
 .../docker-compose/hive/scripts/hive-metastore.sh  |   7 ++
 .../doris/datasource/hive/HMSExternalTable.java    |   4 +
 .../doris/nereids/rules/analysis/BindRelation.java |  19 ++-
 .../data/external_table_p0/hive/test_hive_view.out | 133 +++++++++++++++++++++
 .../external_table_p0/hive/test_hive_view.groovy   |  66 ++++++++++
 6 files changed, 229 insertions(+), 6 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
new file mode 100644
index 00000000000..221b536e263
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/create_view_scripts/create_view.hql
@@ -0,0 +1,6 @@
+use default;
+create view test_view1 as select * from sale_table;
+create view test_view2 as select * from default.sale_table;
+create view test_view3 as select * from sale_table where 
bill_code="bill_code1";
+create view test_view4 as select parquet_zstd_all_types.t_int, 
parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join 
multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = 
parquet_all_types.t_varchar order by t_int limit 10;
+create view unsupported_view as select bill_code from sale_table union all 
select t_varchar from multi_catalog.parquet_all_types order by bill_code limit 
10;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh 
b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
index 46d1437db29..422e3451f5c 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
+++ b/docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh
@@ -114,6 +114,13 @@ ls /mnt/scripts/create_preinstalled_scripts/*.hql | xargs 
-n 1 -P 10 -I {} bash
     echo "Script: {} executed in $EXECUTION_TIME seconds"
 '
 
+# create view
+START_TIME=$(date +%s)
+hive -f /mnt/scripts/create_view_scripts/create_view.hql
+END_TIME=$(date +%s)
+EXECUTION_TIME=$((END_TIME - START_TIME))
+echo "Script: create_view.hql executed in $EXECUTION_TIME seconds"
+
 touch /mnt/SUCCESS
 
 # Avoid container exit
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index aaa59d44144..5f2c8cbddf3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -253,6 +253,10 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         if (remoteTable.getSd() == null) {
             throw new NotSupportedException("remote table's storage descriptor 
is null");
         }
+        // If this is hive view, no need to check file format.
+        if (remoteTable.isSetViewExpandedText() || 
remoteTable.isSetViewOriginalText()) {
+            return true;
+        }
         String inputFileFormat = remoteTable.getSd().getInputFormat();
         if (inputFileFormat == null) {
             throw new NotSupportedException("remote table's storage input 
format is null");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
index fee0789bfe5..5e049a9d9b3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java
@@ -406,8 +406,10 @@ public class BindRelation extends OneAnalysisRuleFactory {
                     if (Config.enable_query_hive_views && hmsTable.isView()) {
                         isView = true;
                         String hiveCatalog = hmsTable.getCatalog().getName();
+                        String hiveDb = hmsTable.getDatabase().getFullName();
                         String ddlSql = hmsTable.getViewText();
-                        Plan hiveViewPlan = parseAndAnalyzeHiveView(hmsTable, 
hiveCatalog, ddlSql, cascadesContext);
+                        Plan hiveViewPlan = parseAndAnalyzeHiveView(
+                                hmsTable, hiveCatalog, hiveDb, ddlSql, 
cascadesContext);
                         return new LogicalSubQueryAlias<>(qualifiedTableName, 
hiveViewPlan);
                     }
                     if (hmsTable.getDlaType() == DLAType.HUDI) {
@@ -459,15 +461,20 @@ public class BindRelation extends OneAnalysisRuleFactory {
     }
 
     private Plan parseAndAnalyzeHiveView(
-            HMSExternalTable table, String hiveCatalog, String ddlSql, 
CascadesContext cascadesContext) {
+            HMSExternalTable table, String hiveCatalog, String hiveDb, String 
ddlSql, CascadesContext cascadesContext) {
         ConnectContext ctx = cascadesContext.getConnectContext();
         String previousCatalog = ctx.getCurrentCatalog().getName();
         String previousDb = ctx.getDatabase();
+        // change catalog and db to hive catalog and db, so that we can parse 
and analyze the view sql in hive context.
         ctx.changeDefaultCatalog(hiveCatalog);
-        Plan hiveViewPlan = parseAndAnalyzeView(table, ddlSql, 
cascadesContext);
-        ctx.changeDefaultCatalog(previousCatalog);
-        ctx.setDatabase(previousDb);
-        return hiveViewPlan;
+        ctx.setDatabase(hiveDb);
+        try {
+            return parseAndAnalyzeView(table, ddlSql, cascadesContext);
+        } finally {
+            // restore catalog and db in connect context
+            ctx.changeDefaultCatalog(previousCatalog);
+            ctx.setDatabase(previousDb);
+        }
     }
 
     private Plan parseAndAnalyzeView(TableIf view, String ddlSql, 
CascadesContext parentContext) {
diff --git a/regression-test/data/external_table_p0/hive/test_hive_view.out 
b/regression-test/data/external_table_p0/hive/test_hive_view.out
new file mode 100644
index 00000000000..de207860d6d
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/test_hive_view.out
@@ -0,0 +1,133 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !desc1 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc2 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc3 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc4 --
+t_int  int     Yes     true    \N      
+t_varchar      varchar(65535)  Yes     true    \N      
+
+-- !sql1 --
+bill_code      dates   ord_year        ord_month       ord_quarter     on_time
+bill_code1     dates2  ord_year3       ord_month4      ord_quarter5    on_time6
+
+-- !sql2 --
+dates
+dates2
+
+-- !sql3 --
+1
+
+-- !sql4 --
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+
+-- !sql5 --
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+
+-- !desc5 --
+bill_code      varchar(65535)  Yes     true    \N      
+
+-- !desc1 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc2 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc3 --
+bill_code      varchar(500)    Yes     true    \N      
+dates  varchar(500)    Yes     true    \N      
+ord_year       varchar(500)    Yes     true    \N      
+ord_month      varchar(500)    Yes     true    \N      
+ord_quarter    varchar(500)    Yes     true    \N      
+on_time        varchar(500)    Yes     true    \N      
+
+-- !desc4 --
+t_int  int     Yes     true    \N      
+t_varchar      varchar(65535)  Yes     true    \N      
+
+-- !sql1 --
+bill_code      dates   ord_year        ord_month       ord_quarter     on_time
+bill_code1     dates2  ord_year3       ord_month4      ord_quarter5    on_time6
+
+-- !sql2 --
+dates
+dates2
+
+-- !sql3 --
+1
+
+-- !sql4 --
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+3      test
+
+-- !sql5 --
+3
+3
+3
+3
+3
+3
+3
+3
+3
+3
+
+-- !desc5 --
+bill_code      varchar(65535)  Yes     true    \N      
+
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_view.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy
new file mode 100644
index 00000000000..f344888019f
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hive/test_hive_view.groovy
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_view", 
"external_docker,hive,external_docker_hive,p0,external") {
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2", "hive3"]) {
+        setHivePrefix(hivePrefix)
+        String catalog_name = "test_${hivePrefix}_view"
+        String ex_db_name = "`default`"
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String hdfs_port = context.config.otherConfigs.get(hivePrefix + 
"HdfsPort")
+
+        sql """drop catalog if exists ${catalog_name} """
+
+        sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+            'type'='hms',
+            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
+            'hadoop.username' = 'hive'
+        );"""
+
+        sql """use ${catalog_name}.${ex_db_name}"""
+        qt_desc1 """desc test_view1"""
+        qt_desc2 """desc test_view2"""
+        qt_desc3 """desc test_view3"""
+        qt_desc4 """desc test_view4"""
+        // create view test_view1 as select * from sale_table;
+        order_qt_sql1 """ select * from test_view1 """
+        // create view test_view2 as select * from default.sale_table;
+        order_qt_sql2 """ select dates from test_view2 """
+        // create view test_view3 as select * from sale_table where 
bill_code="bill_code1";
+        order_qt_sql3 """ select count(*) from test_view3 """
+        // create view test_view4 as select parquet_zstd_all_types.t_int, 
parquet_zstd_all_types.t_varchar from parquet_zstd_all_types join 
multi_catalog.parquet_all_types on parquet_zstd_all_types.t_varchar = 
parquet_all_types.t_varchar order by t_int limit 10;
+        order_qt_sql4 """ select * from test_view4 """
+        order_qt_sql5 """ select test_view4.t_int from test_view4 join 
multi_catalog.parquet_all_types on test_view4.t_varchar = 
parquet_all_types.t_varchar order by test_view4.t_int limit 10; """
+
+        // check unsupported view
+        sql """set enable_fallback_to_original_planner=false;"""
+        test {
+            sql """select * from unsupported_view;"""
+            exception """Unknown column 'bill_code' in '_u2' in SORT clause"""
+        }
+        // pr #43530
+        qt_desc5 """desc unsupported_view;"""
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to