This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4c549d79f2648c5ed906deef42c8d05bb747d8b0
Author: Pranav Lodha <[email protected]>
AuthorDate: Mon Jun 24 15:24:11 2024 -0700

    IMPALA-12992: Support for Hive JDBC Storage handler tables
    
    This is an enhancement request to support JDBC tables
    created by Hive JDBC Storage handler. This is essentially
    done by making JDBC table properties compatible with
    Impala. The translation happens when the table is loaded, and
    the translated properties are maintained only in the Impala
    cluster, i.e. they are not written back to HMS.
    
    Impala includes JDBC drivers for PostgreSQL and MySQL
    making 'driver.url' not mandatory in such cases. The
    Impala JDBC driver is still required for Impala-to-Impala
    JDBC connections. Additionally, Hive allows adding database
    driver JARs at runtime via Beeline, enabling users to
    dynamically include JDBC driver JARs. However, Impala does
    not support adding database driver JARs at runtime,
    making the driver.url field still useful
    in cases where additional drivers are needed.
    
    The 'hive.sql.query' property is not handled in this patch.
    It will be covered in a separate Jira ticket.
    
    Testing: End-to-end tests are included in
    test_ext_data_sources.py.
    
    Change-Id: I1674b93a02f43df8c1a449cdc54053cc80d9c458
    Reviewed-on: http://gerrit.cloudera.org:8080/22134
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 bin/impala-config.sh                               |   2 +
 fe/pom.xml                                         |   6 +
 .../org/apache/impala/catalog/TableLoader.java     |  67 +++++-
 .../extdatasource/jdbc/conf/JdbcStorageConfig.java |   2 +-
 .../jdbc/dao/DataSourceObjectCache.java            |  39 ++--
 .../jdbc/dao/GenericJdbcDatabaseAccessor.java      |   6 +-
 .../org/apache/impala/analysis/AnalyzeDDLTest.java |   6 +-
 testdata/bin/load-ext-data-sources.sh              |  33 ++-
 testdata/bin/setup-mysql-env.sh                    |  32 +++
 .../queries/QueryTest/hive-jdbc-mysql-tables.test  | 234 +++++++++++++++++++++
 .../QueryTest/hive-jdbc-postgres-tables.test       | 234 +++++++++++++++++++++
 tests/custom_cluster/test_ext_data_sources.py      | 167 ++++++++++++++-
 12 files changed, 805 insertions(+), 23 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 8853f2d46..b4784c4ae 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -177,6 +177,8 @@ export IMPALA_PROTOBUF_CLANG_VERSION=3.14.0-clangcompat-p2
 unset IMPALA_PROTOBUF_CLANG_URL
 export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.6
 unset IMPALA_POSTGRES_JDBC_DRIVER_URL
+export IMPALA_MYSQL_JDBC_DRIVER_VERSION=8.2.0
+unset IMPALA_MYSQL_JDBC_DRIVER_URL
 export IMPALA_PYTHON_VERSION=2.7.16
 unset IMPALA_PYTHON_URL
 export IMPALA_PYTHON3_VERSION=3.8.18
diff --git a/fe/pom.xml b/fe/pom.xml
index 9b0f5ebb7..39970eb89 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -310,6 +310,12 @@ under the License.
       <version>${postgres.jdbc.version}</version>
     </dependency>
 
+    <dependency>
+      <groupId>com.mysql</groupId>
+      <artifactId>mysql-connector-j</artifactId>
+      <version>8.2.0</version>
+    </dependency>
+
     <dependency>
       <groupId>org.antlr</groupId>
       <artifactId>antlr-runtime</artifactId>
diff --git a/fe/src/main/java/org/apache/impala/catalog/TableLoader.java 
b/fe/src/main/java/org/apache/impala/catalog/TableLoader.java
index a30313f15..f625ca100 100644
--- a/fe/src/main/java/org/apache/impala/catalog/TableLoader.java
+++ b/fe/src/main/java/org/apache/impala/catalog/TableLoader.java
@@ -20,6 +20,8 @@ package org.apache.impala.catalog;
 import com.google.common.base.Preconditions;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
@@ -117,6 +119,14 @@ public class TableLoader {
             "Unsupported table type '%s' for: %s", tableType, fullTblName));
       }
 
+      // Support for Hive JDBC storage handler
+      String val = msTbl.getParameters().get("storage_handler");
+      if (val != null && 
val.equals("org.apache.hive.storage.jdbc.JdbcStorageHandler")) {
+        Map<String, String> impalaTblProps = setHiveJdbcProperties(msTbl);
+        msTbl.unsetParameters();
+        msTbl.setParameters(impalaTblProps);
+      }
+
       // Create a table of appropriate type and have it load itself
       table = Table.fromMetastoreTable(db, msTbl);
       if (table == null) {
@@ -179,6 +189,61 @@ public class TableLoader {
     return table;
   }
 
+  private Map<String, String> setHiveJdbcProperties(
+      org.apache.hadoop.hive.metastore.api.Table msTbl) throws 
TableLoadingException {
+    Map<String, String> impala_tbl_props = new HashMap<>();
+    // Insert Impala specific JDBC storage handler properties
+    impala_tbl_props.put("__IMPALA_DATA_SOURCE_NAME", "impalajdbcdatasource");
+    String val = msTbl.getParameters().get("hive.sql.database.type");
+    if (val == null) {
+      throw new TableLoadingException("Required parameter: 
hive.sql.database.type" +
+          "is missing.");
+    } else {
+      impala_tbl_props.put("database.type", val);
+    }
+    val = msTbl.getParameters().get("hive.sql.dbcp.password");
+    if (val != null) {
+      impala_tbl_props.put("dbcp.password", val);
+    }
+    val = msTbl.getParameters().get("hive.sql.dbcp.password.keystore");
+    if (val != null) {
+      impala_tbl_props.put("dbcp.password.keystore", val);
+    }
+    val = msTbl.getParameters().get("hive.sql.dbcp.password.key");
+    if (val != null) {
+      impala_tbl_props.put("dbcp.password.key", val);
+    }
+    val = msTbl.getParameters().get("hive.sql.jdbc.url");
+    if (val == null) {
+      throw new TableLoadingException("Required parameter: hive.sql.jdbc.url" +
+          "is missing.");
+    } else {
+      impala_tbl_props.put("jdbc.url", val);
+    }
+    val = msTbl.getParameters().get("hive.sql.dbcp.username");
+    if (val == null) {
+      throw new TableLoadingException("Required parameter: 
hive.sql.dbcp.username" +
+          "is missing.");
+    } else {
+      impala_tbl_props.put("dbcp.username", val);
+    }
+    val = msTbl.getParameters().get("hive.sql.table");
+    if (val == null) {
+      throw new TableLoadingException("Required parameter: hive.sql.table" +
+          "is missing.");
+    } else {
+      impala_tbl_props.put("table", val);
+    }
+    val = msTbl.getParameters().get("hive.sql.jdbc.driver");
+    if (val == null) {
+      throw new TableLoadingException("Required parameter: 
hive.sql.jdbc.driver" +
+          "is missing.");
+    } else {
+      impala_tbl_props.put("jdbc.driver", val);
+    }
+    return impala_tbl_props;
+  }
+
   private void initMetrics(Metrics metrics) {
     metrics.addTimer(
         MetastoreEventsProcessor.EVENTS_FETCH_DURATION_METRIC);
@@ -205,4 +270,4 @@ public class TableLoader {
     metrics.addCounter(
         MetastoreEventsProcessor.NUMBER_OF_PARTITIONS_REMOVED);
   }
-}
+}
\ No newline at end of file
diff --git 
a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java
 
b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java
index 2611a591b..36d00a5bc 100644
--- 
a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java
+++ 
b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java
@@ -32,7 +32,7 @@ public enum JdbcStorageConfig {
   JDBC_DRIVER_CLASS("jdbc.driver", true),
   // Driver URL for downloading the Jar file package that is used to access 
the external
   // database.
-  JDBC_DRIVER_URL("driver.url", true),
+  JDBC_DRIVER_URL("driver.url", false),
   // Username for accessing the external database.
   DBCP_USERNAME("dbcp.username", false),
   // Password of the user.
diff --git 
a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java
 
b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java
index 5937a0757..c64ee9e34 100644
--- 
a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java
+++ 
b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/DataSourceObjectCache.java
@@ -18,6 +18,8 @@
 package org.apache.impala.extdatasource.jdbc.dao;
 
 import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLClassLoader;
 import java.sql.SQLException;
@@ -25,7 +27,6 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Properties;
-
 import javax.sql.DataSource;
 
 import org.apache.commons.dbcp2.BasicDataSource;
@@ -106,21 +107,35 @@ public class DataSourceObjectCache {
       String driverUrl = props.getProperty("driverUrl");
       try {
         BasicDataSource dbcpDs = 
BasicDataSourceFactory.createDataSource(props);
-        // Copy jdbc driver to local file system.
-        String driverLocalPath = 
FileSystemUtil.copyFileFromUriToLocal(driverUrl);
-        // Create class loader for jdbc driver and set it for the
-        // BasicDataSource object so that the driver class could be loaded
-        // from jar file without searching classpath.
-        URL driverJarUrl = new File(driverLocalPath).toURI().toURL();
-        URLClassLoader driverLoader = URLClassLoader.newInstance(
-            new URL[] { driverJarUrl }, getClass().getClassLoader());
-        dbcpDs.setDriverClassLoader(driverLoader);
+        String driverLocalPath = null;
+        if (!Strings.isNullOrEmpty(driverUrl)) {
+          // Copy jdbc driver to local file system.
+          driverLocalPath = FileSystemUtil.copyFileFromUriToLocal(driverUrl);
+          // Create class loader for jdbc driver and set it for the
+          // BasicDataSource object so that the driver class could be loaded
+          // from jar file without searching classpath.
+          URL driverJarUrl = new File(driverLocalPath).toURI().toURL();
+          URLClassLoader driverLoader = URLClassLoader.newInstance(
+              new URL[] { driverJarUrl }, getClass().getClassLoader());
+          dbcpDs.setDriverClassLoader(driverLoader);
+        }
+        // Cache the datasource (no need to store driver path since it's in 
classpath)
         entry = new Entry(dbcpDs, driverLocalPath);
         cacheMap_.put(cacheKey, entry);
         return dbcpDs;
+      } catch (MalformedURLException e) {
+          throw new JdbcDatabaseAccessException(String.format(
+              "Invalid JDBC driver URL: '%s'.", driverUrl), e);
+      } catch (IOException e) {
+          throw new JdbcDatabaseAccessException(String.format(
+              "Failed to copy JDBC driver from '%s' to local filesystem.", 
driverUrl), e);
+      } catch (SQLException e) {
+          throw new JdbcDatabaseAccessException(String.format(
+              "Unable to fetch jdbc driver jar from location '%s'. ", 
driverUrl), e);
       } catch (Exception e) {
-        throw new JdbcDatabaseAccessException(String.format(
-            "Unable to fetch jdbc driver jar from location '%s'. ", 
driverUrl));
+          // createDataSource() in commons-dbcp 2.9 throws Exception.
+          throw new JdbcDatabaseAccessException(
+              "Error while creating datasource.", e);
       }
     }
   }
diff --git 
a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java
 
b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java
index 75f26ad92..ddcf972a8 100644
--- 
a/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java
+++ 
b/fe/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java
@@ -319,8 +319,10 @@ public class GenericJdbcDatabaseAccessor implements 
DatabaseAccessor {
     dbProperties.put("url", jdbcUrl);
     dbProperties.put("driverClassName",
         conf.get(JdbcStorageConfig.JDBC_DRIVER_CLASS.getPropertyName()));
-    dbProperties.put("driverUrl",
-        conf.get(JdbcStorageConfig.JDBC_DRIVER_URL.getPropertyName()));
+    if (conf.get(JdbcStorageConfig.JDBC_DRIVER_URL.getPropertyName()) != null) 
{
+      dbProperties.put("driverUrl",
+          conf.get(JdbcStorageConfig.JDBC_DRIVER_URL.getPropertyName()));
+    }
     dbProperties.put("type", "javax.sql.DataSource");
     return dbProperties;
   }
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 6c96dfdd8..2ca9159ca 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -851,10 +851,8 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "tblproperties ('__IMPALA_DATA_SOURCE_NAME')",
         "Unsetting the '__IMPALA_DATA_SOURCE_NAME' table property is not 
supported " +
         "for DataSource table.");
-    AnalysisError("alter table functional.alltypes_jdbc_datasource unset " +
-        "tblproperties ('driver.url')",
-        "Unsetting the 'driver.url' table property is not supported for JDBC " 
+
-        "DataSource table.");
+    AnalyzesOk("alter table functional.alltypes_jdbc_datasource unset " +
+        "tblproperties ('driver.url')");
 
     AnalyzesOk("alter table functional.alltypes set 
tblproperties('sort.columns'='id')");
     AnalyzesOk("alter table functional.alltypes set tblproperties(" +
diff --git a/testdata/bin/load-ext-data-sources.sh 
b/testdata/bin/load-ext-data-sources.sh
index 3855fd9d3..82bfc6abe 100755
--- a/testdata/bin/load-ext-data-sources.sh
+++ b/testdata/bin/load-ext-data-sources.sh
@@ -108,6 +108,36 @@ INSERT INTO test_strategy (strategy_id, name, referrer, 
landing, priority,
 __EOT__
 sudo -u postgres psql -U hiveuser -d functional -f /tmp/jdbc_test_strategy.sql
 
+# Create country table
+cat > /tmp/jdbc_country.sql << __EOT__
+DROP TABLE IF EXISTS country;
+CREATE TABLE country
+(
+  id int,
+  name varchar(20),
+  bool_col BOOLEAN,
+  tinyint_col     SMALLINT,
+  smallint_col    SMALLINT,
+  int_col         INT,
+  bigint_col      BIGINT,
+  float_col       FLOAT,
+  double_col      DOUBLE PRECISION,
+  date_col        DATE,
+  string_col      VARCHAR(10),
+  timestamp_col   TIMESTAMP
+);
+INSERT INTO country (id, name, bool_col, tinyint_col, smallint_col, int_col,
+bigint_col, float_col, double_col, date_col, string_col, timestamp_col)
+VALUES
+(1, 'India', TRUE, 10, 100, 1000, 10000, 1.1, 1.11, '2024-01-01',
+  'IN', '2024-01-01 10:00:00'),
+(2, 'Russia', FALSE, 20, 200, 2000, 20000, 2.2, 2.22, '2024-02-01',
+  'RU', '2024-02-01 11:00:00'),
+(3, 'USA', TRUE, 30, 300, 3000, 30000, 3.3, 3.33, '2024-03-01',
+  'US', '2024-03-01 12:00:00');
+__EOT__
+sudo -u postgres psql -U hiveuser -d functional -f /tmp/jdbc_country.sql
+
 # Load data to jdbc table
 cat ${IMPALA_HOME}/testdata/target/AllTypes/* > /tmp/jdbc_alltypes.csv
 loadCmd="\COPY alltypes FROM '/tmp/jdbc_alltypes.csv' DELIMITER ',' CSV"
@@ -154,4 +184,5 @@ rm /tmp/jdbc_alltypes.*
 rm /tmp/jdbc_alltypes_with_quote.*
 rm /tmp/jdbc_decimal_tbl.*
 rm /tmp/jdbc_test_strategy.*
-rm /tmp/impala_jdbc_alltypes.sql
\ No newline at end of file
+rm /tmp/jdbc_country.*
+rm /tmp/impala_jdbc_alltypes.sql
diff --git a/testdata/bin/setup-mysql-env.sh b/testdata/bin/setup-mysql-env.sh
index e1bd612ff..4b5f666c3 100755
--- a/testdata/bin/setup-mysql-env.sh
+++ b/testdata/bin/setup-mysql-env.sh
@@ -141,6 +141,38 @@ __EOT__
 docker exec -i mysql mysql -uroot -psecret functional < \
   /tmp/mysql_jdbc_decimal_tbl.sql
 
+# Create country table
+cat > /tmp/jdbc_country.sql << __EOT__
+DROP TABLE IF EXISTS country;
+CREATE TABLE country
+(
+  id int,
+  name varchar(20),
+  bool_col BOOLEAN,
+  tinyint_col     SMALLINT,
+  smallint_col    SMALLINT,
+  int_col         INT,
+  bigint_col      BIGINT,
+  float_col       FLOAT,
+  double_col      DOUBLE,
+  date_col        DATE,
+  string_col      VARCHAR(10),
+  timestamp_col   TIMESTAMP
+);
+INSERT INTO country (id, name, bool_col, tinyint_col, smallint_col, int_col,
+bigint_col, float_col, double_col, date_col, string_col, timestamp_col)
+VALUES
+(1, 'India', TRUE, 10, 100, 1000, 10000, 1.1, 1.11, '2024-01-01',
+  'IN', '2024-01-01 10:00:00'),
+(2, 'Russia', FALSE, 20, 200, 2000, 20000, 2.2, 2.22, '2024-02-01',
+  'RU', '2024-02-01 11:00:00'),
+(3, 'USA', TRUE, 30, 300, 3000, 30000, 3.3, 3.33, '2024-03-01',
+  'US', '2024-03-01 12:00:00');
+__EOT__
+
+docker exec -i mysql mysql -uroot -psecret functional < \
+  /tmp/jdbc_country.sql
+
 # Load data to jdbc table
 cat ${IMPALA_HOME}/testdata/target/AllTypes/* > /tmp/mysql_jdbc_alltypes.csv
 docker cp /tmp/mysql_jdbc_alltypes.csv mysql:/tmp
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-mysql-tables.test
 
b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-mysql-tables.test
new file mode 100644
index 000000000..7dc5a5dae
--- /dev/null
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-mysql-tables.test
@@ -0,0 +1,234 @@
+====
+---- QUERY
+select * from country_mysql;
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01
 11:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select * from country_keystore_mysql;
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01
 11:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select id, name, date_col from country_mysql;
+---- RESULTS
+1,'India',2024-01-01
+2,'Russia',2024-02-01
+3,'USA',2024-03-01
+---- TYPES
+INT, STRING, DATE
+====
+---- QUERY
+select id, name, date_col from country_keystore_mysql;
+---- RESULTS
+1,'India',2024-01-01
+2,'Russia',2024-02-01
+3,'USA',2024-03-01
+---- TYPES
+INT, STRING, DATE
+====
+---- QUERY
+select count(*) from country_mysql;
+---- RESULTS
+3
+---- TYPES
+BIGINT
+====
+---- QUERY
+select count(*) from country_keystore_mysql;
+---- RESULTS
+3
+---- TYPES
+BIGINT
+====
+---- QUERY
+select distinct name from country_mysql;
+---- RESULTS
+'India'
+'Russia'
+'USA'
+---- TYPES
+STRING
+====
+---- QUERY
+select distinct name from country_keystore_mysql;
+---- RESULTS
+'India'
+'Russia'
+'USA'
+---- TYPES
+STRING
+====
+---- QUERY
+# Check for null values
+select * from country_mysql where string_col IS NULL;
+---- RESULTS
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select * from country_keystore_mysql where string_col IS NULL;
+---- RESULTS
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+# Order by integer column
+select id, name, int_col from country_mysql order by int_col DESC;
+---- RESULTS
+3,'USA',3000
+2,'Russia',2000
+1,'India',1000
+---- TYPES
+INT, STRING, INT
+====
+---- QUERY
+select id, name, int_col from country_keystore_mysql order by int_col DESC;
+---- RESULTS
+3,'USA',3000
+2,'Russia',2000
+1,'India',1000
+---- TYPES
+INT, STRING, INT
+====
+---- QUERY
+# Select using case statement
+select id, name, case when bool_col then 'Active' else 'Inactive' end as 
status from country_mysql;
+---- RESULTS
+1,'India','Active'
+2,'Russia','Inactive'
+3,'USA','Active'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select id, name, case when bool_col then 'Active' else 'Inactive' end as 
status from country_keystore_mysql;
+---- RESULTS
+1,'India','Active'
+2,'Russia','Inactive'
+3,'USA','Active'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Check for minimum date
+select min(date_col) from country_mysql;
+---- RESULTS
+2024-01-01
+---- TYPES
+DATE
+====
+---- QUERY
+select min(date_col) from country_keystore_mysql;
+---- RESULTS
+2024-01-01
+---- TYPES
+DATE
+====
+---- QUERY
+# Join with a self table alias
+select a.id, a.name, b.name from country_mysql a join country_mysql b on a.id 
<> b.id;
+---- RESULTS
+1,'India','Russia'
+1,'India','USA'
+2,'Russia','India'
+2,'Russia','USA'
+3,'USA','India'
+3,'USA','Russia'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.name from country_keystore_mysql a join 
country_keystore_mysql b on a.id <> b.id;
+---- RESULTS
+1,'India','Russia'
+1,'India','USA'
+2,'Russia','India'
+2,'Russia','USA'
+3,'USA','India'
+3,'USA','Russia'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Left join with another table
+select a.id, a.name, b.string_col from country_mysql a left join 
country_keystore_mysql b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.string_col from country_keystore_mysql a left join 
country_mysql b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Right join with another table
+select a.id, a.name, b.string_col from country_mysql a right join 
country_keystore_mysql b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.string_col from country_keystore_mysql a right join 
country_mysql b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Full outer join
+select a.id, a.name, b.string_col from country_mysql a full outer join 
country_keystore_mysql b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.string_col from country_keystore_mysql a full outer 
join country_mysql b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Using IN clause
+select * from country_mysql where name IN ('India', 'USA');
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select * from country_keystore_mysql where name IN ('India', 'USA');
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
\ No newline at end of file
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-postgres-tables.test
 
b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-postgres-tables.test
new file mode 100644
index 000000000..544a73b4c
--- /dev/null
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/hive-jdbc-postgres-tables.test
@@ -0,0 +1,234 @@
+====
+---- QUERY
+select * from country_postgres;
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01
 11:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select * from country_keystore_postgres;
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01
 11:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select id, name, date_col from country_postgres;
+---- RESULTS
+1,'India',2024-01-01
+2,'Russia',2024-02-01
+3,'USA',2024-03-01
+---- TYPES
+INT, STRING, DATE
+====
+---- QUERY
+select id, name, date_col from country_keystore_postgres;
+---- RESULTS
+1,'India',2024-01-01
+2,'Russia',2024-02-01
+3,'USA',2024-03-01
+---- TYPES
+INT, STRING, DATE
+====
+---- QUERY
+select count(*) from country_postgres;
+---- RESULTS
+3
+---- TYPES
+BIGINT
+====
+---- QUERY
+select count(*) from country_keystore_postgres;
+---- RESULTS
+3
+---- TYPES
+BIGINT
+====
+---- QUERY
+select distinct name from country_postgres;
+---- RESULTS
+'India'
+'Russia'
+'USA'
+---- TYPES
+STRING
+====
+---- QUERY
+select distinct name from country_keystore_postgres;
+---- RESULTS
+'India'
+'Russia'
+'USA'
+---- TYPES
+STRING
+====
+---- QUERY
+# Check for null values
+select * from country_postgres where string_col IS NULL;
+---- RESULTS
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select * from country_keystore_postgres where string_col IS NULL;
+---- RESULTS
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+# Order by integer column
+select id, name, int_col from country_postgres order by int_col DESC;
+---- RESULTS
+3,'USA',3000
+2,'Russia',2000
+1,'India',1000
+---- TYPES
+INT, STRING, INT
+====
+---- QUERY
+select id, name, int_col from country_keystore_postgres order by int_col DESC;
+---- RESULTS
+3,'USA',3000
+2,'Russia',2000
+1,'India',1000
+---- TYPES
+INT, STRING, INT
+====
+---- QUERY
+# Select using case statement
+select id, name, case when bool_col then 'Active' else 'Inactive' end as 
status from country_postgres;
+---- RESULTS
+1,'India','Active'
+2,'Russia','Inactive'
+3,'USA','Active'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select id, name, case when bool_col then 'Active' else 'Inactive' end as 
status from country_keystore_postgres;
+---- RESULTS
+1,'India','Active'
+2,'Russia','Inactive'
+3,'USA','Active'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Check for minimum date
+select min(date_col) from country_postgres;
+---- RESULTS
+2024-01-01
+---- TYPES
+DATE
+====
+---- QUERY
+select min(date_col) from country_keystore_postgres;
+---- RESULTS
+2024-01-01
+---- TYPES
+DATE
+====
+---- QUERY
+# Join with a self table alias
+select a.id, a.name, b.name from country_postgres a join country_postgres b on 
a.id <> b.id;
+---- RESULTS
+1,'India','Russia'
+1,'India','USA'
+2,'Russia','India'
+2,'Russia','USA'
+3,'USA','India'
+3,'USA','Russia'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.name from country_keystore_postgres a join 
country_keystore_postgres b on a.id <> b.id;
+---- RESULTS
+1,'India','Russia'
+1,'India','USA'
+2,'Russia','India'
+2,'Russia','USA'
+3,'USA','India'
+3,'USA','Russia'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Left join with another table
+select a.id, a.name, b.string_col from country_postgres a left join 
country_keystore_postgres b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.string_col from country_keystore_postgres a left join 
country_postgres b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Right join with another table
+select a.id, a.name, b.string_col from country_postgres a right join 
country_keystore_postgres b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.string_col from country_keystore_postgres a right join 
country_postgres b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Full outer join
+select a.id, a.name, b.string_col from country_postgres a full outer join 
country_keystore_postgres b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+select a.id, a.name, b.string_col from country_keystore_postgres a full outer 
join country_postgres b on a.id = b.id;
+---- RESULTS
+1,'India','IN'
+2,'Russia','RU'
+3,'USA','US'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Using IN clause
+select * from country_postgres where name IN ('India', 'USA');
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
+---- QUERY
+select * from country_keystore_postgres where name IN ('India', 'USA');
+---- RESULTS
+1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01
 10:00:00
+3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01
 12:00:00
+---- TYPES
+INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, 
STRING, TIMESTAMP
+====
\ No newline at end of file
diff --git a/tests/custom_cluster/test_ext_data_sources.py 
b/tests/custom_cluster/test_ext_data_sources.py
index b3dbde78d..53a42886a 100644
--- a/tests/custom_cluster/test_ext_data_sources.py
+++ b/tests/custom_cluster/test_ext_data_sources.py
@@ -176,8 +176,95 @@ class TestExtDataSources(CustomClusterTestSuite):
     assert "FailoverInSyncJdbcDataSource" not in result.get_data()
 
 
+class TestHivePostgresJdbcTables(CustomClusterTestSuite):
+  """Tests for hive jdbc postgres tables. """
+
+  @classmethod
+  def get_workload(cls):
+    return 'functional-query'
+
+  @classmethod
+  def setup_class(cls):
+    super(TestHivePostgresJdbcTables, cls).setup_class()
+
+  @pytest.mark.execute_serially
+  def test_postgres_hive_jdbc_tables(self, vector, unique_database):
+    """Run tests for external hive jdbc tables."""
+    hive_sql = """
+    DROP TABLE IF EXISTS {0}.country_postgres;
+    CREATE EXTERNAL TABLE {0}.country_postgres
+    (
+        id INT,
+        name STRING,
+        bool_col BOOLEAN,
+        tinyint_col     SMALLINT,
+        smallint_col    SMALLINT,
+        int_col         INT,
+        bigint_col      BIGINT,
+        float_col       FLOAT,
+        double_col      DOUBLE,
+        date_col        DATE,
+        string_col      STRING,
+        timestamp_col   TIMESTAMP
+    )
+    STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+    TBLPROPERTIES (
+        "hive.sql.database.type" = "POSTGRES",
+        "hive.sql.jdbc.driver" = "org.postgresql.Driver",
+        "hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional",
+        "hive.sql.dbcp.username" = "hiveuser",
+        "hive.sql.dbcp.password" = "password",
+        "hive.sql.table" = "country"
+    );
+
+    DROP TABLE IF EXISTS {0}.country_keystore_postgres;
+    CREATE EXTERNAL TABLE {0}.country_keystore_postgres
+    (
+        id INT,
+        name STRING,
+        bool_col BOOLEAN,
+        tinyint_col     SMALLINT,
+        smallint_col    SMALLINT,
+        int_col         INT,
+        bigint_col      BIGINT,
+        float_col       FLOAT,
+        double_col      DOUBLE,
+        date_col        DATE,
+        string_col      STRING,
+        timestamp_col   TIMESTAMP
+    )
+    STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+    TBLPROPERTIES (
+        "hive.sql.database.type" = "POSTGRES",
+        "hive.sql.jdbc.driver" = "org.postgresql.Driver",
+        "hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional",
+        "hive.sql.dbcp.username" = "hiveuser",
+        "hive.sql.dbcp.password.keystore" =
+        "jceks://hdfs/test-warehouse/data-sources/test.jceks",
+        "hive.sql.dbcp.password.key" = "hiveuser",
+        "hive.sql.table" = "country"
+    );
+    """.format(unique_database)
+    try:
+      self.run_stmt_in_hive(hive_sql)
+    except Exception:
+      pytest.xfail(reason="Can't create hive jdbc table.")
+    self.client.execute("INVALIDATE METADATA {0}.country_postgres".
+                        format(unique_database))
+    self.client.execute("INVALIDATE METADATA {0}.country_keystore_postgres".
+                        format(unique_database))
+    # Describing postgres hive jdbc table in Impala.
+    self.client.execute("DESCRIBE 
{0}.country_postgres".format(unique_database))
+    self.client.execute("DESCRIBE 
{0}.country_keystore_postgres".format(unique_database))
+
+  # Select statements are verified in hive-jdbc-postgres-tables.test.
+    self.run_test_case('QueryTest/hive-jdbc-postgres-tables', vector,
+                       use_db=unique_database)
+
+
 class TestMySqlExtJdbcTables(CustomClusterTestSuite):
-  """Impala query tests for external jdbc tables on MySQL server."""
+  """Impala query tests for external jdbc tables on MySQL server.
+  It also includes tests for external hive jdbc tables on mysql."""
 
   @classmethod
   def get_workload(cls):
@@ -193,7 +280,7 @@ class TestMySqlExtJdbcTables(CustomClusterTestSuite):
       subprocess.check_call(run_cmd, close_fds=True)
     except subprocess.CalledProcessError as e:
       if e.returncode == 10:
-        pytest.skip("These tests requireadd the docker to be added to sudoer's group")
+        pytest.skip("These tests require the docker to be added to sudoer's group")
       elif e.returncode == 20:
         pytest.skip("Can't connect to local MySQL server")
       elif e.returncode == 30:
@@ -227,6 +314,82 @@ class TestMySqlExtJdbcTables(CustomClusterTestSuite):
     """Run tests for external jdbc tables on MySQL"""
     self.run_test_case('QueryTest/mysql-ext-jdbc-tables', vector, use_db=unique_database)
 
+  @pytest.mark.execute_serially
+  def test_mysql_hive_jdbc_tables(self, vector, unique_database):
+    """ Run tests for external hive jdbc tables on mysql"""
+    hive_sql = """
+    ADD JAR hdfs:///test-warehouse/data-sources/jdbc-drivers/mysql-jdbc.jar;
+
+    DROP TABLE IF EXISTS {0}.country_mysql;
+    CREATE EXTERNAL TABLE {0}.country_mysql
+    (
+        id INT,
+        name STRING,
+        bool_col BOOLEAN,
+        tinyint_col     SMALLINT,
+        smallint_col    SMALLINT,
+        int_col         INT,
+        bigint_col      BIGINT,
+        float_col       FLOAT,
+        double_col      DOUBLE,
+        date_col        DATE,
+        string_col      STRING,
+        timestamp_col   TIMESTAMP
+    )
+    STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+    TBLPROPERTIES (
+        "hive.sql.database.type" = "MYSQL",
+        "hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver",
+        "hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional",
+        "hive.sql.dbcp.username" = "hiveuser",
+        "hive.sql.dbcp.password" = "password",
+        "hive.sql.table" = "country"
+    );
+
+    DROP TABLE IF EXISTS {0}.country_keystore_mysql;
+    CREATE EXTERNAL TABLE {0}.country_keystore_mysql
+    (
+        id INT,
+        name STRING,
+        bool_col BOOLEAN,
+        tinyint_col     SMALLINT,
+        smallint_col    SMALLINT,
+        int_col         INT,
+        bigint_col      BIGINT,
+        float_col       FLOAT,
+        double_col      DOUBLE,
+        date_col        DATE,
+        string_col      STRING,
+        timestamp_col   TIMESTAMP
+    )
+    STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
+    TBLPROPERTIES (
+        "hive.sql.database.type" = "MYSQL",
+        "hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver",
+        "hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional",
+        "hive.sql.dbcp.username" = "hiveuser",
+        "hive.sql.dbcp.password.keystore" =
+        "jceks://hdfs/test-warehouse/data-sources/test.jceks",
+        "hive.sql.dbcp.password.key" = "hiveuser",
+        "hive.sql.table" = "country"
+    );
+    """.format(unique_database)
+    try:
+      self.run_stmt_in_hive(hive_sql)
+    except Exception:
+      pytest.xfail(reason="Can't create hive jdbc table.")
+    self.client.execute("INVALIDATE METADATA {0}.country_mysql"
+                        .format(unique_database))
+    self.client.execute("INVALIDATE METADATA {0}.country_keystore_mysql"
+                        .format(unique_database))
+    # Describing mysql hive jdbc table in Impala.
+    self.client.execute("DESCRIBE {0}.country_mysql".format(unique_database))
+    self.client.execute("DESCRIBE {0}.country_keystore_mysql".format(unique_database))
+
+    # Select statements are verified in hive-jdbc-mysql-tables.test.
+    self.run_test_case('QueryTest/hive-jdbc-mysql-tables', vector,
+                       use_db=unique_database)
+
 
 class TestImpalaExtJdbcTables(CustomClusterTestSuite):
   """Impala query tests for external jdbc tables in Impala cluster."""


Reply via email to