[ 
https://issues.apache.org/jira/browse/HIVE-24396?focusedWorklogId=572345&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-572345
 ]

ASF GitHub Bot logged work on HIVE-24396:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 25/Mar/21 21:44
            Start Date: 25/Mar/21 21:44
    Worklog Time Spent: 10m 
      Work Description: nrg4878 commented on a change in pull request #2037:
URL: https://github.com/apache/hive/pull/2037#discussion_r601859471



##########
File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/AbstractJDBCConnectorProvider.java
##########
@@ -0,0 +1,311 @@
+package org.apache.hadoop.hive.metastore.dataconnector.jdbc;
+
+import org.apache.hadoop.hive.metastore.ColumnType;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.DataConnector;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.dataconnector.AbstractDataConnectorProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.ConnectException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public abstract class AbstractJDBCConnectorProvider extends AbstractDataConnectorProvider {
+  private static Logger LOG = LoggerFactory.getLogger(AbstractJDBCConnectorProvider.class);
+  protected static Warehouse warehouse = null;
+
+  // duplicate constants from Constants.java to avoid a dependency on hive-common
+  public static final String JDBC_HIVE_STORAGE_HANDLER_ID =
+      "org.apache.hive.storage.jdbc.JdbcStorageHandler";
+  public static final String JDBC_CONFIG_PREFIX = "hive.sql";
+  public static final String JDBC_CATALOG = JDBC_CONFIG_PREFIX + ".catalog";
+  public static final String JDBC_SCHEMA = JDBC_CONFIG_PREFIX + ".schema";
+  public static final String JDBC_TABLE = JDBC_CONFIG_PREFIX + ".table";
+  public static final String JDBC_DATABASE_TYPE = JDBC_CONFIG_PREFIX + ".database.type";
+  public static final String JDBC_URL = JDBC_CONFIG_PREFIX + ".jdbc.url";
+  public static final String JDBC_DRIVER = JDBC_CONFIG_PREFIX + ".jdbc.driver";
+  public static final String JDBC_USERNAME = JDBC_CONFIG_PREFIX + ".dbcp.username";
+  public static final String JDBC_PASSWORD = JDBC_CONFIG_PREFIX + ".dbcp.password";
+  public static final String JDBC_KEYSTORE = JDBC_CONFIG_PREFIX + ".dbcp.password.keystore";
+  public static final String JDBC_KEY = JDBC_CONFIG_PREFIX + ".dbcp.password.key";
+  public static final String JDBC_QUERY = JDBC_CONFIG_PREFIX + ".query";
+  public static final String JDBC_QUERY_FIELD_NAMES = JDBC_CONFIG_PREFIX + ".query.fieldNames";
+  public static final String JDBC_QUERY_FIELD_TYPES = JDBC_CONFIG_PREFIX + ".query.fieldTypes";
+  public static final String JDBC_SPLIT_QUERY = JDBC_CONFIG_PREFIX + ".query.split";
+  public static final String JDBC_PARTITION_COLUMN = JDBC_CONFIG_PREFIX + ".partitionColumn";
+  public static final String JDBC_NUM_PARTITIONS = JDBC_CONFIG_PREFIX + ".numPartitions";
+  public static final String JDBC_LOW_BOUND = JDBC_CONFIG_PREFIX + ".lowerBound";
+  public static final String JDBC_UPPER_BOUND = JDBC_CONFIG_PREFIX + ".upperBound";
+
+  private static final String JDBC_INPUTFORMAT_CLASS = "org.apache.hive.storage.jdbc.JdbcInputFormat".intern();
+  private static final String JDBC_OUTPUTFORMAT_CLASS = "org.apache.hive.storage.jdbc.JdbcOutputFormat".intern();
+
+  String type = null; // MYSQL, POSTGRES, ORACLE, DERBY, MSSQL, DB2 etc.
+  String driverClassName = null;
+  String jdbcUrl = null;
+  String username = null;
+  String password = null; // TODO convert to byte array
+
+  public AbstractJDBCConnectorProvider(String dbName, DataConnector dataConn) {
+    super(dbName, dataConn);
+    this.type = connector.getType().toUpperCase(); // TODO
+    this.jdbcUrl = connector.getUrl();
+    this.username = connector.getParameters().get(JDBC_USERNAME);
+    this.password = connector.getParameters().get(JDBC_PASSWORD);
+    if (this.password == null) {
+      String keystore = connector.getParameters().get(JDBC_KEYSTORE);
+      String key = connector.getParameters().get(JDBC_KEY);
+      try {
+        char[] keyValue = MetastoreConf.getValueFromKeystore(keystore, key);
+        if (keyValue != null)
+          this.password = new String(keyValue);
+      } catch (IOException i) {
+        LOG.warn("Could not read key value from keystore");
+      }
+    }
+
+    try {
+      warehouse = new Warehouse(MetastoreConf.newMetastoreConf());
+    } catch (MetaException e) { /* ignore */ }
+  }
+
+  @Override public void open() throws ConnectException {
+    try {
+      Class.forName(driverClassName);
+      handle = DriverManager.getConnection(jdbcUrl, username, password);
+      isOpen = true;
+    } catch (ClassNotFoundException cnfe) {
+      LOG.warn("Driver class not found in classpath:" + driverClassName);
+      throw new RuntimeException("Driver class not found:" + driverClassName);
+    } catch (SQLException sqle) {
+      LOG.warn("Could not connect to remote data source at " + jdbcUrl);
+      throw new ConnectException("Could not connect to remote datasource at " + jdbcUrl + ",cause:" + sqle.getMessage());
+    }
+  }
+
+  protected Connection getConnection() {
+    try {
+      if (!isOpen)
+        open();
+    } catch (ConnectException ce) {
+      throw new RuntimeException(ce.getMessage());
+    }
+
+    if (handle instanceof Connection)
+      return (Connection)handle;
+
+    throw new RuntimeException("unexpected type for connection handle");
+  }
+
+  @Override public void close() {
+    if (isOpen) {
+      try {
+        ((Connection)handle).close();
+      } catch (SQLException sqle) {
+        LOG.warn("Could not close jdbc connection to " + jdbcUrl, sqle);
+      } finally {
+        isOpen = false; // reset so a later getConnection() reopens instead of returning a closed handle
+      }
+    }
+  }
+
+  /**
+   * Returns Hive Table objects from the remote database for tables that match a name pattern.
+   * @param regex
+   * @return List A collection of objects that match the name pattern, null otherwise.
+   * @throws MetaException To indicate any failures with executing this API
+   */
+  @Override public abstract List<Table> getTables(String regex) throws MetaException;
+
+  /**
+   * Returns a list of all table names from the remote database.
+   * @return List A collection of all the table names, null if there are no tables.
+   * @throws MetaException To indicate any failures with executing this API
+   */
+  @Override public List<String> getTableNames() throws MetaException {
+    ResultSet rs = null;
+    try {
+      rs = getConnection().getMetaData().getTables(scoped_db, null, null, new String[] { "TABLE" });
+      if (rs != null) {
+        List<String> tables = new ArrayList<String>();
+        while(rs.next()) {
+          tables.add(rs.getString(3));
+        }
+        return tables;
+      }
+    } catch (SQLException sqle) {
+      LOG.warn("Could not retrieve table names from remote datasource, cause:" 
+ sqle.getMessage());
+    } finally {
+      try {
+        if (rs != null) {
+          rs.close();
+          rs = null;
+        }
+      } catch(Exception e) { /* ignore */}
+    }
+    return null;
+  }
+
+  protected abstract ResultSet fetchTableMetadata(String tableName) throws MetaException;
+
+  protected abstract ResultSet fetchTableNames() throws MetaException;
+
+  /**
+   * Fetch a single table with the given name, returns a Hive Table object from the remote database.
+   * @param tableName
+   * @return Table A Table object for the matching table, null otherwise.
+   * @throws MetaException To indicate any failures with executing this API
+   */
+  @Override public Table getTable(String tableName) throws MetaException {
+    ResultSet rs = null;
+    Table table = null;
+    try {
+      // rs = fetchTableMetadata(tableName);
+      rs = fetchTableViaDBMetaData(tableName);
+      List<FieldSchema> cols = new ArrayList<>();
+      while (rs.next()) {
+        FieldSchema fs = new FieldSchema();
+        fs.setName(rs.getString("COLUMN_NAME"));
+        fs.setType(getDataType(rs.getString("TYPE_NAME"), rs.getInt("COLUMN_SIZE")));
+        fs.setComment("inferred column type");
+        cols.add(fs);
+      }
+
+      if (cols.size() == 0) {
+        // table does not exist or could not be fetched
+        return null;
+      }
+
+      table = buildTableFromColsList(tableName, cols);
+      //Setting the table properties.
+      table.getParameters().put(JDBC_DATABASE_TYPE, this.type);
+      table.getParameters().put(JDBC_DRIVER, this.driverClassName);
+      table.getParameters().put(JDBC_TABLE, tableName);
+      table.getParameters().put(JDBC_URL, this.jdbcUrl);
+      table.getParameters().put(hive_metastoreConstants.META_TABLE_STORAGE, JDBC_HIVE_STORAGE_HANDLER_ID);
+      table.getParameters().put("EXTERNAL", "TRUE");
+      Map<String, String> connectorParams = connector.getParameters();
+      for (String param: connectorParams.keySet()) {
+        if (param.startsWith(JDBC_CONFIG_PREFIX)) {
+          table.getParameters().put(param, connectorParams.get(param));
+        }
+      }
+      return table;
+    } catch (Exception e) {
+      LOG.warn("Exception retrieving remote table " + scoped_db + "." + 
tableName + " via data connector "
+          + connector.getName());
+      throw new MetaException("Error retrieving remote table:" + e);
+    } finally {
+      try {
+        if (rs != null) {
+          rs.close();
+        }
+      } catch (Exception ex) { /* ignore */ }
+    }
+  }
+
+  private ResultSet fetchTableViaDBMetaData(String tableName) {
+    ResultSet rs = null;
+    try {
+      rs = getConnection().getMetaData().getColumns(scoped_db, null, tableName, null);
+    } catch (SQLException sqle) {
+      LOG.warn("Could not retrieve column names from JDBC table, cause:" + 
sqle.getMessage());

Review comment:
       fixed.
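
For context, here is a minimal sketch of how a concrete provider might fill in
the abstract hooks of the class under review. This is an illustration only:
the class name ExampleMySQLConnectorProvider and the MySQL driver string are
assumptions, not code from this PR.

    package org.apache.hadoop.hive.metastore.dataconnector.jdbc;

    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.util.List;

    import org.apache.hadoop.hive.metastore.api.DataConnector;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hadoop.hive.metastore.api.Table;

    // hypothetical subclass; not part of the patch
    public class ExampleMySQLConnectorProvider extends AbstractJDBCConnectorProvider {

      public ExampleMySQLConnectorProvider(String dbName, DataConnector connector) {
        super(dbName, connector);
        // the abstract class leaves driverClassName unset; each provider supplies its own
        this.driverClassName = "com.mysql.cj.jdbc.Driver"; // assumed driver class
      }

      // column-level metadata for one remote table, via standard JDBC DatabaseMetaData
      @Override protected ResultSet fetchTableMetadata(String tableName) throws MetaException {
        try {
          return getConnection().getMetaData().getColumns(scoped_db, null, tableName, null);
        } catch (SQLException sqle) {
          throw new MetaException("Could not fetch metadata for " + tableName + ": " + sqle.getMessage());
        }
      }

      // names of all tables in the mapped remote database
      @Override protected ResultSet fetchTableNames() throws MetaException {
        try {
          return getConnection().getMetaData().getTables(scoped_db, null, null, new String[] { "TABLE" });
        } catch (SQLException sqle) {
          throw new MetaException("Could not fetch table names: " + sqle.getMessage());
        }
      }

      @Override public List<Table> getTables(String regex) throws MetaException {
        // a real provider would filter getTableNames() against the regex and
        // build Table objects via getTable(); omitted in this sketch
        throw new UnsupportedOperationException("not implemented in this sketch");
      }
    }

Keeping the DatabaseMetaData calls in subclasses lets each database type scope
lookups (catalog vs. schema) the way its driver expects.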




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 572345)
    Time Spent: 7h 40m  (was: 7.5h)

> [New Feature] Add data connector support for remote datasources
> ---------------------------------------------------------------
>
>                 Key: HIVE-24396
>                 URL: https://issues.apache.org/jira/browse/HIVE-24396
>             Project: Hive
>          Issue Type: Improvement
>          Components: Hive
>            Reporter: Naveen Gangam
>            Assignee: Naveen Gangam
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 7h 40m
>  Remaining Estimate: 0h
>
> This feature adds support in the Hive Metastore for configuring data 
> connectors to remote datasources and mapping their databases. We currently 
> support remote tables via StorageHandlers such as JDBCStorageHandler and 
> HBaseStorageHandler.
> Data connectors are a natural extension of this: instead of individual 
> tables, an entire database or catalog is mapped. The tables within are 
> mapped automatically at runtime; their metadata is never persisted in Hive 
> but is always built at runtime.
> With this feature, we introduce a notion of database type in Hive: NATIVE 
> vs. REMOTE. All existing databases are NATIVE. To create a REMOTE database, 
> use the following syntax:
> CREATE REMOTE DATABASE remote_db USING <dataconnector> WITH DCPROPERTIES 
> (....);
> A design doc will be attached to this JIRA.
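
As an illustration of the syntax above, creating a REMOTE database might look
like the following (the connector name and property values are made-up
examples; the property keys mirror the hive.sql.dbcp.* constants in the
patch):

    CREATE REMOTE DATABASE mysql_db USING mysql_conn WITH DCPROPERTIES (
      'hive.sql.dbcp.username'='hiveuser',
      'hive.sql.dbcp.password'='hivepass'
    );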



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
