[ 
https://issues.apache.org/jira/browse/HIVE-24343?focusedWorklogId=510190&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-510190
 ]

ASF GitHub Bot logged work on HIVE-24343:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 11/Nov/20 11:07
            Start Date: 11/Nov/20 11:07
    Worklog Time Spent: 10m 
      Work Description: vnhive commented on a change in pull request #1640:
URL: https://github.com/apache/hive/pull/1640#discussion_r521282589



##########
File path: 
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
##########
@@ -870,44 +870,88 @@ private boolean isViewTable(String catName, String 
dbName, String tblName) throw
       List<? extends Object> paramsForFilter, List<String> joinsForFilter, 
Integer max)
       throws MetaException {
     boolean doTrace = LOG.isDebugEnabled();
+
+    // The JDBC client driver implementations of Derby, PostgreSQL, MySQL, 
MariaDB and Oracle send the number of
+    // parameters being set in a executed statement as a 2 byte signed integer 
to the servers. This indirectly
+    // sets s limit on the number of parameters that can be set to 32767. This 
is also the number of parameters
+    // that can be passed to the JDO executeArray call.
+    final int JDBC_STMT_PARAM_LIMIT = 32767;
+
     final String dbNameLcase = dbName.toLowerCase();
     final String tblNameLcase = tblName.toLowerCase();
     final String catNameLcase = normalizeSpace(catName).toLowerCase();
 
     // We have to be mindful of order during filtering if we are not returning 
all partitions.
+    // If the number of parameters is larger than 32767 and the query is split 
into multiple
+    // parts, the order by would work within each query part.
     String orderForFilter = (max != null) ? " order by \"PART_NAME\" asc" : "";
 
     String queryText =
-        "select " + PARTITIONS + ".\"PART_ID\" from " + PARTITIONS + ""
-      + "  inner join " + TBLS + " on " + PARTITIONS + ".\"TBL_ID\" = " + TBLS 
+ ".\"TBL_ID\" "
-      + "    and " + TBLS + ".\"TBL_NAME\" = ? "
-      + "  inner join " + DBS + " on " + TBLS + ".\"DB_ID\" = " + DBS + 
".\"DB_ID\" "
-      + "     and " + DBS + ".\"NAME\" = ? "
-      + join(joinsForFilter, ' ')
-      + " where " + DBS + ".\"CTLG_NAME\" = ? "
-      + (StringUtils.isBlank(sqlFilter) ? "" : (" and " + sqlFilter)) + 
orderForFilter;
-    Object[] params = new Object[paramsForFilter.size() + 3];
-    params[0] = tblNameLcase;
-    params[1] = dbNameLcase;
-    params[2] = catNameLcase;
-    for (int i = 0; i < paramsForFilter.size(); ++i) {
-      params[i + 3] = paramsForFilter.get(i);
-    }
+            "select " + PARTITIONS + ".\"PART_ID\" from " + PARTITIONS + ""
+                    + "  inner join " + TBLS + " on " + PARTITIONS + 
".\"TBL_ID\" = " + TBLS + ".\"TBL_ID\" "
+                    + "    and " + TBLS + ".\"TBL_NAME\" = ? "
+                    + "  inner join " + DBS + " on " + TBLS + ".\"DB_ID\" = " 
+ DBS + ".\"DB_ID\" "
+                    + "     and " + DBS + ".\"NAME\" = ? "
+                    + join(joinsForFilter, ' ')
+                    + " where " + DBS + ".\"CTLG_NAME\" = ? "
+                    + (StringUtils.isBlank(sqlFilter) ? "" : (" and " + 
sqlFilter)) + orderForFilter;
 
-    long start = doTrace ? System.nanoTime() : 0;
-    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
-    List<Object> sqlResult = executeWithArray(query, params, queryText, ((max 
== null)  ? -1 : max.intValue()));
-    long queryTime = doTrace ? System.nanoTime() : 0;
-    MetastoreDirectSqlUtils.timingTrace(doTrace, queryText, start, queryTime);
-    if (sqlResult.isEmpty()) {
-      return Collections.emptyList(); // no partitions, bail early.
+    int parametersToProcess = paramsForFilter.size();
+
+    List<Long> result = new ArrayList<>();
+
+    // If this method has been called by default we need to query the tables 
once. Hence set the minimum numbner of
+    // iterations to 1.
+    int iterations = 1;
+
+
+    // The first three parameters in the query are the table, database and the 
catalog names. This leaves us
+    // JDBC_STMT_PARAM_LIMIT - 3 parameters that can be passed to the 
underlying JDBC driver. If the number of
+    // input parameters is greater than this limit, we will have to process 
them in multiple iterations.
+    if (parametersToProcess > (JDBC_STMT_PARAM_LIMIT - 3)) {
+      // Find the number of iterations required from the number of input 
parameters and sets of 32764 parameters
+      // that can be created from it.
+      iterations = parametersToProcess / (JDBC_STMT_PARAM_LIMIT - 3);
+      iterations += ((parametersToProcess % (JDBC_STMT_PARAM_LIMIT - 3) != 0) 
? 1 : 0);
     }
 
-    List<Long> result = new ArrayList<>(sqlResult.size());
-    for (Object fields : sqlResult) {
-      result.add(MetastoreDirectSqlUtils.extractSqlLong(fields));
+    for (int i = 0 ; i < iterations; i++) {
+      // The number of filter parameters that can be processed in each 
iteration has to be lesser than or equal to
+      // JDBC_STMT_PARAM_LIMIT - 3
+      int filterParamSize = parametersToProcess > (JDBC_STMT_PARAM_LIMIT - 3) 
? (JDBC_STMT_PARAM_LIMIT - 3) : parametersToProcess;
+
+      Object[] params = new Object[filterParamSize + 3];
+      params[0] = tblNameLcase;
+      params[1] = dbNameLcase;
+      params[2] = catNameLcase;
+
+      // Reduce by the number of parameters that have already been processed.
+      parametersToProcess -= (JDBC_STMT_PARAM_LIMIT - 3);
+
+      for (int j = 0; j < filterParamSize; j++) {
+        // The current set of parameters have to be fetched from the correct 
offset in the input parameters set.
+        params[j + 3] = paramsForFilter.get(((JDBC_STMT_PARAM_LIMIT - 3) * i) 
+ j);
+      }
+
+      long start = doTrace ? System.nanoTime() : 0;
+      Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
+      List<Object> sqlResult = MetastoreDirectSqlUtils.executeWithArray(query, 
params, queryText, ((max == null) ? -1 : max.intValue()));
+      long queryTime = doTrace ? System.nanoTime() : 0;
+      MetastoreDirectSqlUtils.timingTrace(doTrace, queryText, start, 
queryTime);
+
+      for (Object fields : sqlResult) {
+        if (max != null && max != -1) {
+          if (max > 0) {
+            max--;
+          } else {
+            break;
+          }
+        }
+        result.add(MetastoreDirectSqlUtils.extractSqlLong(fields));

Review comment:
       This is the query that is executed,
   
   select "PARTITIONS"."PART_ID" from "PARTITIONS"  inner join "TBLS" on 
"PARTITIONS"."TBL_ID" = "TBLS"."TBL_ID"     and "TBLS"."TBL_NAME" = ?   inner 
join "DBS" on "TBLS"."DB_ID" = "DBS"."DB_ID"      and "DBS"."NAME" = ? inner 
join "PARTITION_KEY_VALS" "FILTER0" on "FILTER0"."PART_ID" = 
"PARTITIONS"."PART_ID" and "FILTER0"."INTEGER_IDX" = 0 where "DBS"."CTLG_NAME" 
= ?  and ( (("FILTER0"."PART_KEY_VAL" = ?) or  (("FILTER0"."PART_KEY_VAL" = ?) 
or  (("FILTER0"."PART_KEY_VAL" = ?) or ("FILTER0"."PART_KEY_VAL" = ?)) ) ) )
   
   We pass in unique values to PART_KEY_VAL, hence this ideally should not 
result in duplicates.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 510190)
    Time Spent: 1h 20m  (was: 1h 10m)

> Table partition operations (create, drop, select) fail when the number of 
> partitions is greater than 32767 (signed int)
> -----------------------------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-24343
>                 URL: https://issues.apache.org/jira/browse/HIVE-24343
>             Project: Hive
>          Issue Type: Bug
>          Components: Metastore
>            Reporter: Narayanan Venkateswaran
>            Assignee: Narayanan Venkateswaran
>            Priority: Minor
>              Labels: pull-request-available
>          Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> The table partition operations - create, drop, select access the underlying 
> relation database using JDO, which internally routes the operations through 
> the JDBC driver. Most of the underlying JDBC driver implementations place a 
> limit on the number of parameters that can be passed through a statement 
> implementation. The limitations are as follows,
> postgreSQL - 32767
> (https://www.postgresql.org/message-id/16832734.post%40talk.nabble.com)
> MySQL - 32767 - 2 Byte Integer - num of params
> (https://dev.mysql.com/doc/internals/en/com-stmt-prepare-response.html#packet-COM_STMT_PREPARE_OK)
> Oracle - 32767 -
> https://www.jooq.org/doc/3.12/manual/sql-building/dsl-context/custom-settings/settings-inline-threshold/
> Derby - 32767 - stored in an unsinged integer - Note the Prepared
> Statement implementation here -
> [https://svn.apache.org/repos/asf/db/derby/code/branches/10.1/java/client/org/apache/derby/client/am/PreparedStatement.java]
>  
> These limits should be taken into account when querying the underlying 
> metastore.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to