[ https://issues.apache.org/jira/browse/HIVE-24343?focusedWorklogId=510190&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-510190 ]
ASF GitHub Bot logged work on HIVE-24343: ----------------------------------------- Author: ASF GitHub Bot Created on: 11/Nov/20 11:07 Start Date: 11/Nov/20 11:07 Worklog Time Spent: 10m Work Description: vnhive commented on a change in pull request #1640: URL: https://github.com/apache/hive/pull/1640#discussion_r521282589 ########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java ########## @@ -870,44 +870,88 @@ private boolean isViewTable(String catName, String dbName, String tblName) throw List<? extends Object> paramsForFilter, List<String> joinsForFilter, Integer max) throws MetaException { boolean doTrace = LOG.isDebugEnabled(); + + // The JDBC client driver implementations of Derby, PostgreSQL, MySQL, MariaDB and Oracle send the number of + // parameters being set in a executed statement as a 2 byte signed integer to the servers. This indirectly + // sets s limit on the number of parameters that can be set to 32767. This is also the number of parameters + // that can be passed to the JDO executeArray call. + final int JDBC_STMT_PARAM_LIMIT = 32767; + final String dbNameLcase = dbName.toLowerCase(); final String tblNameLcase = tblName.toLowerCase(); final String catNameLcase = normalizeSpace(catName).toLowerCase(); // We have to be mindful of order during filtering if we are not returning all partitions. + // If the number of parameters is larger than 32767 and the query is split into multiple + // parts, the order by would work within each query part. String orderForFilter = (max != null) ? " order by \"PART_NAME\" asc" : ""; String queryText = - "select " + PARTITIONS + ".\"PART_ID\" from " + PARTITIONS + "" - + " inner join " + TBLS + " on " + PARTITIONS + ".\"TBL_ID\" = " + TBLS + ".\"TBL_ID\" " - + " and " + TBLS + ".\"TBL_NAME\" = ? " - + " inner join " + DBS + " on " + TBLS + ".\"DB_ID\" = " + DBS + ".\"DB_ID\" " - + " and " + DBS + ".\"NAME\" = ? " - + join(joinsForFilter, ' ') - + " where " + DBS + ".\"CTLG_NAME\" = ? " - + (StringUtils.isBlank(sqlFilter) ? "" : (" and " + sqlFilter)) + orderForFilter; - Object[] params = new Object[paramsForFilter.size() + 3]; - params[0] = tblNameLcase; - params[1] = dbNameLcase; - params[2] = catNameLcase; - for (int i = 0; i < paramsForFilter.size(); ++i) { - params[i + 3] = paramsForFilter.get(i); - } + "select " + PARTITIONS + ".\"PART_ID\" from " + PARTITIONS + "" + + " inner join " + TBLS + " on " + PARTITIONS + ".\"TBL_ID\" = " + TBLS + ".\"TBL_ID\" " + + " and " + TBLS + ".\"TBL_NAME\" = ? " + + " inner join " + DBS + " on " + TBLS + ".\"DB_ID\" = " + DBS + ".\"DB_ID\" " + + " and " + DBS + ".\"NAME\" = ? " + + join(joinsForFilter, ' ') + + " where " + DBS + ".\"CTLG_NAME\" = ? " + + (StringUtils.isBlank(sqlFilter) ? "" : (" and " + sqlFilter)) + orderForFilter; - long start = doTrace ? System.nanoTime() : 0; - Query query = pm.newQuery("javax.jdo.query.SQL", queryText); - List<Object> sqlResult = executeWithArray(query, params, queryText, ((max == null) ? -1 : max.intValue())); - long queryTime = doTrace ? System.nanoTime() : 0; - MetastoreDirectSqlUtils.timingTrace(doTrace, queryText, start, queryTime); - if (sqlResult.isEmpty()) { - return Collections.emptyList(); // no partitions, bail early. + int parametersToProcess = paramsForFilter.size(); + + List<Long> result = new ArrayList<>(); + + // If this method has been called by default we need to query the tables once. Hence set the minimum numbner of + // iterations to 1. + int iterations = 1; + + + // The first three parameters in the query are the table, database and the catalog names. This leaves us + // JDBC_STMT_PARAM_LIMIT - 3 parameters that can be passed to the underlying JDBC driver. If the number of + // input parameters is greater than this limit, we will have to process them in multiple iterations. + if (parametersToProcess > (JDBC_STMT_PARAM_LIMIT - 3)) { + // Find the number of iterations required from the number of input parameters and sets of 32764 parameters + // that can be created from it. + iterations = parametersToProcess / (JDBC_STMT_PARAM_LIMIT - 3); + iterations += ((parametersToProcess % (JDBC_STMT_PARAM_LIMIT - 3) != 0) ? 1 : 0); } - List<Long> result = new ArrayList<>(sqlResult.size()); - for (Object fields : sqlResult) { - result.add(MetastoreDirectSqlUtils.extractSqlLong(fields)); + for (int i = 0 ; i < iterations; i++) { + // The number of filter parameters that can be processed in each iteration has to be lesser than or equal to + // JDBC_STMT_PARAM_LIMIT - 3 + int filterParamSize = parametersToProcess > (JDBC_STMT_PARAM_LIMIT - 3) ? (JDBC_STMT_PARAM_LIMIT - 3) : parametersToProcess; + + Object[] params = new Object[filterParamSize + 3]; + params[0] = tblNameLcase; + params[1] = dbNameLcase; + params[2] = catNameLcase; + + // Reduce by the number of parameters that have already been processed. + parametersToProcess -= (JDBC_STMT_PARAM_LIMIT - 3); + + for (int j = 0; j < filterParamSize; j++) { + // The current set of parameters have to be fetched from the correct offset in the input parameters set. + params[j + 3] = paramsForFilter.get(((JDBC_STMT_PARAM_LIMIT - 3) * i) + j); + } + + long start = doTrace ? System.nanoTime() : 0; + Query query = pm.newQuery("javax.jdo.query.SQL", queryText); + List<Object> sqlResult = MetastoreDirectSqlUtils.executeWithArray(query, params, queryText, ((max == null) ? -1 : max.intValue())); + long queryTime = doTrace ? System.nanoTime() : 0; + MetastoreDirectSqlUtils.timingTrace(doTrace, queryText, start, queryTime); + + for (Object fields : sqlResult) { + if (max != null && max != -1) { + if (max > 0) { + max--; + } else { + break; + } + } + result.add(MetastoreDirectSqlUtils.extractSqlLong(fields)); Review comment: This is the query that is executed, select "PARTITIONS"."PART_ID" from "PARTITIONS" inner join "TBLS" on "PARTITIONS"."TBL_ID" = "TBLS"."TBL_ID" and "TBLS"."TBL_NAME" = ? inner join "DBS" on "TBLS"."DB_ID" = "DBS"."DB_ID" and "DBS"."NAME" = ? inner join "PARTITION_KEY_VALS" "FILTER0" on "FILTER0"."PART_ID" = "PARTITIONS"."PART_ID" and "FILTER0"."INTEGER_IDX" = 0 where "DBS"."CTLG_NAME" = ? and ( (("FILTER0"."PART_KEY_VAL" = ?) or (("FILTER0"."PART_KEY_VAL" = ?) or (("FILTER0"."PART_KEY_VAL" = ?) or ("FILTER0"."PART_KEY_VAL" = ?)) ) ) ) We pass in unique values to PART_KEY_VAL, hence this ideally should not result in duplicates. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 510190) Time Spent: 1h 20m (was: 1h 10m) > Table partition operations (create, drop, select) fail when the number of > partitions is greater than 32767 (signed int) > ----------------------------------------------------------------------------------------------------------------------- > > Key: HIVE-24343 > URL: https://issues.apache.org/jira/browse/HIVE-24343 > Project: Hive > Issue Type: Bug > Components: Metastore > Reporter: Narayanan Venkateswaran > Assignee: Narayanan Venkateswaran > Priority: Minor > Labels: pull-request-available > Time Spent: 1h 20m > Remaining Estimate: 0h > > The table partition operations - create, drop, select access the underlying > relation database using JDO, which internally routes the operations through > the JDBC driver. Most of the underlying JDBC driver implementations place a > limit on the number of parameters that can be passed through a statement > implementation. The limitations are as follows, > postgreSQL - 32767 > (https://www.postgresql.org/message-id/16832734.post%40talk.nabble.com) > MySQL - 32767 - 2 Byte Integer - num of params > (https://dev.mysql.com/doc/internals/en/com-stmt-prepare-response.html#packet-COM_STMT_PREPARE_OK) > Oracle - 32767 - > https://www.jooq.org/doc/3.12/manual/sql-building/dsl-context/custom-settings/settings-inline-threshold/ > Derby - 32767 - stored in an unsinged integer - Note the Prepared > Statement implementation here - > [https://svn.apache.org/repos/asf/db/derby/code/branches/10.1/java/client/org/apache/derby/client/am/PreparedStatement.java] > > These limits should be taken into account when querying the underlying > metastore. -- This message was sent by Atlassian Jira (v8.3.4#803005)