[ https://issues.apache.org/jira/browse/HIVE-24828?focusedWorklogId=574636&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-574636 ]
ASF GitHub Bot logged work on HIVE-24828: ----------------------------------------- Author: ASF GitHub Bot Created on: 31/Mar/21 06:53 Start Date: 31/Mar/21 06:53 Worklog Time Spent: 10m Work Description: klcopp commented on a change in pull request #2073: URL: https://github.com/apache/hive/pull/2073#discussion_r604639140 ########## File path: standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/FilterUtils.java ########## @@ -442,4 +447,35 @@ public static void checkDbAndTableFilters(boolean isFilterEnabled, return compactions; } } + + public static GetLatestCompactionInfoResponse filterLatestCompactionInfoIfEnabled( + boolean isFilterEnabled, MetaStoreFilterHook filterHook, + String catName, String dbName, String tableName, GetLatestCompactionInfoResponse response + ) throws MetaException { + if (isFilterEnabled && response.getCompactionsSize() > 0) { + List<LatestCompactionInfo> compactions = response.getCompactions(); + if (compactions.get(0).getPartitionname() == null) { + // non partitioned table + List<String> tableNames = new ArrayList<>(); + tableNames.add(tableName); + tableNames = filterHook.filterTableNames(catName, dbName, tableNames); + if (tableNames.isEmpty()) { + response.unsetCompactions(); Review comment: Where is this implemented? ########## File path: standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift ########## @@ -1254,6 +1254,22 @@ struct ShowCompactResponse { 1: required list<ShowCompactResponseElement> compacts, } +struct GetLatestCompactionInfoRequest { Review comment: The name of this function seems like it's requesting the last successful/failed/any status compaction for the table/partition. I think it should be clear that only successful and ready for cleaning statuses matter here. How about LatestFinishedCompaction, LatestDataUpdateByCompaction ... something like that? 
########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java ########## @@ -3646,6 +3649,86 @@ public ShowCompactResponse showCompact(ShowCompactRequest rqst) throws MetaExcep } } + /** + * We assume this is only called by metadata cache server to know if there are new base/delta files should be read. + * The query filters compactions by state and only returns SUCCEEDED or READY_FOR_CLEANING compactions because + * only these two states means there are new files ready to be read. + */ + @RetrySemantics.ReadOnly + public GetLatestCompactionInfoResponse getLatestCompactionInfo(GetLatestCompactionInfoRequest rqst) + throws MetaException { + GetLatestCompactionInfoResponse response = new GetLatestCompactionInfoResponse(new ArrayList<>()); + Connection dbConn = null; + PreparedStatement pst = null; + ResultSet rs = null; + try { + try { + dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); + + List<String> params = new ArrayList<>(); + // This query combines the result sets of SUCCEEDED compactions and READY_FOR_CLEANING compactions + // We also sort the result by CC_ID in descending order so that we can keep only the latest record + // according to the order in result set + StringBuilder sb = new StringBuilder() + .append("SELECT * FROM (") Review comment: I'm not sure about this but could you do: select max(id) and group by table & partition? ########## File path: standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift ########## @@ -1254,6 +1254,22 @@ struct ShowCompactResponse { 1: required list<ShowCompactResponseElement> compacts, } +struct GetLatestCompactionInfoRequest { + 1: required string dbname, + 2: required string tablename, + 3: optional list<string> partitionnames, +} + +struct LatestCompactionInfo { Review comment: CompactionInfoStruct will do the job and you can populate it with more information which might be useful in the future. 
########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java ########## @@ -3646,6 +3649,86 @@ public ShowCompactResponse showCompact(ShowCompactRequest rqst) throws MetaExcep } } + /** + * We assume this is only called by metadata cache server to know if there are new base/delta files should be read. + * The query filters compactions by state and only returns SUCCEEDED or READY_FOR_CLEANING compactions because + * only these two states means there are new files ready to be read. + */ + @RetrySemantics.ReadOnly + public GetLatestCompactionInfoResponse getLatestCompactionInfo(GetLatestCompactionInfoRequest rqst) + throws MetaException { + GetLatestCompactionInfoResponse response = new GetLatestCompactionInfoResponse(new ArrayList<>()); + Connection dbConn = null; + PreparedStatement pst = null; + ResultSet rs = null; + try { + try { + dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED); + + List<String> params = new ArrayList<>(); + // This query combines the result sets of SUCCEEDED compactions and READY_FOR_CLEANING compactions + // We also sort the result by CC_ID in descending order so that we can keep only the latest record + // according to the order in result set + StringBuilder sb = new StringBuilder() + .append("SELECT * FROM (") + .append(" SELECT") + .append(" \"CC_ID\", \"CC_DATABASE\", \"CC_TABLE\", \"CC_PARTITION\", \"CC_TYPE\"") + .append(" FROM \"COMPLETED_COMPACTIONS\"") + .append(" WHERE \"CC_STATE\" = " + quoteChar(SUCCEEDED_STATE)) + .append(" UNION ALL") + .append(" SELECT") + .append(" \"CQ_ID\" AS \"CC_ID\", \"CQ_DATABASE\" AS \"CC_DATABASE\"") + .append(" ,\"CQ_TABLE\" AS \"CC_TABLE\", \"CQ_PARTITION\" AS \"CC_PARTITION\"") + .append(" ,\"CQ_TYPE\" AS \"CC_TYPE\"") + .append(" FROM \"COMPACTION_QUEUE\"") + .append(" WHERE \"CQ_STATE\" = " + quoteChar(READY_FOR_CLEANING)) + .append(") AS compactions ") + .append(" WHERE \"CC_DATABASE\"=? 
AND \"CC_TABLE\"=?"); + params.add(rqst.getDbname()); + params.add(rqst.getTablename()); + if (rqst.getPartitionnames() != null) { Review comment: This check should also cover the case where the list size == 0, i.e. an empty partition-name list should be treated the same as a null one. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 574636) Time Spent: 4h 20m (was: 4h 10m) > [HMS] Provide new HMS API to return latest committed compaction record for a > given table > ---------------------------------------------------------------------------------------- > > Key: HIVE-24828 > URL: https://issues.apache.org/jira/browse/HIVE-24828 > Project: Hive > Issue Type: Sub-task > Components: HiveServer2 > Reporter: Kishen Das > Assignee: Yu-Wen Lai > Priority: Major > Labels: pull-request-available > Time Spent: 4h 20m > Remaining Estimate: 0h > > We need a new HMS API to return the latest committed compaction record for a > given table. This can be used by a remote cache to decide whether a given > table's file metadata has been compacted or not, in order to decide whether > file metadata has to be refreshed from the file system before serving or it > can serve the current data from the cache. -- This message was sent by Atlassian Jira (v8.3.4#803005)