[ https://issues.apache.org/jira/browse/HIVE-23840?focusedWorklogId=458895&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-458895 ]
ASF GitHub Bot logged work on HIVE-23840: ----------------------------------------- Author: ASF GitHub Bot Created on: 14/Jul/20 19:48 Start Date: 14/Jul/20 19:48 Worklog Time Spent: 10m Work Description: pvary commented on a change in pull request #1251: URL: https://github.com/apache/hive/pull/1251#discussion_r454603042 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ########## @@ -1562,20 +1580,31 @@ public int compareTo(CompressedOwid other) { try { final Path[] deleteDeltaDirs = getDeleteDeltaDirsFromSplit(orcSplit); if (deleteDeltaDirs.length > 0) { + FileSystem fs = orcSplit.getPath().getFileSystem(conf); + AcidOutputFormat.Options orcSplitMinMaxWriteIds = + AcidUtils.parseBaseOrDeltaBucketFilename(orcSplit.getPath(), conf); int totalDeleteEventCount = 0; for (Path deleteDeltaDir : deleteDeltaDirs) { - FileSystem fs = deleteDeltaDir.getFileSystem(conf); + if (!isQualifiedDeleteDeltaForSplit(orcSplitMinMaxWriteIds, deleteDeltaDir)) { + continue; + } Path[] deleteDeltaFiles = OrcRawRecordMerger.getDeltaFiles(deleteDeltaDir, bucket, new OrcRawRecordMerger.Options().isCompacting(false), null); for (Path deleteDeltaFile : deleteDeltaFiles) { try { - /** - * todo: we have OrcSplit.orcTail so we should be able to get stats from there - */ - Reader deleteDeltaReader = OrcFile.createReader(deleteDeltaFile, OrcFile.readerOptions(conf)); - if (deleteDeltaReader.getNumberOfRows() <= 0) { + ReaderData readerData = getOrcTail(deleteDeltaFile, conf, cacheTag); + OrcTail orcTail = readerData.orcTail; + if (orcTail.getFooter().getNumberOfRows() <= 0) { continue; // just a safe check to ensure that we are not reading empty delete files. 
} + OrcRawRecordMerger.KeyInterval deleteKeyInterval = findDeleteMinMaxKeys(orcTail, deleteDeltaFile); + if (!deleteKeyInterval.isIntersects(keyInterval)) { + // If there is no intersection between data and delete delta, do not read delete file + continue; + } + // Create the reader if we got the OrcTail from cache Review comment: Added more comments ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 458895) Time Spent: 1h (was: 50m) > Use LLAP to get orc metadata > ---------------------------- > > Key: HIVE-23840 > URL: https://issues.apache.org/jira/browse/HIVE-23840 > Project: Hive > Issue Type: Improvement > Components: Transactions > Reporter: Peter Vary > Assignee: Peter Vary > Priority: Major > Labels: pull-request-available > Time Spent: 1h > Remaining Estimate: 0h > > HIVE-23824 added the possibility to access ORC metadata. We can use this to > decide which delta files should be read, and which could be omitted. -- This message was sent by Atlassian Jira (v8.3.4#803005)