This is an automated email from the ASF dual-hosted git repository. prozsa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 5371e0c6df3e329398712af3ebb739465b947454 Author: Fang-Yu Rao <[email protected]> AuthorDate: Sun Jan 12 21:18:48 2025 -0800 IMPALA-13666: Provide a non-null fileMetadataStats for HdfsPartition IMPALA-13154 added the method getFileMetadataStats() to HdfsPartition.java, which returns the file metadata statistics. The method requires the corresponding HdfsPartition instance to have a non-null 'fileMetadataStats_' field. This patch revises two existing constructors of HdfsPartition to provide a non-null value for 'fileMetadataStats_'. This makes it easier for a third party extension to set up and update the 'fileMetadataStats_' field. A third party extension has to update 'fileMetadataStats_' if it would like to use this field to get the size of the partition, since all three fields in 'fileMetadataStats_' default to 0. A new constructor was also added for HdfsPartition that allows a third party extension to provide its own FileMetadataStats when instantiating an HdfsPartition. To facilitate instantiating a FileMetadataStats, a new constructor was added for FileMetadataStats that takes a List of FileDescriptors from which to construct the FileMetadataStats. 
Change-Id: I7e690729fcaebb1e380cc61f2b746783c86dcbf7 Reviewed-on: http://gerrit.cloudera.org:8080/22340 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../org/apache/impala/catalog/HdfsPartition.java | 57 +++++++++++++++++++--- .../java/org/apache/impala/catalog/HdfsTable.java | 12 +++++ 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java index 40978e3d5..b712e9a7b 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java @@ -767,11 +767,16 @@ public class HdfsPartition extends CatalogObjectImpl private final long lastRefreshEventId_; // File statistics corresponding to the encoded file descriptors. - private FileMetadataStats fileMetadataStats_ = null; + private final FileMetadataStats fileMetadataStats_; /** - * Constructor. Needed for third party extensions that want to use their own builder - * to construct the object. + * Constructor. Needed for third party extensions that want to use their own builder + * to construct the object. A third party extension has to update the field of + * 'fileMetadataStats_' if it would like to use this field to get the size of the + * partition. + * + * Note that 'partitionIdCounter_' is not accessible to the subclasses in that its + * private. 
*/ protected HdfsPartition(HdfsTable table, long prevId, String partName, List<LiteralExpr> partitionKeyValues, HdfsStorageDescriptor fileFormatDescriptor, @@ -789,9 +794,18 @@ public class HdfsPartition extends CatalogObjectImpl isMarkedCached, accessLevel, hmsParameters, cachedMsPartitionDescriptor, partitionStats, hasIncrementalStats, numRows, writeId, inFlightEvents, /*createEventId=*/-1L, /*lastCompactionId*/-1L, - /*lastRefreshEventId*/-1L, /*fileMetadataStats*/null); + /*lastRefreshEventId*/-1L, new HdfsTable.FileMetadataStats()); } + /** + * Constructor. Needed for third party extensions that want to use their own builder + * to construct the object. A third party extension has to update the field of + * 'fileMetadataStats_' if it would like to use this field to get the size of the + * partition. + * + * Note that 'partitionIdCounter_' is not accessible to the subclasses in that its + * private. + */ protected HdfsPartition(HdfsTable table, long id, long prevId, String partName, List<LiteralExpr> partitionKeyValues, HdfsStorageDescriptor fileFormatDescriptor, @Nonnull ImmutableList<byte[]> encodedFileDescriptors, @@ -808,7 +822,35 @@ public class HdfsPartition extends CatalogObjectImpl isMarkedCached, accessLevel, hmsParameters, cachedMsPartitionDescriptor, partitionStats, hasIncrementalStats, numRows, writeId, inFlightEvents, /*createEventId=*/-1L, /*lastCompactionId*/-1L, - /*lastRefreshEventId*/-1L, /*fileMetadataStats*/null); + /*lastRefreshEventId*/-1L, new HdfsTable.FileMetadataStats()); + } + + /** + * Constructor. Needed for third party extensions that want to use their own builder + * to construct the object. It allows third party extensions to provide their own + * FileMetadataStats when instantiating an HdfsPartition. + * + * Note that 'partitionIdCounter_' is not accessible to the subclasses in that its + * private. 
+ */ + protected HdfsPartition(HdfsTable table, long id, long prevId, String partName, + List<LiteralExpr> partitionKeyValues, HdfsStorageDescriptor fileFormatDescriptor, + @Nonnull ImmutableList<byte[]> encodedFileDescriptors, + ImmutableList<byte[]> encodedInsertFileDescriptors, + ImmutableList<byte[]> encodedDeleteFileDescriptors, + HdfsPartitionLocationCompressor.Location location, + boolean isMarkedCached, TAccessLevel accessLevel, Map<String, String> hmsParameters, + CachedHmsPartitionDescriptor cachedMsPartitionDescriptor, + byte[] partitionStats, boolean hasIncrementalStats, long numRows, long writeId, + InFlightEvents inFlightEvents, long createEventId, long lastCompactionId, + HdfsTable.FileMetadataStats fileMetadataStats) { + this(table, partitionIdCounter_.getAndIncrement(), prevId, partName, + partitionKeyValues, fileFormatDescriptor, encodedFileDescriptors, + encodedInsertFileDescriptors, encodedDeleteFileDescriptors, location, + isMarkedCached, accessLevel, hmsParameters, cachedMsPartitionDescriptor, + partitionStats, hasIncrementalStats, numRows, writeId, + inFlightEvents, /*createEventId=*/-1L, /*lastCompactionId*/-1L, + /*lastRefreshEventId*/-1L, fileMetadataStats); } protected HdfsPartition(HdfsTable table, long id, long prevId, String partName, @@ -822,6 +864,7 @@ public class HdfsPartition extends CatalogObjectImpl byte[] partitionStats, boolean hasIncrementalStats, long numRows, long writeId, InFlightEvents inFlightEvents, long createEventId, long lastCompactionId, long lastRefreshEventId, FileMetadataStats fileMetadataStats) { + Preconditions.checkArgument(fileMetadataStats != null); table_ = table; id_ = id; prevId_ = prevId; @@ -1069,7 +1112,7 @@ public class HdfsPartition extends CatalogObjectImpl } public FileMetadataStats getFileMetadataStats() { - return Preconditions.checkNotNull(fileMetadataStats_); + return fileMetadataStats_; } @Override @@ -1177,7 +1220,7 @@ public class HdfsPartition extends CatalogObjectImpl @Override public long 
getSize() { - if (fileMetadataStats_ != null) { + if (fileMetadataStats_.numFiles > 0) { return fileMetadataStats_.totalFileBytes; } long result = 0; diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index e6331fed0..59b12ce75 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -362,6 +362,18 @@ public class HdfsTable extends Table implements FeFsTable { // Total size (in bytes) of all files in a table/partition. public long totalFileBytes = 0; + public FileMetadataStats() {} + + /** + * This constructor allows third party extensions to instantiate a FileMetadataStats + * with a List of FileDescriptor's. + */ + public FileMetadataStats(List<FileDescriptor> fds) { + for (HdfsPartition.FileDescriptor fd : fds) { + accumulate(fd); + } + } + // Initializes the values of the storage stats. public void init() { numFiles = 0;
