This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/orc by this push:
new 0ec2e079cd6 [Optimize] Optimize stripe footer multiple reads. (#315)
0ec2e079cd6 is described below
commit 0ec2e079cd6ed78a87929d28c33e1b136933c69d
Author: Qi Chen <[email protected]>
AuthorDate: Wed May 21 15:07:09 2025 +0800
[Optimize] Optimize stripe footer multiple reads. (#315)
---
c++/src/Reader.cc | 9 ++-------
c++/src/StripeStream.cc | 5 +++--
c++/src/StripeStream.hh | 8 +++++---
3 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 4c0144da89e..13e1b82d4e3 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -716,7 +716,7 @@ namespace orc {
return std::unique_ptr<StripeInformation>(new StripeInformationImpl(
stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(),
stripeInfo.footerlength(), stripeInfo.numberofrows(),
contents->stream.get(),
- *contents->pool, contents->compression, contents->blockSize,
contents->readerMetrics));
+ *contents->pool, contents->compression, contents->blockSize,
contents->readerMetrics, nullptr));
}
FileVersion ReaderImpl::getFormatVersion() const {
@@ -1228,7 +1228,7 @@ namespace orc {
currentStripeInfo.offset(), currentStripeInfo.indexlength(),
currentStripeInfo.datalength(), currentStripeInfo.footerlength(),
currentStripeInfo.numberofrows(), contents->stream.get(),
*contents->pool,
- contents->compression, contents->blockSize,
contents->readerMetrics));
+ contents->compression, contents->blockSize, contents->readerMetrics,
&currentStripeFooter));
streams.clear();
contents->stream->beforeReadStripe(std::move(currentStripeInformation),
selectedColumns,
streams);
@@ -1266,11 +1266,6 @@ namespace orc {
if (stringDictFilter != nullptr) {
std::list<std::string> dictFilterColumnNames;
- std::unique_ptr<StripeInformation> currentStripeInformation(new
StripeInformationImpl(
- currentStripeInfo.offset(), currentStripeInfo.indexlength(),
- currentStripeInfo.datalength(), currentStripeInfo.footerlength(),
- currentStripeInfo.numberofrows(), contents->stream.get(),
*contents->pool,
- contents->compression, contents->blockSize,
contents->readerMetrics));
stringDictFilter->fillDictFilterColumnNames(std::move(currentStripeInformation),
dictFilterColumnNames);
std::unordered_map<uint64_t, std::string> columnIdToNameMap;
diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc
index 8efa23efa86..56cde6bfe8d 100644
--- a/c++/src/StripeStream.cc
+++ b/c++/src/StripeStream.cc
@@ -133,13 +133,14 @@ namespace orc {
}
void StripeInformationImpl::ensureStripeFooterLoaded() const {
- if (stripeFooter.get() == nullptr) {
+ if (stripeFooter == nullptr && managedStripeFooter.get() == nullptr) {
std::unique_ptr<SeekableInputStream> pbStream =
createDecompressor(compression,
std::make_unique<SeekableFileInputStream>(
stream, offset + indexLength + dataLength,
footerLength, memory),
blockSize, memory, metrics);
- stripeFooter = std::make_unique<proto::StripeFooter>();
+ managedStripeFooter = std::make_unique<proto::StripeFooter>();
+ stripeFooter = managedStripeFooter.get();
if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) {
throw ParseError("Failed to parse the stripe footer");
}
diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh
index 57e51ef76f0..296305091a1 100644
--- a/c++/src/StripeStream.hh
+++ b/c++/src/StripeStream.hh
@@ -134,7 +134,8 @@ namespace orc {
MemoryPool& memory;
CompressionKind compression;
uint64_t blockSize;
- mutable std::unique_ptr<proto::StripeFooter> stripeFooter;
+ mutable proto::StripeFooter* stripeFooter;
+ mutable std::unique_ptr<proto::StripeFooter> managedStripeFooter;
ReaderMetrics* metrics;
void ensureStripeFooterLoaded() const;
@@ -142,7 +143,7 @@ namespace orc {
StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t
_dataLength,
uint64_t _footerLength, uint64_t _numRows,
InputStream* _stream,
MemoryPool& _memory, CompressionKind _compression,
uint64_t _blockSize,
- ReaderMetrics* _metrics)
+ ReaderMetrics* _metrics, proto::StripeFooter*
_stripeFooter)
: offset(_offset),
indexLength(_indexLength),
dataLength(_dataLength),
@@ -152,7 +153,8 @@ namespace orc {
memory(_memory),
compression(_compression),
blockSize(_blockSize),
- metrics(_metrics) {
+ metrics(_metrics),
+ stripeFooter(_stripeFooter) {
// PASS
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]