This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c16567eb3cb [opt](parquet) change parquet init footer read size to 
48KB (#46904)
c16567eb3cb is described below

commit c16567eb3cb9bb6140e0e5024c2bb39e822c6b29
Author: Mingyu Chen (Rayner) <morning...@163.com>
AuthorDate: Thu Jan 16 13:58:49 2025 +0800

    [opt](parquet) change parquet init footer read size to 48KB (#46904)
    
    ### What problem does this PR solve?
    
    Change the initial footer read size from 128KB to 48KB to slightly
    reduce the amount of data read.
    This is the same as Presto/Trino, because a 1GB Parquet file
    typically has a footer of 30~40KB.
    
    A user case shows that with 30 thousand Parquet files, the footer
    parse time can be reduced from:
    
    ```
    ParseFooterTime:  avg  2s28ms,  max  3s707ms,  min  905.866ms
    ```
    to
    ```
    ParseFooterTime:  avg  886.364ms,  max  1s734ms,  min  391.846ms
    ```
---
 be/src/vec/exec/format/parquet/parquet_thrift_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/be/src/vec/exec/format/parquet/parquet_thrift_util.h 
b/be/src/vec/exec/format/parquet/parquet_thrift_util.h
index b767f177f4a..1c04d748ca5 100644
--- a/be/src/vec/exec/format/parquet/parquet_thrift_util.h
+++ b/be/src/vec/exec/format/parquet/parquet_thrift_util.h
@@ -34,7 +34,7 @@ namespace doris::vectorized {
 
 constexpr uint8_t PARQUET_VERSION_NUMBER[4] = {'P', 'A', 'R', '1'};
 constexpr uint32_t PARQUET_FOOTER_SIZE = 8;
-constexpr size_t INIT_META_SIZE = 128 * 1024; // 128k
+constexpr size_t INIT_META_SIZE = 48 * 1024; // 48k
 
 static Status parse_thrift_footer(io::FileReaderSPtr file, FileMetaData** 
file_metadata,
                                   size_t* meta_size, io::IOContext* io_ctx) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to