Hi hackers,

> # Page storage (Plan C)
>
> Further, since the size of the compressed address area is fixed, the address
> file and data file described above can also be combined into one file:
>
>         0       1       2     131071    0         1         2
> +=======+=======+=======+     +=======+=========+=========+
> | head  |   1   |   2   | ... |       | data1   | data2   | ...
> +=======+=======+=======+     +=======+=========+=========+
>   head  |            address          |          data          |

I have implemented a prototype of the storage method above. Any suggestions
are welcome.

# Definitions for page compress file storage

/*
 * layout of the page compress file:
 *
 * - PageCompressHeader
 * - PageCompressAddr[]
 * - chunks of PageCompressData
 */
typedef struct PageCompressHeader
{
	pg_atomic_uint32	nblocks;			/* number of total blocks in this segment */
	pg_atomic_uint32	allocated_chunks;	/* number of total allocated chunks in data area */
	uint16				chunk_size;			/* size of each chunk, must be 1/2, 1/4 or 1/8 of BLCKSZ */
	uint8				algorithm;			/* compression algorithm, 1=pglz, 2=zstd */
} PageCompressHeader;

typedef struct PageCompressAddr
{
	uint8				nchunks;			/* number of chunks for this block */
	uint8				allocated_chunks;	/* number of chunks allocated for this block */

	/*
	 * variable-length field: 1-based chunk number array for this block;
	 * the size of the array must be 2, 4 or 8
	 */
	pc_chunk_number_t	chunknos[FLEXIBLE_ARRAY_MEMBER];
} PageCompressAddr;

typedef struct PageCompressData
{
	char	page_header[SizeOfPageHeaderData];	/* page header */
	uint32	size;								/* size of compressed data */
	char	data[FLEXIBLE_ARRAY_MEMBER];		/* compressed page, except for the page header */
} PageCompressData;
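
For illustration, here is a minimal sketch of how this layout could map block
and chunk numbers to byte offsets in the combined file. The struct names
mirror the definitions above, but the helper names, the assumption that the
address area always holds RELSEG_SIZE entries, and the exact rounding are
mine, not the patch's:

/*
 * Sketch only: map block/chunk numbers to byte offsets under the layout
 * above (header, fixed-size address area, then chunk data).
 */
#define PC_MAX_CHUNKS(chunk_size)	(BLCKSZ / (chunk_size))
#define PC_ADDR_SIZE(chunk_size) \
	(offsetof(PageCompressAddr, chunknos) + \
	 sizeof(pc_chunk_number_t) * PC_MAX_CHUNKS(chunk_size))

/* byte offset of the address entry for a block within its segment file */
static inline off_t
pc_addr_offset(BlockNumber blkno_in_seg, uint16 chunk_size)
{
	return (off_t) sizeof(PageCompressHeader) +
		   (off_t) blkno_in_seg * PC_ADDR_SIZE(chunk_size);
}

/* byte offset of a chunk in the data area (chunk numbers are 1-based) */
static inline off_t
pc_chunk_offset(pc_chunk_number_t chunkno, uint16 chunk_size)
{
	off_t		data_start = (off_t) sizeof(PageCompressHeader) +
							 (off_t) RELSEG_SIZE * PC_ADDR_SIZE(chunk_size);

	return data_start + (off_t) (chunkno - 1) * chunk_size;
}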


# Usage

Whether a table or index is compressed is controlled through its storage
parameters:

- compress_type

  Sets whether to compress and which compression algorithm to use.
  Supported values: none, pglz, zstd.

- compress_chunk_size

  A chunk is the smallest unit of storage space allocated for a compressed
  page. The chunk size can only be 1/2, 1/4 or 1/8 of BLCKSZ.

- compress_prealloc_chunks

  The number of chunks pre-allocated for each page. The maximum allowed
  value is BLCKSZ/compress_chunk_size - 1. If a compressed page needs fewer
  than compress_prealloc_chunks chunks, compress_prealloc_chunks chunks are
  allocated anyway, to avoid storage fragmentation if the page later needs
  more storage space (see the sketch after this list).
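
To make the pre-allocation rule concrete, here is a minimal sketch of the
allocation decision. It is illustrative only and not taken from the patch;
the function name is made up:

/*
 * Sketch of the pre-allocation rule described above. compressed_size is
 * the number of bytes produced by compressing one page.
 */
static int
pc_chunks_to_allocate(int compressed_size, int chunk_size, int prealloc_chunks)
{
	/* chunks actually needed for the compressed page, rounded up */
	int			nchunks = (compressed_size + chunk_size - 1) / chunk_size;

	/*
	 * Allocate at least compress_prealloc_chunks, so the page can usually
	 * grow in place later without fragmenting the data area.
	 */
	return (nchunks > prealloc_chunks) ? nchunks : prealloc_chunks;
}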


# Sample

## requirement

- zstd

## build

./configure --with-zstd
make
make install

## create compressed table and index

create table tb1(id int,c1 text);
create table tb1_zstd(id int,c1 text) 
with(compress_type=zstd,compress_chunk_size=1024);
create table tb1_zstd_4(id int,c1 text) 
with(compress_type=zstd,compress_chunk_size=1024,compress_prealloc_chunks=4);

create index tb1_idx_id on tb1(id);
create index tb1_idx_id_zstd on tb1(id) 
with(compress_type=zstd,compress_chunk_size=1024);
create index tb1_idx_id_zstd_4 on tb1(id) 
with(compress_type=zstd,compress_chunk_size=1024,compress_prealloc_chunks=4);

create index tb1_idx_c1 on tb1(c1);
create index tb1_idx_c1_zstd on tb1(c1) 
with(compress_type=zstd,compress_chunk_size=1024);
create index tb1_idx_c1_zstd_4 on tb1(c1) 
with(compress_type=zstd,compress_chunk_size=1024,compress_prealloc_chunks=4);

insert into tb1 select generate_series(1,1000000),md5(random()::text);
insert into tb1_zstd select generate_series(1,1000000),md5(random()::text);
insert into tb1_zstd_4 select generate_series(1,1000000),md5(random()::text);

## show size of table and index

postgres=# \d+
                            List of relations
 Schema |    Name    | Type  |  Owner   | Persistence | Size  | Description
--------+------------+-------+----------+-------------+-------+-------------
 public | tb1        | table | postgres | permanent   | 65 MB |
 public | tb1_zstd   | table | postgres | permanent   | 37 MB |
 public | tb1_zstd_4 | table | postgres | permanent   | 37 MB |
(3 rows)

postgres=# \di+
                                      List of relations
 Schema |       Name        | Type  |  Owner   | Table | Persistence | Size  | Description
--------+-------------------+-------+----------+-------+-------------+-------+-------------
 public | tb1_idx_c1        | index | postgres | tb1   | permanent   | 73 MB |
 public | tb1_idx_c1_zstd   | index | postgres | tb1   | permanent   | 36 MB |
 public | tb1_idx_c1_zstd_4 | index | postgres | tb1   | permanent   | 41 MB |
 public | tb1_idx_id        | index | postgres | tb1   | permanent   | 21 MB |
 public | tb1_idx_id_zstd   | index | postgres | tb1   | permanent   | 13 MB |
 public | tb1_idx_id_zstd_4 | index | postgres | tb1   | permanent   | 15 MB |
(6 rows)


# pgbench performance testing (TPC-B)

Compress the pgbench_accounts table and its primary key index with
(compress_type=zstd, compress_chunk_size=1024), then compare the performance
of the original table with that of the compressed table.

Test commands:

   pgbench -i -s 1000
   pgbench -n -T 300 -c 16 -j 16 db1

TPS comparison:

   original table   : 20081
   compressed table : 19984


Comparison of storage space:

                         original   compressed          compressed
                                    (before benchmark)  (after benchmark*)
 pgbench_accounts        13 GB      1660 MB             1711 MB
 pgbench_accounts_pkey   2142 MB    738 MB              816 MB

*note: after the benchmark, some compressed pages need 2 chunks to store
their data (for example, a page that originally fit in one 1024-byte chunk
but compresses to more than 1024 bytes after updates).


# TODO list

1. support setting the compression level
2. support ALTER TABLE/INDEX xx SET (...)
3. support checksums in pg_basebackup, pg_checksums and replication
4. support pg_rewind
5. information output for compressed pages' metadata


# Problem

When compress_chunk_size=1024, about 4 MB per segment is needed to store the
addresses (with BLCKSZ = 8192 a segment holds 131,072 blocks; assuming a
4-byte pc_chunk_number_t, each address entry takes 2 + 8 * 4 = 34 bytes,
about 4.25 MB in total). Because this overhead is fixed, small relations can
take more space after compression than before.

The solution under consideration is as follows: split the address and the
data of compressed pages into two files, and allocate disk space for the
address file on demand as well, extending it by at least one BLCKSZ per
expansion. A sketch of this idea follows.
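
A rough sketch of that idea, assuming the separate address file is grown by
whole blocks of zeros; the function name, the file-descriptor handling and
the error path are simplified placeholders, not code from the patch:

#include <unistd.h>

/*
 * Sketch only: extend a separate page compress address file on demand,
 * one BLCKSZ at a time.
 */
static void
pc_extend_addr_file(int fd, off_t needed_bytes)
{
	off_t		fsize = lseek(fd, 0, SEEK_END);

	while (fsize < needed_bytes)
	{
		char		zeros[BLCKSZ] = {0};

		/* grow the address area by exactly one block of zeros */
		if (write(fd, zeros, BLCKSZ) != BLCKSZ)
			elog(ERROR, "could not extend page compress address file");
		fsize += BLCKSZ;
	}
}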


Best Regards
Chen Huajun

Attachment: page_compress_prototype.patch