Hi! > Andrey Borodin <x4...@yandex-team.ru> 于2019年6月29日周六 下午9:48写道:
> Hi! > Please do not use top-posting, i.e. the reply style where you quote the whole > message under your response. It makes the archives hard to read. > > > 24 июня 2019 г., в 7:53, Binguo Bao <djydew...@gmail.com> написал(а): > > > >> This is not correct: L bytes of compressed data cannot always be > decoded into at least L bytes of data. At worst we have one control byte > per 8 bytes of literal bytes. This means at most we need (L*9 + 8) / 8 > bytes with the current pglz format. > > > > Good catch! I've corrected the related code in the patch. > > ... > > <0001-Optimize-partial-TOAST-decompression-2.patch> > > I've taken a look at the code. > I think we should extract a function for the computation of max_compressed_size > and put it somewhere along with the pglz code, just in case someone > changes something about pglz, so that they do not forget about the compression > algorithm assumption. > > Also, I suggest just using 64-bit computation to avoid overflows. And I > think it is worth checking whether max_compressed_size covers the whole data and using the min of > (max_compressed_size, uncompressed_data_size). > > Also, you declared needsize and max_compressed_size too far from their use. But > this will be solved by the function extraction anyway. > > Thanks! > > Best regards, Andrey Borodin. Thanks for the suggestion. I've extracted a function for the computation of max_compressed_size and put it into pg_lzcompress.c. Best regards, Binguo Bao.
From 79a1b4c292a0629df9d7ba3dc04e879aadca7a61 Mon Sep 17 00:00:00 2001 From: BBG <djydew...@gmail.com> Date: Sun, 2 Jun 2019 19:18:46 +0800 Subject: [PATCH] Optimize partial TOAST decompression --- src/backend/access/heap/tuptoaster.c | 24 +++++++++++++++++------- src/common/pg_lzcompress.c | 22 ++++++++++++++++++++++ src/include/common/pg_lzcompress.h | 1 + 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 55d6e91..684f1b2 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -266,6 +266,7 @@ heap_tuple_untoast_attr_slice(struct varlena *attr, if (VARATT_IS_EXTERNAL_ONDISK(attr)) { struct varatt_external toast_pointer; + int32 max_size; VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); @@ -273,8 +274,13 @@ heap_tuple_untoast_attr_slice(struct varlena *attr, if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - /* fetch it back (compressed marker will get set automatically) */ - preslice = toast_fetch_datum(attr); + max_size = pglz_maximum_compressed_size(sliceoffset + slicelength, + toast_pointer.va_rawsize); + /* + * Be sure to get enough compressed slice + * and compressed marker will get set automatically + */ + preslice = toast_fetch_datum_slice(attr, 0, max_size); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { @@ -2031,7 +2037,8 @@ toast_fetch_datum(struct varlena *attr) * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * - * Note that this function only supports non-compressed external datums. + * Note that this function supports non-compressed external datums + * and compressed external datum slices at the start of the object. 
* ---------- */ static struct varlena * @@ -2072,10 +2079,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* - * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* - * we can't return a compressed datum which is meaningful to toast later + * It's meaningful to fetch slices at the start of a compressed datum. */ - Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); + Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; @@ -2091,7 +2097,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) result = (struct varlena *) palloc(length + VARHDRSZ); - SET_VARSIZE(result, length + VARHDRSZ); + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { + SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); + } else { + SET_VARSIZE(result, length + VARHDRSZ); + } if (length == 0) return result; /* Can save a lot of work at this point! */ diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c index 988b398..2b5f112 100644 --- a/src/common/pg_lzcompress.c +++ b/src/common/pg_lzcompress.c @@ -771,3 +771,25 @@ pglz_decompress(const char *source, int32 slen, char *dest, */ return (char *) dp - dest; } + + + +/* ---------- + * pglz_max_compressed_size - + * + * Calculate the maximum size of the compressed slice corresponding to the + * raw slice. Return the maximum size, or raw size if maximum size is greater + * than raw size. + * ---------- + */ +int32 +pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size) +{ + int32 result; + + /* + * Use int64 to prevent overflow during calculation. + */ + result = (int32)((int64)raw_slice_size * 9 + 8) / 8; + return result > raw_size ? 
raw_size : result; +} diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h index 5555764..cda3e1d 100644 --- a/src/include/common/pg_lzcompress.h +++ b/src/include/common/pg_lzcompress.h @@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy); extern int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete); +extern int32 pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size); #endif /* _PG_LZCOMPRESS_H_ */ -- 2.7.4