Hi, hackers! I'm a student participating in GSoC 2019, and my project is related to TOAST slices. While getting familiar with the PostgreSQL codebase, I found that PG_DETOAST_DATUM_SLICE, when run on a compressed TOAST entry, fetches all of the compressed data chunks and only then extracts the relevant slice. Obviously, this is unnecessary; we only need to fetch the data chunks we actually need.
The patch optimizes partial TOAST decompression. As an example of the possible improvement, consider this trivial test case: --------------------------------------------------------------------- create table slicingtest ( id serial primary key, a text ); insert into slicingtest (a) select repeat('1234567890-=abcdefghijklmnopqrstuvwxyz', 1000000) as a from generate_series(1,100); \timing select sum(length(substr(a, 0, 20))) from slicingtest; --------------------------------------------------------------------- Environment: Linux 4.15.0-33-generic #36~16.04.1-Ubuntu x86_64 GNU/Linux On master, I get Time: 28.123 ms (average of ten runs) With the patch, I get Time: 2.306 ms (average of ten runs) This is more than a 10x improvement. I believe the patch will help even more when the number of TOAST data chunks is larger; in this test case there are about 200 related TOAST data chunks for each entry. Related discussion: https://www.postgresql.org/message-id/flat/CACowWR07EDm7Y4m2kbhN_jnys%3DBBf9A6768RyQdKm_%3DNpkcaWg%40mail.gmail.com Best regards, Binguo Bao.
From a7c99439ffe309526b57fe26ab367e4b7bf62f39 Mon Sep 17 00:00:00 2001 From: BBG <djydew...@gmail.com> Date: Sun, 2 Jun 2019 19:18:46 +0800 Subject: [PATCH] Optimize partial TOAST decompression --- src/backend/access/heap/tuptoaster.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 55d6e91..7d30538 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -273,8 +273,11 @@ heap_tuple_untoast_attr_slice(struct varlena *attr, if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - /* fetch it back (compressed marker will get set automatically) */ - preslice = toast_fetch_datum(attr); + /* + * Be sure to get enough compressed slice + * and compressed marker will get set automatically + */ + preslice = toast_fetch_datum_slice(attr, 0, sliceoffset + slicelength + 1); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { @@ -2031,7 +2034,8 @@ toast_fetch_datum(struct varlena *attr) * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * - * Note that this function only supports non-compressed external datums. + * Note that this function supports non-compressed external datums + * and compressed external datum slices at the start of the object. * ---------- */ static struct varlena * @@ -2072,10 +2076,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* - * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* - * we can't return a compressed datum which is meaningful to toast later + * It's meaningful to fetch slices at the start of a compressed datum. 
*/ - Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); + Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; @@ -2091,7 +2094,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) result = (struct varlena *) palloc(length + VARHDRSZ); - SET_VARSIZE(result, length + VARHDRSZ); + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { + SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); + } else { + SET_VARSIZE(result, length + VARHDRSZ); + } if (length == 0) return result; /* Can save a lot of work at this point! */ -- 2.7.4