On Wed, Sep 25, 2019 at 05:38:34PM -0300, Alvaro Herrera wrote:
Hello, can you please update this patch?


I'm not the patch author, but I've been looking at the patch recently
and I have a rebased version at hand - so attached.

FWIW I believe the patch is solid and in good shape, and it got stuck
after I reported some benchmarks showing somewhat flaky performance. I
know Binguo Bao was trying to reproduce the benchmark, and I assume the
silence means he was not successful :-(

On the larger data set the patch however performed very nicely, so maybe
I just did something stupid while running the smaller one, or maybe it's
just noise (the queries were just a couple of ms in that test). I do
plan to rerun the benchmarks and investigate a bit - if I find the patch
is fine, I'd like to commit it shortly.


regards

--
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 7167c0e5f78d74a0c02a76748e7b8caaabe65ccd Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Fri, 27 Sep 2019 00:51:52 +0200
Subject: [PATCH] Optimize partial TOAST decompression

---
 src/backend/access/common/detoast.c  | 53 +++++++++++++++++++++++-----
 src/common/pg_lzcompress.c           | 27 ++++++++++++++
 src/include/access/toast_internals.h |  1 +
 src/include/common/pg_lzcompress.h   |  1 +
 4 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/src/backend/access/common/detoast.c 
b/src/backend/access/common/detoast.c
index c8b49d6a12..4727c892a8 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -196,6 +196,8 @@ heap_tuple_untoast_attr(struct varlena *attr)
  *
  *             Public entry point to get back part of a toasted value
  *             from compression or external storage.
+ *
+ * Note: when slicelength is negative, return suffix of the value.
  * ----------
  */
 struct varlena *
@@ -217,8 +219,29 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
                if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
                        return toast_fetch_datum_slice(attr, sliceoffset, 
slicelength);
 
-               /* fetch it back (compressed marker will get set automatically) 
*/
-               preslice = toast_fetch_datum(attr);
+               /*
+                * When a prefix of the value is requested, fetch enough slices 
to
+                * decompress. Otherwise, fetch all slices.
+                */
+               if (slicelength > 0 && sliceoffset >= 0)
+               {
+                       int32 max_size;
+
+                       /*
+                        * Determine maximum amount of compressed data needed 
for a prefix
+                        * of a given length (after decompression).
+                        */
+                       max_size = pglz_maximum_compressed_size(sliceoffset + 
slicelength,
+                                                                               
                        TOAST_COMPRESS_SIZE(attr));
+
+                       /*
+                        * Fetch enough compressed slices, compressed marker 
will get set
+                        * automatically.
+                        */
+                       preslice = toast_fetch_datum_slice(attr, 0, max_size);
+               }
+               else
+                       preslice = toast_fetch_datum(attr);
        }
        else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
        {
@@ -476,7 +499,9 @@ toast_fetch_datum(struct varlena *attr)
  *     Reconstruct a segment of a Datum from the chunks saved
  *     in the toast relation
  *
- *     Note that this function only supports non-compressed external datums.
+ *     Note that this function supports non-compressed external datums
+ *     and compressed external datums (in which case the requrested slice
+ *  has to be a prefix, i.e. sliceoffset has to be 0).
  * ----------
  */
 static struct varlena *
@@ -517,10 +542,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 
sliceoffset, int32 length)
        VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
        /*
-        * It's nonsense to fetch slices of a compressed datum -- this isn't 
lo_*
-        * we can't return a compressed datum which is meaningful to toast later
+        * It's meaningful to fetch slices at the start of a compressed datum,
+        * since we can decompress the prefix raw data without decompressing the
+        * whole value.
         */
-       Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+       Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == 
sliceoffset);
 
        attrsize = toast_pointer.va_extsize;
        totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@@ -531,12 +557,23 @@ toast_fetch_datum_slice(struct varlena *attr, int32 
sliceoffset, int32 length)
                length = 0;
        }
 
+       /*
+        * When fetching a prefix of a compressed external datum, account for
+        * the rawsize tracking amount of raw data (it's stored at the beginning
+        * as an int32 value).
+        */
+       if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0)
+               length = length + sizeof(int32);
+
        if (((sliceoffset + length) > attrsize) || length < 0)
                length = attrsize - sliceoffset;
 
        result = (struct varlena *) palloc(length + VARHDRSZ);
 
-       SET_VARSIZE(result, length + VARHDRSZ);
+       if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+               SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+       else
+               SET_VARSIZE(result, length + VARHDRSZ);
 
        if (length == 0)
                return result;                  /* Can save a lot of work at 
this point! */
@@ -720,7 +757,7 @@ toast_decompress_datum(struct varlena *attr)
        SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
 
        if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
-                                               VARSIZE(attr) - 
TOAST_COMPRESS_HDRSZ,
+                                               TOAST_COMPRESS_SIZE(attr),
                                                VARDATA(result),
                                                TOAST_COMPRESS_RAWSIZE(attr), 
true) < 0)
                elog(ERROR, "compressed data is corrupted");
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 988b3987d0..18766ade02 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -771,3 +771,30 @@ pglz_decompress(const char *source, int32 slen, char *dest,
         */
        return (char *) dp - dest;
 }
+
+
+/* ----------
+ * pglz_max_compressed_size -
+ *
+ *             Calculate the maximum compressed size for a given amount of raw 
data.
+ *             Return the maximum size, or total compressed size if maximum 
size is
+ *             larger than total compressed size.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
+{
+       int32 compressed_size;
+
+       /*
+        * Use int64 to prevent overflow during calculation.
+        */
+       compressed_size = (int32) ((int64) rawsize * 9 + 8) / 8;
+
+       /*
+        * Maximum compressed size can't be larger than total compressed size.
+        */
+       compressed_size = Min(compressed_size, total_compressed_size);
+
+       return compressed_size;
+}
diff --git a/src/include/access/toast_internals.h 
b/src/include/access/toast_internals.h
index 494b07a4b1..b8703d8c94 100644
--- a/src/include/access/toast_internals.h
+++ b/src/include/access/toast_internals.h
@@ -31,6 +31,7 @@ typedef struct toast_compress_header
  */
 #define TOAST_COMPRESS_HDRSZ           ((int32) sizeof(toast_compress_header))
 #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) 
(ptr))->rawsize)
+#define TOAST_COMPRESS_SIZE(ptr)       ((int32) VARSIZE(ptr) - 
TOAST_COMPRESS_HDRSZ)
 #define TOAST_COMPRESS_RAWDATA(ptr) \
        (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
 #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
diff --git a/src/include/common/pg_lzcompress.h 
b/src/include/common/pg_lzcompress.h
index 555576436c..e59fe2eaea 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, 
char *dest,
                                                   const PGLZ_Strategy 
*strategy);
 extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
                                                         int32 rawsize, bool 
check_complete);
+extern int32 pglz_maximum_compressed_size(int32 rawsize, int32 
total_compressed_size);
 
 #endif                                                 /* _PG_LZCOMPRESS_H_ */
-- 
2.21.0

Reply via email to