On Wed, Sep 25, 2019 at 05:38:34PM -0300, Alvaro Herrera wrote:
Hello, can you please update this patch?
I'm not the patch author, but I've been looking at the patch recently
and I have a rebased version at hand - so attached.
FWIW I believe the patch is solid and in good shape, and it got stuck
after I reported some benchmarks showing somewhat flaky performance. I
know Binguo Bao was trying to reproduce the benchmark, and I assume the
silence means he was not successful :-(
On the larger data set the patch however performed very nicely, so maybe
I just did something stupid while running the smaller one, or maybe it's
just noise (the queries were just a couple of ms in that test). I do
plan to rerun the benchmarks and investigate a bit - if I find the patch
is fine, I'd like to commit it shortly.
regards
--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 7167c0e5f78d74a0c02a76748e7b8caaabe65ccd Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Fri, 27 Sep 2019 00:51:52 +0200
Subject: [PATCH] Optimize partial TOAST decompression
---
src/backend/access/common/detoast.c | 53 +++++++++++++++++++++++-----
src/common/pg_lzcompress.c | 27 ++++++++++++++
src/include/access/toast_internals.h | 1 +
src/include/common/pg_lzcompress.h | 1 +
4 files changed, 74 insertions(+), 8 deletions(-)
diff --git a/src/backend/access/common/detoast.c
b/src/backend/access/common/detoast.c
index c8b49d6a12..4727c892a8 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -196,6 +196,8 @@ heap_tuple_untoast_attr(struct varlena *attr)
*
* Public entry point to get back part of a toasted value
* from compression or external storage.
+ *
+ * Note: when slicelength is negative, return suffix of the value.
* ----------
*/
struct varlena *
@@ -217,8 +219,29 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
return toast_fetch_datum_slice(attr, sliceoffset,
slicelength);
- /* fetch it back (compressed marker will get set automatically)
*/
- preslice = toast_fetch_datum(attr);
+ /*
+ * When a prefix of the value is requested, fetch enough slices
to
+ * decompress. Otherwise, fetch all slices.
+ */
+ if (slicelength > 0 && sliceoffset >= 0)
+ {
+ int32 max_size;
+
+ /*
+ * Determine maximum amount of compressed data needed
for a prefix
+ * of a given length (after decompression).
+ */
+ max_size = pglz_maximum_compressed_size(sliceoffset +
slicelength,
+
TOAST_COMPRESS_SIZE(attr));
+
+ /*
+ * Fetch enough compressed slices, compressed marker
will get set
+ * automatically.
+ */
+ preslice = toast_fetch_datum_slice(attr, 0, max_size);
+ }
+ else
+ preslice = toast_fetch_datum(attr);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
@@ -476,7 +499,9 @@ toast_fetch_datum(struct varlena *attr)
* Reconstruct a segment of a Datum from the chunks saved
* in the toast relation
*
- * Note that this function only supports non-compressed external datums.
+ * Note that this function supports non-compressed external datums
+ * and compressed external datums (in which case the requrested slice
+ * has to be a prefix, i.e. sliceoffset has to be 0).
* ----------
*/
static struct varlena *
@@ -517,10 +542,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32
sliceoffset, int32 length)
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
/*
- * It's nonsense to fetch slices of a compressed datum -- this isn't
lo_*
- * we can't return a compressed datum which is meaningful to toast later
+ * It's meaningful to fetch slices at the start of a compressed datum,
+ * since we can decompress the prefix raw data without decompressing the
+ * whole value.
*/
- Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+ Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 ==
sliceoffset);
attrsize = toast_pointer.va_extsize;
totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@@ -531,12 +557,23 @@ toast_fetch_datum_slice(struct varlena *attr, int32
sliceoffset, int32 length)
length = 0;
}
+ /*
+ * When fetching a prefix of a compressed external datum, account for
+ * the rawsize tracking amount of raw data (it's stored at the beginning
+ * as an int32 value).
+ */
+ if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0)
+ length = length + sizeof(int32);
+
if (((sliceoffset + length) > attrsize) || length < 0)
length = attrsize - sliceoffset;
result = (struct varlena *) palloc(length + VARHDRSZ);
- SET_VARSIZE(result, length + VARHDRSZ);
+ if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+ SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+ else
+ SET_VARSIZE(result, length + VARHDRSZ);
if (length == 0)
return result; /* Can save a lot of work at
this point! */
@@ -720,7 +757,7 @@ toast_decompress_datum(struct varlena *attr)
SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
- VARSIZE(attr) -
TOAST_COMPRESS_HDRSZ,
+ TOAST_COMPRESS_SIZE(attr),
VARDATA(result),
TOAST_COMPRESS_RAWSIZE(attr),
true) < 0)
elog(ERROR, "compressed data is corrupted");
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 988b3987d0..18766ade02 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -771,3 +771,30 @@ pglz_decompress(const char *source, int32 slen, char *dest,
*/
return (char *) dp - dest;
}
+
+
+/* ----------
+ * pglz_max_compressed_size -
+ *
+ * Calculate the maximum compressed size for a given amount of raw
data.
+ * Return the maximum size, or total compressed size if maximum
size is
+ * larger than total compressed size.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size)
+{
+ int32 compressed_size;
+
+ /*
+ * Use int64 to prevent overflow during calculation.
+ */
+ compressed_size = (int32) ((int64) rawsize * 9 + 8) / 8;
+
+ /*
+ * Maximum compressed size can't be larger than total compressed size.
+ */
+ compressed_size = Min(compressed_size, total_compressed_size);
+
+ return compressed_size;
+}
diff --git a/src/include/access/toast_internals.h
b/src/include/access/toast_internals.h
index 494b07a4b1..b8703d8c94 100644
--- a/src/include/access/toast_internals.h
+++ b/src/include/access/toast_internals.h
@@ -31,6 +31,7 @@ typedef struct toast_compress_header
*/
#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *)
(ptr))->rawsize)
+#define TOAST_COMPRESS_SIZE(ptr) ((int32) VARSIZE(ptr) -
TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_RAWDATA(ptr) \
(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
diff --git a/src/include/common/pg_lzcompress.h
b/src/include/common/pg_lzcompress.h
index 555576436c..e59fe2eaea 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen,
char *dest,
const PGLZ_Strategy
*strategy);
extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
int32 rawsize, bool
check_complete);
+extern int32 pglz_maximum_compressed_size(int32 rawsize, int32
total_compressed_size);
#endif /* _PG_LZCOMPRESS_H_ */
--
2.21.0