Hi!

> Andrey Borodin <x4...@yandex-team.ru> 于2019年6月29日周六 下午9:48写道:

> Hi!
> Please do not use top-posting, i.e. the reply style where you quote the whole
> message under your response. It makes the archives harder to read.
>
> > 24 июня 2019 г., в 7:53, Binguo Bao <djydew...@gmail.com> написал(а):
> >
> >> This is not correct: L bytes of compressed data cannot always be
> decoded into at least L bytes of data. At worst we have one control byte
> per 8 literal bytes. This means that at most we need (L*9 + 8) / 8
> bytes with the current pglz format.
> >
> > Good catch! I've corrected the related code in the patch.
> > ...
> > <0001-Optimize-partial-TOAST-decompression-2.patch>
>
> I've taken a look at the code.
> I think we should extract a function for computing max_compressed_size
> and put it somewhere along with the pglz code, so that if someone later
> changes something about pglz they do not forget about this compression
> algorithm assumption.
>
> Also, I suggest just using 64-bit computation to avoid overflows. And I
> think it is worth checking whether max_compressed_size covers the whole
> data, and using min(max_compressed_size, uncompressed_data_size).
>
> Also you declared needsize and max_compressed_size too far from use. But
> this will be solved by function extraction anyway.
>
> Thanks!
>
> Best regards, Andrey Borodin.


Thanks for the suggestion.
I've extracted a function for computing max_compressed_size and put it
into pg_lzcompress.c.

Best regards, Binguo Bao.
From 79a1b4c292a0629df9d7ba3dc04e879aadca7a61 Mon Sep 17 00:00:00 2001
From: BBG <djydew...@gmail.com>
Date: Sun, 2 Jun 2019 19:18:46 +0800
Subject: [PATCH] Optimize partial TOAST decompression

---
 src/backend/access/heap/tuptoaster.c | 24 +++++++++++++++++-------
 src/common/pg_lzcompress.c           | 22 ++++++++++++++++++++++
 src/include/common/pg_lzcompress.h   |  1 +
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 55d6e91..684f1b2 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -266,6 +266,7 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
 	{
 		struct varatt_external toast_pointer;
+		int32 max_size;
 
 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
@@ -273,8 +274,13 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
 			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
 
-		/* fetch it back (compressed marker will get set automatically) */
-		preslice = toast_fetch_datum(attr);
+		max_size = pglz_maximum_compressed_size(sliceoffset + slicelength,
+												toast_pointer.va_rawsize);
+		/*
+		 * Be sure to get enough compressed slice
+		 * and compressed marker will get set automatically
+		 */
+		preslice = toast_fetch_datum_slice(attr, 0, max_size);
 	}
 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
 	{
@@ -2031,7 +2037,8 @@ toast_fetch_datum(struct varlena *attr)
  *	Reconstruct a segment of a Datum from the chunks saved
  *	in the toast relation
  *
- *	Note that this function only supports non-compressed external datums.
+ *	Note that this function supports non-compressed external datums
+ *	and compressed external datum slices at the start of the object.
  * ----------
  */
 static struct varlena *
@@ -2072,10 +2079,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
 	/*
-	 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
-	 * we can't return a compressed datum which is meaningful to toast later
+	 * It's meaningful to fetch slices at the start of a compressed datum.
 	 */
-	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
+	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
 
 	attrsize = toast_pointer.va_extsize;
 	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
@@ -2091,7 +2097,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 
 	result = (struct varlena *) palloc(length + VARHDRSZ);
 
-	SET_VARSIZE(result, length + VARHDRSZ);
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) {
+		SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+	} else {
+		SET_VARSIZE(result, length + VARHDRSZ);
+	}
 
 	if (length == 0)
 		return result;			/* Can save a lot of work at this point! */
diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c
index 988b398..2b5f112 100644
--- a/src/common/pg_lzcompress.c
+++ b/src/common/pg_lzcompress.c
@@ -771,3 +771,25 @@ pglz_decompress(const char *source, int32 slen, char *dest,
 	 */
 	return (char *) dp - dest;
 }
+
+
+
+/* ----------
+ * pglz_maximum_compressed_size -
+ *
+ *		Upper bound on the number of compressed bytes needed to decompress
+ *		the first raw_slice_size raw bytes: one control byte per 8 literal
+ *		bytes, i.e. (raw_slice_size * 9 + 8) / 8.  The result is capped at
+ *		raw_size.
+ * ----------
+ */
+int32
+pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size)
+{
+	int64		result;
+
+	/* Divide in 64 bits; narrowing before the division would truncate. */
+	result = ((int64) raw_slice_size * 9 + 8) / 8;
+
+	return (int32) (result > raw_size ? raw_size : result);
+}
diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h
index 5555764..cda3e1d 100644
--- a/src/include/common/pg_lzcompress.h
+++ b/src/include/common/pg_lzcompress.h
@@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest,
 						   const PGLZ_Strategy *strategy);
 extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
 							 int32 rawsize, bool check_complete);
+extern int32 pglz_maximum_compressed_size(int32 raw_slice_size, int32 raw_size);
 
 #endif							/* _PG_LZCOMPRESS_H_ */
-- 
2.7.4

Reply via email to