On 3/11/19 2:23 AM, Masahiko Sawada wrote:
> On Wed, Feb 6, 2019 at 4:32 PM Pavan Deolasee <pavan.deola...@gmail.com> 
> wrote:
>> Hello,
>>
>> Currently either the table level option `toast_tuple_target` or the compile 
>> time default `TOAST_TUPLE_TARGET` is used to decide whether a new tuple 
>> should be compressed or not. While this works reasonably well for most 
>> situations, at times the user may not want to pay the overhead of toasting, 
>> yet take benefits of inline compression.
>>
>> I would like to propose a new table level option, compress_tuple_target, 
>> which can be set independently of toast_tuple_target, and is checked while 
>> deciding whether to compress the new tuple or not.
>>
>> For example,
>>
>> CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 
>> 250);
>> CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 
>> 2040);
>>
>> -- shouldn't get compressed nor toasted
>> INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
>>
>> -- should get compressed, but not toasted
>> INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
>>
>> -- shouldn't get compressed nor toasted
>> INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
>> INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
>>
>> Without this patch, the second INSERT will not compress the tuple since its 
>> length is less than the toast threshold. With the patch and after setting 
>> table level option, one can compress such tuples.
>>
>> The attached patch implements this idea.
>>
> I like this idea.
>
> The patch seems to need update the part describing on-disk toast
> storage in storage.sgml.
>


Yeah. Meanwhile, here's a rebased version of the patch to keep the cfbot
happy.


cheers


andrew


-- 
Andrew Dunstan                https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index e94fe2c3b6..374f0f2579 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -1310,6 +1310,27 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>compress_tuple_target</literal> (<type>integer</type>)</term>
+    <listitem>
+     <para>
+     The compress_tuple_target specifies the minimum tuple length required before
+     we try to compress columns marked as Extended or Main and applies only to
+     new tuples - there is no effect on existing rows.
+     By default this parameter is set to allow at least 4 tuples per block,
+     which with the default blocksize will be 2040 bytes. Valid values are
+     between 128 bytes and the (blocksize - header), by default 8160 bytes.
+     If the value is set to a value greater than
+     <literal>toast_tuple_target</literal>, then that will be ignored and the value
+     of <literal>toast_tuple_target</literal> will be used instead.
+     Note that the default setting is often close to optimal, and
+     it is possible that setting this parameter could have negative
+     effects in some cases.
+     This parameter cannot be set for TOAST tables.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>parallel_workers</literal> (<type>integer</type>)</term>
     <listitem>
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 3b0b138f24..199f1a166c 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -300,6 +300,15 @@ static relopt_int intRelOpts[] =
 		},
 		TOAST_TUPLE_TARGET, 128, TOAST_TUPLE_TARGET_MAIN
 	},
+	{
+		{
+			"compress_tuple_target",
+			"Sets the target tuple length at which columns will be compressed",
+			RELOPT_KIND_HEAP,
+			ShareUpdateExclusiveLock
+		},
+		TOAST_TUPLE_TARGET, 128, TOAST_TUPLE_TARGET_MAIN
+	},
 	{
 		{
 			"pages_per_range",
@@ -1377,6 +1386,8 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)},
 		{"toast_tuple_target", RELOPT_TYPE_INT,
 		offsetof(StdRdOptions, toast_tuple_target)},
+		{"compress_tuple_target", RELOPT_TYPE_INT,
+		offsetof(StdRdOptions, compress_tuple_target)},
 		{"autovacuum_vacuum_cost_delay", RELOPT_TYPE_REAL,
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_delay)},
 		{"autovacuum_vacuum_scale_factor", RELOPT_TYPE_REAL,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 3c8a5da0bc..648559c3c8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2111,6 +2111,9 @@ static HeapTuple
 heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 					CommandId cid, int options)
 {
+	int		toast_tuple_threshold;
+	int		compress_tuple_threshold;
+
 	/*
 	 * Parallel operations are required to be strictly read-only in a parallel
 	 * worker.  Parallel inserts are not safe even in the leader in the
@@ -2135,6 +2138,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 	HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */
 	tup->t_tableOid = RelationGetRelid(relation);
 
+	toast_tuple_threshold = RelationGetToastTupleTarget(relation,
+								TOAST_TUPLE_THRESHOLD);
+	compress_tuple_threshold = RelationGetCompressTupleTarget(relation,
+								toast_tuple_threshold);
+	compress_tuple_threshold = Min(compress_tuple_threshold,
+								   toast_tuple_threshold);
 	/*
 	 * If the new tuple is too big for storage or contains already toasted
 	 * out-of-line attributes from some other relation, invoke the toaster.
@@ -2146,7 +2155,14 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 		Assert(!HeapTupleHasExternal(tup));
 		return tup;
 	}
-	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+	/*
+	 * Activate toaster if the tuple length is greater than compression limit.
+	 * Note that compress_tuple_threshold must be less than or equal to
+	 * toast_tuple_threshold, so it's enough to only test for
+	 * compress_tuple_threshold.
+	 */
+	else if (HeapTupleHasExternal(tup) ||
+			 tup->t_len > compress_tuple_threshold)
 		return toast_insert_or_update(relation, tup, NULL, options);
 	else
 		return tup;
@@ -3464,9 +3480,26 @@ l2:
 		need_toast = false;
 	}
 	else
+	{
+		int			toast_tuple_threshold;
+		int			compress_tuple_threshold;
+
+		toast_tuple_threshold = RelationGetToastTupleTarget(relation,
+									TOAST_TUPLE_THRESHOLD);
+		compress_tuple_threshold = RelationGetCompressTupleTarget(relation,
+									toast_tuple_threshold);
+		compress_tuple_threshold = Min(compress_tuple_threshold,
+									   toast_tuple_threshold);
+
+		/*
+		 * compress_tuple_threshold must be less than or equal to
+		 * toast_tuple_threshold, so enough to only test for
+		 * compress_tuple_threshold.
+		 */
 		need_toast = (HeapTupleHasExternal(&oldtup) ||
 					  HeapTupleHasExternal(newtup) ||
-					  newtup->t_len > TOAST_TUPLE_THRESHOLD);
+					  newtup->t_len > compress_tuple_threshold);
+	}
 
 	pagefree = PageGetHeapFreeSpace(page);
 
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index bce4274362..9b7fe81235 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -636,7 +636,15 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 	Size		len;
 	OffsetNumber newoff;
 	HeapTuple	heaptup;
-
+	int			toast_tuple_threshold;
+	int			compress_tuple_threshold;
+
+	toast_tuple_threshold = RelationGetToastTupleTarget(state->rs_new_rel,
+									TOAST_TUPLE_THRESHOLD);
+	compress_tuple_threshold = RelationGetCompressTupleTarget(state->rs_new_rel,
+									toast_tuple_threshold);
+	compress_tuple_threshold = Min(compress_tuple_threshold,
+								   toast_tuple_threshold);
 	/*
 	 * If the new tuple is too big for storage or contains already toasted
 	 * out-of-line attributes from some other relation, invoke the toaster.
@@ -650,7 +658,14 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 		Assert(!HeapTupleHasExternal(tup));
 		heaptup = tup;
 	}
-	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+	/*
+	 * Activate toaster if the tuple length is greater than compression limit.
+	 * Note that compress_tuple_threshold must be less than or equal to
+	 * toast_tuple_threshold, so it's enough to only test for
+	 * compress_tuple_threshold.
+	 */
+	else if (HeapTupleHasExternal(tup) ||
+			 tup->t_len > compress_tuple_threshold)
 	{
 		int options = HEAP_INSERT_SKIP_FSM;
 
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index cd921a4600..ac0444d05e 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -544,6 +544,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	bool		has_nulls = false;
 
 	Size		maxDataLen;
+	Size		maxCompressLen;
 	Size		hoff;
 
 	char		toast_action[MaxHeapAttributeNumber];
@@ -726,12 +727,19 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	/* now convert to a limit on the tuple data size */
 	maxDataLen = RelationGetToastTupleTarget(rel, TOAST_TUPLE_TARGET) - hoff;
 
+	/*
+	 * Get the limit at which we should apply compression. This will be same as
+	 * maxDataLen unless overridden by the user explicitly.
+	 */
+	maxCompressLen = RelationGetCompressTupleTarget(rel, maxDataLen) - hoff;
+	maxCompressLen = Min(maxCompressLen, maxDataLen);
+
 	/*
 	 * Look for attributes with attstorage 'x' to compress.  Also find large
 	 * attributes with attstorage 'x' or 'e', and store them external.
 	 */
 	while (heap_compute_data_size(tupleDesc,
-								  toast_values, toast_isnull) > maxDataLen)
+								  toast_values, toast_isnull) > maxCompressLen)
 	{
 		int			biggest_attno = -1;
 		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
@@ -876,7 +884,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	 * compression
 	 */
 	while (heap_compute_data_size(tupleDesc,
-								  toast_values, toast_isnull) > maxDataLen)
+								  toast_values, toast_isnull) > maxCompressLen)
 	{
 		int			biggest_attno = -1;
 		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 54028515a7..9e17e2a599 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -263,6 +263,7 @@ typedef struct StdRdOptions
 	/* fraction of newly inserted tuples prior to trigger index cleanup */
 	float8		vacuum_cleanup_index_scale_factor;
 	int			toast_tuple_target; /* target for tuple toasting */
+	int			compress_tuple_target; /* target for tuple compression */
 	AutoVacOpts autovacuum;		/* autovacuum-related options */
 	bool		user_catalog_table; /* use as an additional catalog relation */
 	int			parallel_workers;	/* max number of parallel workers */
@@ -279,6 +280,14 @@ typedef struct StdRdOptions
 	((relation)->rd_options ? \
 	 ((StdRdOptions *) (relation)->rd_options)->toast_tuple_target : (defaulttarg))
 
+/*
+ * RelationGetCompressTupleTarget
+ *		Returns the relation's compress_tuple_target.  Note multiple eval of argument!
+ */
+#define RelationGetCompressTupleTarget(relation, defaulttarg) \
+	((relation)->rd_options ? \
+	 ((StdRdOptions *) (relation)->rd_options)->compress_tuple_target : (defaulttarg))
+
 /*
  * RelationGetFillFactor
  *		Returns the relation's fillfactor.  Note multiple eval of argument!
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 189bdffdca..2f1a364b3c 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -1209,7 +1209,70 @@ select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class wher
  t
 (1 row)
 
+CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 250);
+CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 2040);
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
+-- should get compressed, but not toasted
+INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
+SELECT a, pg_column_size(b) FROM compresstest250;
+ a | pg_column_size 
+---+----------------
+ 1 |            204
+ 2 |             25
+(2 rows)
+
+SELECT a, pg_column_size(b) FROM compresstest2040;
+ a | pg_column_size 
+---+----------------
+ 1 |            204
+ 2 |            304
+(2 rows)
+
+-- expect 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+ blocks 
+--------
+ t
+(1 row)
+
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest2040'))/current_setting('block_size')::integer as blocks;
+ blocks 
+--------
+ t
+(1 row)
+
+-- should get compressed, and toasted
+INSERT INTO compresstest250 SELECT 3, string_agg('', md5(random()::text)) FROM generate_series(1,300);
+-- expect > 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+ blocks 
+--------
+ f
+(1 row)
+
+ALTER TABLE compresstest250 RENAME TO compresstest1500;
+ALTER TABLE compresstest1500 SET (compress_tuple_target=1500);
+INSERT INTO compresstest1500 VALUES (4, repeat('1234567890',140));
+INSERT INTO compresstest2040 VALUES (4, repeat('1234567890',140));
+SELECT a, pg_column_size(b) FROM compresstest1500 WHERE a = 4;
+ a | pg_column_size 
+---+----------------
+ 4 |           1404
+(1 row)
+
+SELECT a, pg_column_size(b) FROM compresstest2040 WHERE a = 4;
+ a | pg_column_size 
+---+----------------
+ 4 |           1404
+(1 row)
+
 DROP TABLE toasttest;
+DROP TABLE compresstest1500, compresstest2040;
 --
 -- test substr with toasted bytea values
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index f2203ef1b1..1fb04a3a1b 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -386,7 +386,38 @@ INSERT INTO toasttest values (repeat('1234567890',300));
 -- expect 0 blocks
 select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'toasttest'))/current_setting('block_size')::integer as blocks;
 
+CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 250);
+CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 2040);
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
+-- should get compressed, but not toasted
+INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
+
+SELECT a, pg_column_size(b) FROM compresstest250;
+SELECT a, pg_column_size(b) FROM compresstest2040;
+-- expect 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest2040'))/current_setting('block_size')::integer as blocks;
+
+-- should get compressed, and toasted
+INSERT INTO compresstest250 SELECT 3, string_agg('', md5(random()::text)) FROM generate_series(1,300);
+-- expect > 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+
+ALTER TABLE compresstest250 RENAME TO compresstest1500;
+ALTER TABLE compresstest1500 SET (compress_tuple_target=1500);
+INSERT INTO compresstest1500 VALUES (4, repeat('1234567890',140));
+INSERT INTO compresstest2040 VALUES (4, repeat('1234567890',140));
+
+SELECT a, pg_column_size(b) FROM compresstest1500 WHERE a = 4;
+SELECT a, pg_column_size(b) FROM compresstest2040 WHERE a = 4;
+
 DROP TABLE toasttest;
+DROP TABLE compresstest1500, compresstest2040;
 
 --
 -- test substr with toasted bytea values

Reply via email to