Hello,

Currently, either the table-level option `toast_tuple_target` or the compile-time
default `TOAST_TUPLE_TARGET` is used to decide whether a new tuple should be
compressed or not. While this works reasonably well for most situations, at
times a user may not want to pay the overhead of toasting, yet would still
like to benefit from inline compression.
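
To illustrate the current behaviour (a minimal sketch; the table name is just
illustrative), a 300-byte text value stays well below the ~2KB toast threshold,
so today it is stored inline and uncompressed:

CREATE TABLE plaintest (a int, b text);
INSERT INTO plaintest VALUES (1, repeat('1234567890',30));
-- with the default 8KB block size, pg_column_size(b) reports roughly 304
-- bytes, i.e. the value was stored uncompressed
SELECT a, pg_column_size(b) FROM plaintest;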

I would like to propose a new table-level option, compress_tuple_target,
which can be set independently of toast_tuple_target and is checked when
deciding whether or not to compress the new tuple.

For example,

CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 250);
CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 2040);

-- shouldn't get compressed nor toasted
INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));

-- should get compressed, but not toasted
INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));

-- shouldn't get compressed nor toasted
INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));

Without this patch, the second INSERT will not compress the tuple, since its
length is less than the toast threshold. With the patch, and after setting the
table-level option, such tuples can be compressed.
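
For instance, with compress_tuple_target = 250 the regression test included in
the patch shows the 300-byte value from the second INSERT being compressed
inline while the TOAST table stays empty:

SELECT a, pg_column_size(b) FROM compresstest250;
--  a | pg_column_size
-- ---+----------------
--  1 |            204
--  2 |             25

-- expect 0 blocks in the toast relation
select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
-- returns true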

The attached patch implements this idea.

Thanks,
Pavan

-- 
 Pavan Deolasee                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 22dbc07b23..149d6dd28f 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -1310,6 +1310,27 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>compress_tuple_target</literal> (<type>integer</type>)</term>
+    <listitem>
+     <para>
+     The compress_tuple_target specifies the minimum tuple length required before
+     we try to compress columns marked as Extended or Main and applies only to
+     new tuples - there is no effect on existing rows.
+     By default this parameter is set to allow at least 4 tuples per block,
+     which with the default blocksize will be 2040 bytes. Valid values are
+     between 128 bytes and the (blocksize - header), by default 8160 bytes.
+     If the value is set to a value greater than
+     <literal>toast_tuple_target</literal>, then that will be ignored and the value
+     of <literal>toast_tuple_target</literal> will be used instead.
+     Note that the default setting is often close to optimal, and
+     it is possible that setting this parameter could have negative
+     effects in some cases.
+     This parameter cannot be set for TOAST tables.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>parallel_workers</literal> (<type>integer</type>)</term>
     <listitem>
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index cdf1f4af62..db2f875721 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -300,6 +300,15 @@ static relopt_int intRelOpts[] =
 		},
 		TOAST_TUPLE_TARGET, 128, TOAST_TUPLE_TARGET_MAIN
 	},
+	{
+		{
+			"compress_tuple_target",
+			"Sets the target tuple length at which columns will be compressed",
+			RELOPT_KIND_HEAP,
+			ShareUpdateExclusiveLock
+		},
+		TOAST_TUPLE_TARGET, 128, TOAST_TUPLE_TARGET_MAIN
+	},
 	{
 		{
 			"pages_per_range",
@@ -1379,6 +1388,8 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)},
 		{"toast_tuple_target", RELOPT_TYPE_INT,
 		offsetof(StdRdOptions, toast_tuple_target)},
+		{"compress_tuple_target", RELOPT_TYPE_INT,
+		offsetof(StdRdOptions, compress_tuple_target)},
 		{"autovacuum_vacuum_scale_factor", RELOPT_TYPE_REAL,
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_scale_factor)},
 		{"autovacuum_analyze_scale_factor", RELOPT_TYPE_REAL,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index dc3499349b..ec48b25b6c 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2351,6 +2351,9 @@ static HeapTuple
 heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 					CommandId cid, int options)
 {
+	int		toast_tuple_threshold;
+	int		compress_tuple_threshold;
+
 	/*
 	 * Parallel operations are required to be strictly read-only in a parallel
 	 * worker.  Parallel inserts are not safe even in the leader in the
@@ -2375,6 +2378,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 	HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */
 	tup->t_tableOid = RelationGetRelid(relation);
 
+	toast_tuple_threshold = RelationGetToastTupleTarget(relation,
+								TOAST_TUPLE_THRESHOLD);
+	compress_tuple_threshold = RelationGetCompressTupleTarget(relation,
+								toast_tuple_threshold);
+	compress_tuple_threshold = Min(compress_tuple_threshold,
+								   toast_tuple_threshold);
 	/*
 	 * If the new tuple is too big for storage or contains already toasted
 	 * out-of-line attributes from some other relation, invoke the toaster.
@@ -2386,7 +2395,14 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
 		Assert(!HeapTupleHasExternal(tup));
 		return tup;
 	}
-	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+	/*
+	 * Activate toaster if the tuple length is greater than compression limit.
+	 * Note that compress_tuple_threshold must be less than or equal to
+	 * toast_tuple_threshold, so it's enough to only test for
+	 * compress_tuple_threshold.
+	 */
+	else if (HeapTupleHasExternal(tup) ||
+			 tup->t_len > compress_tuple_threshold)
 		return toast_insert_or_update(relation, tup, NULL, options);
 	else
 		return tup;
@@ -3704,9 +3720,26 @@ l2:
 		need_toast = false;
 	}
 	else
+	{
+		int			toast_tuple_threshold;
+		int			compress_tuple_threshold;
+
+		toast_tuple_threshold = RelationGetToastTupleTarget(relation,
+									TOAST_TUPLE_THRESHOLD);
+		compress_tuple_threshold = RelationGetCompressTupleTarget(relation,
+									toast_tuple_threshold);
+		compress_tuple_threshold = Min(compress_tuple_threshold,
+									   toast_tuple_threshold);
+
+		/*
+		 * compress_tuple_threshold must be less than or equal to
+		 * toast_tuple_threshold, so enough to only test for
+		 * compress_tuple_threshold.
+		 */
 		need_toast = (HeapTupleHasExternal(&oldtup) ||
 					  HeapTupleHasExternal(newtup) ||
-					  newtup->t_len > TOAST_TUPLE_THRESHOLD);
+					  newtup->t_len > compress_tuple_threshold);
+	}
 
 	pagefree = PageGetHeapFreeSpace(page);
 
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index f5cf9ffc9c..b7fc214d5e 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -636,7 +636,15 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 	Size		len;
 	OffsetNumber newoff;
 	HeapTuple	heaptup;
-
+	int			toast_tuple_threshold;
+	int			compress_tuple_threshold;
+
+	toast_tuple_threshold = RelationGetToastTupleTarget(state->rs_new_rel,
+									TOAST_TUPLE_THRESHOLD);
+	compress_tuple_threshold = RelationGetCompressTupleTarget(state->rs_new_rel,
+									toast_tuple_threshold);
+	compress_tuple_threshold = Min(compress_tuple_threshold,
+								   toast_tuple_threshold);
 	/*
 	 * If the new tuple is too big for storage or contains already toasted
 	 * out-of-line attributes from some other relation, invoke the toaster.
@@ -650,7 +658,14 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 		Assert(!HeapTupleHasExternal(tup));
 		heaptup = tup;
 	}
-	else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
+	/*
+	 * Activate toaster if the tuple length is greater than compression limit.
+	 * Note that compress_tuple_threshold must be less than or equal to
+	 * toast_tuple_threshold, so it's enough to only test for
+	 * compress_tuple_threshold.
+	 */
+	else if (HeapTupleHasExternal(tup) ||
+			 tup->t_len > compress_tuple_threshold)
 	{
 		int options = HEAP_INSERT_SKIP_FSM;
 
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index cd921a4600..ac0444d05e 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -544,6 +544,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	bool		has_nulls = false;
 
 	Size		maxDataLen;
+	Size		maxCompressLen;
 	Size		hoff;
 
 	char		toast_action[MaxHeapAttributeNumber];
@@ -726,12 +727,19 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	/* now convert to a limit on the tuple data size */
 	maxDataLen = RelationGetToastTupleTarget(rel, TOAST_TUPLE_TARGET) - hoff;
 
+	/*
+	 * Get the limit at which we should apply compression. This will be same as
+	 * maxDataLen unless overridden by the user explicitly.
+	 */
+	maxCompressLen = RelationGetCompressTupleTarget(rel, maxDataLen) - hoff;
+	maxCompressLen = Min(maxCompressLen, maxDataLen);
+
 	/*
 	 * Look for attributes with attstorage 'x' to compress.  Also find large
 	 * attributes with attstorage 'x' or 'e', and store them external.
 	 */
 	while (heap_compute_data_size(tupleDesc,
-								  toast_values, toast_isnull) > maxDataLen)
+								  toast_values, toast_isnull) > maxCompressLen)
 	{
 		int			biggest_attno = -1;
 		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
@@ -876,7 +884,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
 	 * compression
 	 */
 	while (heap_compute_data_size(tupleDesc,
-								  toast_values, toast_isnull) > maxDataLen)
+								  toast_values, toast_isnull) > maxCompressLen)
 	{
 		int			biggest_attno = -1;
 		int32		biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 1d05465303..568f503493 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -250,6 +250,7 @@ typedef struct StdRdOptions
 	/* fraction of newly inserted tuples prior to trigger index cleanup */
 	float8		vacuum_cleanup_index_scale_factor;
 	int			toast_tuple_target; /* target for tuple toasting */
+	int			compress_tuple_target; /* target for tuple compression */
 	AutoVacOpts autovacuum;		/* autovacuum-related options */
 	bool		user_catalog_table; /* use as an additional catalog relation */
 	int			parallel_workers;	/* max number of parallel workers */
@@ -266,6 +267,14 @@ typedef struct StdRdOptions
 	((relation)->rd_options ? \
 	 ((StdRdOptions *) (relation)->rd_options)->toast_tuple_target : (defaulttarg))
 
+/*
+ * RelationGetCompressTupleTarget
+ *		Returns the relation's compress_tuple_target.  Note multiple eval of argument!
+ */
+#define RelationGetCompressTupleTarget(relation, defaulttarg) \
+	((relation)->rd_options ? \
+	 ((StdRdOptions *) (relation)->rd_options)->compress_tuple_target : (defaulttarg))
+
 /*
  * RelationGetFillFactor
  *		Returns the relation's fillfactor.  Note multiple eval of argument!
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 189bdffdca..2f1a364b3c 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -1209,7 +1209,70 @@ select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class wher
  t
 (1 row)
 
+CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 250);
+CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 2040);
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
+-- should get compressed, but not toasted
+INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
+SELECT a, pg_column_size(b) FROM compresstest250;
+ a | pg_column_size 
+---+----------------
+ 1 |            204
+ 2 |             25
+(2 rows)
+
+SELECT a, pg_column_size(b) FROM compresstest2040;
+ a | pg_column_size 
+---+----------------
+ 1 |            204
+ 2 |            304
+(2 rows)
+
+-- expect 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+ blocks 
+--------
+ t
+(1 row)
+
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest2040'))/current_setting('block_size')::integer as blocks;
+ blocks 
+--------
+ t
+(1 row)
+
+-- should get compressed, and toasted
+INSERT INTO compresstest250 SELECT 3, string_agg('', md5(random()::text)) FROM generate_series(1,300);
+-- expect > 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+ blocks 
+--------
+ f
+(1 row)
+
+ALTER TABLE compresstest250 RENAME TO compresstest1500;
+ALTER TABLE compresstest1500 SET (compress_tuple_target=1500);
+INSERT INTO compresstest1500 VALUES (4, repeat('1234567890',140));
+INSERT INTO compresstest2040 VALUES (4, repeat('1234567890',140));
+SELECT a, pg_column_size(b) FROM compresstest1500 WHERE a = 4;
+ a | pg_column_size 
+---+----------------
+ 4 |           1404
+(1 row)
+
+SELECT a, pg_column_size(b) FROM compresstest2040 WHERE a = 4;
+ a | pg_column_size 
+---+----------------
+ 4 |           1404
+(1 row)
+
 DROP TABLE toasttest;
+DROP TABLE compresstest1500, compresstest2040;
 --
 -- test substr with toasted bytea values
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index f2203ef1b1..1fb04a3a1b 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -386,7 +386,38 @@ INSERT INTO toasttest values (repeat('1234567890',300));
 -- expect 0 blocks
 select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'toasttest'))/current_setting('block_size')::integer as blocks;
 
+CREATE TABLE compresstest250 (a int, b text) WITH (compress_tuple_target = 250);
+CREATE TABLE compresstest2040 (a int, b text) WITH (compress_tuple_target = 2040);
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest250 VALUES (1, repeat('1234567890',20));
+-- should get compressed, but not toasted
+INSERT INTO compresstest250 VALUES (2, repeat('1234567890',30));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (1, repeat('1234567890',20));
+-- shouldn't get compressed nor toasted
+INSERT INTO compresstest2040 VALUES (2, repeat('1234567890',30));
+
+SELECT a, pg_column_size(b) FROM compresstest250;
+SELECT a, pg_column_size(b) FROM compresstest2040;
+-- expect 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest2040'))/current_setting('block_size')::integer as blocks;
+
+-- should get compressed, and toasted
+INSERT INTO compresstest250 SELECT 3, string_agg('', md5(random()::text)) FROM generate_series(1,300);
+-- expect > 0 blocks
+select 0 = pg_relation_size('pg_toast.pg_toast_'||(select oid from pg_class where relname = 'compresstest250'))/current_setting('block_size')::integer as blocks;
+
+ALTER TABLE compresstest250 RENAME TO compresstest1500;
+ALTER TABLE compresstest1500 SET (compress_tuple_target=1500);
+INSERT INTO compresstest1500 VALUES (4, repeat('1234567890',140));
+INSERT INTO compresstest2040 VALUES (4, repeat('1234567890',140));
+
+SELECT a, pg_column_size(b) FROM compresstest1500 WHERE a = 4;
+SELECT a, pg_column_size(b) FROM compresstest2040 WHERE a = 4;
+
 DROP TABLE toasttest;
+DROP TABLE compresstest1500, compresstest2040;
 
 --
 -- test substr with toasted bytea values
