On Tue, 30 Jan 2024 14:57:20 +0800
jian he <jian.universal...@gmail.com> wrote:

> On Tue, Jan 30, 2024 at 1:56 PM Yugo NAGATA <nag...@sraoss.co.jp> wrote:
> >
> > I attached the correct one, v4.
> >
> 
> +-- Test pg_column_toast_chunk_id:
> +-- Check if the returned chunk_id exists in the TOAST table
> +CREATE TABLE test_chunk_id (v1 text, v2 text);
> +INSERT INTO test_chunk_id VALUES (
> +  repeat('0123456789', 10), -- v1: small enough not to be TOASTed
> +  repeat('0123456789', 100000)); -- v2: large enough to be TOASTed
> 
> select pg_size_pretty(100000::bigint);
> return 98kb.
> 
> I think this is just too much, maybe I didn't consider the
> implications of compression.
> Anyway, I refactored the tests, making the toast value size be small.

Actually the data is compressed and the size is much smaller,
but I agree with you it is better not to generate large data unnecessarily.
I rewrote the test to disallow compression in the toast data using 
"ALTER TABLE ... SET STORAGE EXTERNAL". In this case, any text larger
than 2k will be TOASTed on disk without compression, and it makes the
test simple, not required to use string_agg.
> 
> I aslo refactor the doc.
> pg_column_toast_chunk_id entry will be right after pg_column_compression 
> entry.
> You can check the screenshot.

I found the document order was not changed between my patch and yours.
In both, pg_column_toast_chunk_id entry is right after 
pg_column_compression.

Here is a updated patch, v6.

Regards,
Yugo Nagata

-- 
Yugo NAGATA <nag...@sraoss.co.jp>
>From dfec0fdb52c6aad8846b5c984e111dc8b2985e1a Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nag...@sraoss.co.jp>
Date: Wed, 29 Mar 2023 09:59:25 +0900
Subject: [PATCH v6] Add pg_column_toast_chunk_id function

This function returns the chunk_id of an on-disk TOASTed value, or
NULL if the value is un-TOASTed or not on disk. This enables users to
know which columns are actually TOASTed. This function is also useful
to identify a problematic row when an error like
  "ERROR:  unexpected chunk number ... (expected ...) for toast value"
occurs.
---
 doc/src/sgml/func.sgml                       | 17 ++++++++
 src/backend/utils/adt/varlena.c              | 41 ++++++++++++++++++++
 src/include/catalog/pg_proc.dat              |  3 ++
 src/test/regress/expected/misc_functions.out | 21 ++++++++++
 src/test/regress/sql/misc_functions.sql      | 23 +++++++++++
 5 files changed, 105 insertions(+)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 6788ba8ef4..4116aaff7a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -28502,6 +28502,23 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_column_toast_chunk_id</primary>
+        </indexterm>
+        <function>pg_column_toast_chunk_id</function> ( <type>"any"</type> )
+        <returnvalue>oid</returnvalue>
+       </para>
+       <para>
+        Shows the <structfield>chunk_id</structfield> of an on-disk
+        <acronym>TOAST</acronym>ed value. Returns <literal>NULL</literal>
+        if the value is un-<acronym>TOAST</acronym>ed or not on-disk.
+        See <xref linkend="storage-toast-ondisk"/> for details about
+        <acronym>TOAST</acronym>.
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 543afb66e5..84d36781a4 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -5105,6 +5105,47 @@ pg_column_compression(PG_FUNCTION_ARGS)
 	PG_RETURN_TEXT_P(cstring_to_text(result));
 }
 
+/*
+ * Return the chunk_id of the on-disk TOASTed value.
+ * Return NULL if the value is unTOASTed or not on disk.
+ */
+Datum
+pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
+{
+	int			typlen;
+	struct varlena *attr;
+	struct varatt_external toast_pointer;
+
+	/* On first call, get the input type's typlen, and save at *fn_extra */
+	if (fcinfo->flinfo->fn_extra == NULL)
+	{
+		/* Lookup the datatype of the supplied argument */
+		Oid			argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+
+		typlen = get_typlen(argtypeid);
+		if (typlen == 0)		/* should not happen */
+			elog(ERROR, "cache lookup failed for type %u", argtypeid);
+
+		fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+													  sizeof(int));
+		*((int *) fcinfo->flinfo->fn_extra) = typlen;
+	}
+	else
+		typlen = *((int *) fcinfo->flinfo->fn_extra);
+
+	if (typlen != -1)
+		PG_RETURN_NULL();
+
+	attr = (struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
+
+	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
+		PG_RETURN_NULL();
+
+	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+	PG_RETURN_OID(toast_pointer.va_valueid);
+}
+
 /*
  * string_agg - Concatenates values and returns string.
  *
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 29af4ce65d..9ab71646e3 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -7454,6 +7454,9 @@
 { oid => '2121', descr => 'compression method for the compressed datum',
   proname => 'pg_column_compression', provolatile => 's', prorettype => 'text',
   proargtypes => 'any', prosrc => 'pg_column_compression' },
+{ oid => '8393', descr => 'chunk ID of on-disk TOASTed value',
+  proname => 'pg_column_toast_chunk_id', provolatile => 's', prorettype => 'oid',
+  proargtypes => 'any', prosrc => 'pg_column_toast_chunk_id' },
 { oid => '2322',
   descr => 'total disk space usage for the specified tablespace',
   proname => 'pg_tablespace_size', provolatile => 'v', prorettype => 'int8',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index 7c15477104..2d11f33fe6 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -683,3 +683,24 @@ SELECT gist_stratnum_identity(18::smallint);
                      18
 (1 row)
 
+-- Test pg_column_toast_chunk_id:
+-- Check if the returned chunk_id exists in the TOAST table
+CREATE TABLE test_chunk_id (v1 text, v2 text);
+-- Use uncompressed toast data
+ALTER TABLE test_chunk_id ALTER COLUMN v2 SET STORAGE EXTERNAL;
+-- v1: small enough not to be TOASTed
+-- v2: large enough to be TOASTed
+INSERT INTO test_chunk_id(v1,v2)
+  VALUES (repeat('x', 1), repeat('x', 2048));
+DO $$
+  DECLARE result text default 'not_ok';
+  BEGIN
+  EXECUTE format(
+    'SELECT ''ok'' FROM test_chunk_id
+	   WHERE pg_column_toast_chunk_id(v1) IS NULL AND
+	         pg_column_toast_chunk_id(v2) IN (SELECT chunk_id FROM pg_toast.pg_toast_%s)',
+    regclass('test_chunk_id')::int::text) INTO result;
+  RAISE INFO '%', result;
+END; $$;
+INFO:  ok
+DROP TABLE test_chunk_id;
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index 851dad90f4..7ab95a939e 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -254,3 +254,26 @@ FROM pg_walfile_name_offset('0/0'::pg_lsn + :segment_size - 1),
 -- test stratnum support functions
 SELECT gist_stratnum_identity(3::smallint);
 SELECT gist_stratnum_identity(18::smallint);
+
+-- Test pg_column_toast_chunk_id:
+-- Check if the returned chunk_id exists in the TOAST table
+CREATE TABLE test_chunk_id (v1 text, v2 text);
+-- Use uncompressed toast data
+ALTER TABLE test_chunk_id ALTER COLUMN v2 SET STORAGE EXTERNAL;
+
+-- v1: small enough not to be TOASTed
+-- v2: large enough to be TOASTed
+INSERT INTO test_chunk_id(v1,v2)
+  VALUES (repeat('x', 1), repeat('x', 2048));
+
+DO $$
+  DECLARE result text default 'not_ok';
+  BEGIN
+  EXECUTE format(
+    'SELECT ''ok'' FROM test_chunk_id
+	   WHERE pg_column_toast_chunk_id(v1) IS NULL AND
+	         pg_column_toast_chunk_id(v2) IN (SELECT chunk_id FROM pg_toast.pg_toast_%s)',
+    regclass('test_chunk_id')::int::text) INTO result;
+  RAISE INFO '%', result;
+END; $$;
+DROP TABLE test_chunk_id;
-- 
2.25.1

Reply via email to