On 2/22/22 21:12, Tomas Vondra wrote:
> On 2/22/22 01:36, Fujii Masao wrote:
>>
>>
>> On 2022/02/18 22:28, Tomas Vondra wrote:
>>> Hi,
>>>
>>> here's a slightly updated version of the patch series.
>>
>> Thanks for updating the patches!
>>
>>
>>> The 0001 part
>>> adds tracking of server_version_num, so that it's possible to enable
>>> other features depending on it.
>>
>> Like configure_remote_session() does, can't we use PQserverVersion()
>> instead of implementing new function GetServerVersion()?
>>
> 
> Ah! My knowledge of libpq is limited, so I wasn't sure this function
> exists. It'll simplify the patch.
> 

And here's the slightly simplified patch, without the part adding the
unnecessary GetServerVersion() function.


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From 089ecf8c9ee44b89becfbcfb9c7a0ddc6c8f197a Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Fri, 18 Feb 2022 00:33:19 +0100
Subject: [PATCH] postgres_fdw: sample data on remote node for ANALYZE

When performing ANALYZE on a foreign tables, we need to collect sample
of rows. Until now, we simply read all data from the remote node and
built the sample locally. That is very expensive, especially in terms of
network traffic etc. But it's possible to move the sampling to the
remote node, and use either TABLESAMPLE or simply random() to transfer
just much smaller amount of data.

So we run either

   SELECT * FROM table TABLESAMPLE SYSTEM(fraction)

or

  SELECT * FROM table WHERE random() < fraction

depending on the server version (TABLESAMPLE is supported since 9.5).

To do that, we need to determine what fraction of the table to sample.
We rely on reltuples (fetched from the remote node) to be sufficiently
accurate and up to date, and calculate the fraction based on that. We
increase the sample size a bit (in case the table shrunk), and we still
do the reservoir sampling (in case it grew).

Using tsm_system_rows would allow specifying sample size in rows,
without determining sampling rate. But the sampling method may not be
installed, and we'd still have to determine the relation size.

This adds 'sample' option for remote servers / tables. By default, it's
set to 'true' which enables remote sampling. Setting it to 'false' uses
the old approach of fetching everything and sampling locally.
---
 contrib/postgres_fdw/deparse.c      | 152 ++++++++++++++++++++++++++++
 contrib/postgres_fdw/option.c       |   7 +-
 contrib/postgres_fdw/postgres_fdw.c | 142 +++++++++++++++++++++++++-
 contrib/postgres_fdw/postgres_fdw.h |   7 ++
 4 files changed, 304 insertions(+), 4 deletions(-)

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bf12eac0288..32f2c0d5fb3 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2267,6 +2267,26 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel)
 	appendStringInfo(buf, "::pg_catalog.regclass) / %d", BLCKSZ);
 }
 
+/*
+ * Construct SELECT statement to acquire numbe of rows of given relation.
+ *
+ * Note: Maybe this should compare relpages and current relation size
+ * and adjust reltuples accordingly?
+ */
+void
+deparseAnalyzeTuplesSql(StringInfo buf, Relation rel)
+{
+	StringInfoData relname;
+
+	/* We'll need the remote relation name as a literal. */
+	initStringInfo(&relname);
+	deparseRelation(&relname, rel);
+
+	appendStringInfoString(buf, "SELECT reltuples FROM pg_catalog.pg_class WHERE oid = ");
+	deparseStringLiteral(buf, relname.data);
+	appendStringInfoString(buf, "::pg_catalog.regclass");
+}
+
 /*
  * Construct SELECT statement to acquire sample rows of given relation.
  *
@@ -2328,6 +2348,138 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 	deparseRelation(buf, rel);
 }
 
+/*
+ * Construct SELECT statement to acquire sample rows of given relation,
+ * by sampling a fraction of the table using TABLESAMPLE SYSTEM.
+ *
+ * SELECT command is appended to buf, and list of columns retrieved
+ * is returned to *retrieved_attrs.
+ *
+ * Note: We could allow selecting system/bernoulli, and maybe even the
+ * optional TSM modules (especially tsm_system_rows would help).
+ */
+void
+deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac)
+{
+	Oid			relid = RelationGetRelid(rel);
+	TupleDesc	tupdesc = RelationGetDescr(rel);
+	int			i;
+	char	   *colname;
+	List	   *options;
+	ListCell   *lc;
+	bool		first = true;
+
+	*retrieved_attrs = NIL;
+
+	appendStringInfoString(buf, "SELECT ");
+	for (i = 0; i < tupdesc->natts; i++)
+	{
+		/* Ignore dropped columns. */
+		if (TupleDescAttr(tupdesc, i)->attisdropped)
+			continue;
+
+		if (!first)
+			appendStringInfoString(buf, ", ");
+		first = false;
+
+		/* Use attribute name or column_name option. */
+		colname = NameStr(TupleDescAttr(tupdesc, i)->attname);
+		options = GetForeignColumnOptions(relid, i + 1);
+
+		foreach(lc, options)
+		{
+			DefElem    *def = (DefElem *) lfirst(lc);
+
+			if (strcmp(def->defname, "column_name") == 0)
+			{
+				colname = defGetString(def);
+				break;
+			}
+		}
+
+		appendStringInfoString(buf, quote_identifier(colname));
+
+		*retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
+	}
+
+	/* Don't generate bad syntax for zero-column relation. */
+	if (first)
+		appendStringInfoString(buf, "NULL");
+
+	/*
+	 * Construct FROM clause
+	 */
+	appendStringInfoString(buf, " FROM ");
+	deparseRelation(buf, rel);
+	appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac));
+}
+
+/*
+ * Construct SELECT statement to acquire sample rows of given relation,
+ * by sampling a fraction of the table using TABLESAMPLE SYSTEM.
+ *
+ * SELECT command is appended to buf, and list of columns retrieved
+ * is returned to *retrieved_attrs.
+ *
+ * Note: We could allow selecting system/bernoulli, and maybe even the
+ * optional TSM modules (especially tsm_system_rows would help).
+ */
+void
+deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac)
+{
+	Oid			relid = RelationGetRelid(rel);
+	TupleDesc	tupdesc = RelationGetDescr(rel);
+	int			i;
+	char	   *colname;
+	List	   *options;
+	ListCell   *lc;
+	bool		first = true;
+
+	*retrieved_attrs = NIL;
+
+	appendStringInfoString(buf, "SELECT ");
+	for (i = 0; i < tupdesc->natts; i++)
+	{
+		/* Ignore dropped columns. */
+		if (TupleDescAttr(tupdesc, i)->attisdropped)
+			continue;
+
+		if (!first)
+			appendStringInfoString(buf, ", ");
+		first = false;
+
+		/* Use attribute name or column_name option. */
+		colname = NameStr(TupleDescAttr(tupdesc, i)->attname);
+		options = GetForeignColumnOptions(relid, i + 1);
+
+		foreach(lc, options)
+		{
+			DefElem    *def = (DefElem *) lfirst(lc);
+
+			if (strcmp(def->defname, "column_name") == 0)
+			{
+				colname = defGetString(def);
+				break;
+			}
+		}
+
+		appendStringInfoString(buf, quote_identifier(colname));
+
+		*retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
+	}
+
+	/* Don't generate bad syntax for zero-column relation. */
+	if (first)
+		appendStringInfoString(buf, "NULL");
+
+	/*
+	 * Construct FROM clause
+	 */
+	appendStringInfoString(buf, " FROM ");
+	deparseRelation(buf, rel);
+	appendStringInfo(buf, " WHERE random() < %f", sample_frac);
+}
+
 /*
  * Construct a simple "TRUNCATE rel" statement
  */
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index 2c6b2894b96..65fc1acadcd 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -121,7 +121,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
 			strcmp(def->defname, "updatable") == 0 ||
 			strcmp(def->defname, "truncatable") == 0 ||
 			strcmp(def->defname, "async_capable") == 0 ||
-			strcmp(def->defname, "keep_connections") == 0)
+			strcmp(def->defname, "keep_connections") == 0 ||
+			strcmp(def->defname, "sample") == 0)
 		{
 			/* these accept only boolean values */
 			(void) defGetBoolean(def);
@@ -252,6 +253,10 @@ InitPgFdwOptions(void)
 		{"keep_connections", ForeignServerRelationId, false},
 		{"password_required", UserMappingRelationId, false},
 
+		/* sampling is available on both server and table */
+		{"sample", ForeignServerRelationId, false},
+		{"sample", ForeignTableRelationId, false},
+
 		/*
 		 * sslcert and sslkey are in fact libpq options, but we repeat them
 		 * here to allow them to appear in both foreign server context (when
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 56654844e8f..6040ee199aa 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -4961,6 +4961,68 @@ postgresAnalyzeForeignTable(Relation relation,
 	return true;
 }
 
+/*
+ * postgresCountTuplesForForeignTable
+ *		Count tuples in foreign table (just get pg_class.reltuples).
+ *
+ * Note: It's unclear how accurate reltuples is, maybe size that using
+ * relpages and simple assumptions (1 tuples per page, ...)? Using
+ * tsm_system_rows wold make this somewhat unnecessary.
+ */
+static double
+postgresCountTuplesForForeignTable(Relation relation)
+{
+	ForeignTable *table;
+	UserMapping *user;
+	PGconn	   *conn;
+	StringInfoData sql;
+	PGresult   *volatile res = NULL;
+	double		reltuples = -1;
+
+	/*
+	 * Now we have to get the number of pages.  It's annoying that the ANALYZE
+	 * API requires us to return that now, because it forces some duplication
+	 * of effort between this routine and postgresAcquireSampleRowsFunc.  But
+	 * it's probably not worth redefining that API at this point.
+	 */
+
+	/*
+	 * Get the connection to use.  We do the remote access as the table's
+	 * owner, even if the ANALYZE was started by some other user.
+	 */
+	table = GetForeignTable(RelationGetRelid(relation));
+	user = GetUserMapping(relation->rd_rel->relowner, table->serverid);
+	conn = GetConnection(user, false, NULL);
+
+	/*
+	 * Construct command to get page count for relation.
+	 */
+	initStringInfo(&sql);
+	deparseAnalyzeTuplesSql(&sql, relation);
+
+	/* In what follows, do not risk leaking any PGresults. */
+	PG_TRY();
+	{
+		res = pgfdw_exec_query(conn, sql.data, NULL);
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+			pgfdw_report_error(ERROR, res, conn, false, sql.data);
+
+		if (PQntuples(res) != 1 || PQnfields(res) != 1)
+			elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query");
+		reltuples = strtod(PQgetvalue(res, 0, 0), NULL);
+	}
+	PG_FINALLY();
+	{
+		if (res)
+			PQclear(res);
+	}
+	PG_END_TRY();
+
+	ReleaseConnection(conn);
+
+	return reltuples;
+}
+
 /*
  * Acquire a random sample of rows from foreign table managed by postgres_fdw.
  *
@@ -4991,6 +5053,14 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	unsigned int cursor_number;
 	StringInfoData sql;
 	PGresult   *volatile res = NULL;
+	ListCell   *lc;
+	int			server_version_num;
+
+	/* sampling enabled by default */
+	bool		do_sample = true;
+	bool		use_tablesample = true;
+	double		sample_frac = -1.0;
+	double		reltuples;
 
 	/* Initialize workspace state */
 	astate.rel = relation;
@@ -5018,20 +5088,83 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	user = GetUserMapping(relation->rd_rel->relowner, table->serverid);
 	conn = GetConnection(user, false, NULL);
 
+	/* We'll need server version, so fetch it now. */
+	server_version_num = PQserverVersion(conn);
+
+	/* disable tablesample on old remote servers */
+	if (server_version_num < 95000)
+		use_tablesample = false;
+
+	/*
+	 * Should we use TABLESAMPLE to collect the remote sample?
+	 */
+	foreach(lc, server->options)
+	{
+		DefElem    *def = (DefElem *) lfirst(lc);
+
+		if (strcmp(def->defname, "sample") == 0)
+		{
+			do_sample = defGetBoolean(def);
+			break;
+		}
+	}
+	foreach(lc, table->options)
+	{
+		DefElem    *def = (DefElem *) lfirst(lc);
+
+		if (strcmp(def->defname, "sample") == 0)
+		{
+			do_sample = defGetBoolean(def);
+			break;
+		}
+	}
+
+	if (do_sample)
+	{
+		reltuples = postgresCountTuplesForForeignTable(relation);
+
+		if ((reltuples <= 0) || (targrows >= reltuples))
+			do_sample = false;
+
+		sample_frac = targrows / reltuples;
+
+		/* Let's sample a bit more, we'll reduce the sample locally. */
+		sample_frac *= 1.25;
+
+		/* Sanity checks. */
+		sample_frac = Min(1.0, Max(0.0, sample_frac));
+
+		/*
+		 * When sampling too large fraction, just read everything.
+		 *
+		 * XXX It's not clear where exactly the threshold is, with slow
+		 * network it may be cheaper to sample even 90%.
+		 */
+		if (sample_frac > 0.5)
+			do_sample = false;
+	}
+
 	/*
 	 * Construct cursor that retrieves whole rows from remote.
 	 */
 	cursor_number = GetCursorNumber(conn);
 	initStringInfo(&sql);
 	appendStringInfo(&sql, "DECLARE c%u CURSOR FOR ", cursor_number);
-	deparseAnalyzeSql(&sql, relation, &astate.retrieved_attrs);
+
+	if (do_sample && use_tablesample)
+		deparseAnalyzeTableSampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac);
+	else if (do_sample)
+		deparseAnalyzeLegacySampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac);
+	else
+		deparseAnalyzeSql(&sql, relation, &astate.retrieved_attrs);
+
+	elog(WARNING, "SQL: %s", sql.data);
 
 	/* In what follows, do not risk leaking any PGresults. */
 	PG_TRY();
 	{
 		char		fetch_sql[64];
 		int			fetch_size;
-		ListCell   *lc;
 
 		res = pgfdw_exec_query(conn, sql.data, NULL);
 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
@@ -5120,7 +5253,10 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	*totaldeadrows = 0.0;
 
 	/* We've retrieved all living tuples from foreign server. */
-	*totalrows = astate.samplerows;
+	if (do_sample)
+		*totalrows = reltuples;
+	else
+		*totalrows = astate.samplerows;
 
 	/*
 	 * Emit some interesting relation info
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 8ae79e97e44..6bc0a93de68 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -208,8 +208,15 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
 								   List *returningList,
 								   List **retrieved_attrs);
 extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
+extern void deparseAnalyzeTuplesSql(StringInfo buf, Relation rel);
 extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
 							  List **retrieved_attrs);
+extern void deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel,
+										 List **retrieved_attrs,
+										 double sample_frac);
+extern void deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel,
+										  List **retrieved_attrs,
+										  double sample_frac);
 extern void deparseTruncateSql(StringInfo buf,
 							   List *rels,
 							   DropBehavior behavior,
-- 
2.34.1

Reply via email to