Hi,
here's a slightly updated version of the patch series. The 0001 part
adds tracking of server_version_num, so that it's possible to enable
other features depending on it. In this case it's used to decide whether
TABLESAMPLE is supported.
The 0002 part modifies the sampling. I realized we can do something
similar even on pre-9.5 releases, by running "WHERE random() < $1". Not
perfect, because it still has to read the whole table, but still better
than also sending it over the network.
There's a "sample" option for foreign server/table, which can be used to
disable the sampling if needed.
A simple measurement on a table with 10M rows, on localhost.
old: 6600ms
random: 450ms
tablesample: 40ms (system)
tablesample: 200ms (bernoulli)
Local analyze takes ~190ms, so that's quite close.
regards
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From 6524f8c6f0db13c5dca0438bdda194ee5bedebbb Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Fri, 18 Feb 2022 01:32:25 +0100
Subject: [PATCH 1/2] postgres_fdw: track server version for connections
To allow using features that only exist on new Postgres versions, we
need some information about version of the remote node. We simply
request server_version_num from the remote node. We only fetch it if
version number is actually needed, and we cache it.
---
contrib/postgres_fdw/connection.c | 44 +++++++++++++++++++++++++++++
contrib/postgres_fdw/postgres_fdw.h | 2 ++
2 files changed, 46 insertions(+)
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index f753c6e2324..3ea2948d3ec 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -279,6 +279,50 @@ GetConnection(UserMapping *user, bool will_prep_stmt, PgFdwConnState **state)
return entry->conn;
}
+/*
+ * Determine remote server version (as an int value).
+ *
+ * The value is determined only once and then cached in PgFdwConnState.
+ */
+int
+GetServerVersion(PGconn *conn, PgFdwConnState *state)
+{
+ PGresult *volatile res = NULL;
+
+ /*
+ * If we already know server version for this connection, we're done.
+ */
+ if (state->server_version_num != 0)
+ return state->server_version_num;
+
+ /* PGresult must be released before leaving this function. */
+ PG_TRY();
+ {
+ char *line;
+
+ /*
+ * Execute EXPLAIN remotely.
+ */
+ res = pgfdw_exec_query(conn, "SHOW server_version_num", NULL);
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ pgfdw_report_error(ERROR, res, conn, false, "SHOW server_version_num");
+
+ /*
+ * Extract the server version number.
+ */
+ line = PQgetvalue(res, 0, 0);
+ state->server_version_num = strtol(line, NULL, 10);
+ }
+ PG_FINALLY();
+ {
+ if (res)
+ PQclear(res);
+ }
+ PG_END_TRY();
+
+ return state->server_version_num;
+}
+
/*
* Reset all transient state fields in the cached connection entry and
* establish new connection to the remote server.
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 8ae79e97e44..1687c62df2d 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -132,6 +132,7 @@ typedef struct PgFdwRelationInfo
typedef struct PgFdwConnState
{
AsyncRequest *pendingAreq; /* pending async request */
+ int server_version_num; /* remote server version */
} PgFdwConnState;
/* in postgres_fdw.c */
@@ -143,6 +144,7 @@ extern void process_pending_request(AsyncRequest *areq);
extern PGconn *GetConnection(UserMapping *user, bool will_prep_stmt,
PgFdwConnState **state);
extern void ReleaseConnection(PGconn *conn);
+extern int GetServerVersion(PGconn *conn, PgFdwConnState *state);
extern unsigned int GetCursorNumber(PGconn *conn);
extern unsigned int GetPrepStmtNumber(PGconn *conn);
extern void do_sql_command(PGconn *conn, const char *sql);
--
2.34.1
From 546657dd0d3ab3bebdb1145785c62809b78e7644 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Fri, 18 Feb 2022 00:33:19 +0100
Subject: [PATCH 2/2] postgres_fdw: sample data on remote node for ANALYZE
When performing ANALYZE on a foreign tables, we need to collect sample
of rows. Until now, we simply read all data from the remote node and
built the sample locally. That is very expensive, especially in terms of
network traffic etc. But it's possible to move the sampling to the
remote node, and use either TABLESAMPLE or simply random() to transfer
just much smaller amount of data.
So we run either
SELECT * FROM table TABLESAMPLE SYSTEM(fraction)
or
SELECT * FROM table WHERE random() < fraction
depending on the server version (TABLESAMPLE is supported since 9.5).
To do that, we need to determine what fraction of the table to sample.
We rely on reltuples (fetched from the remote node) to be sufficiently
accurate and up to date, and calculate the fraction based on that. We
increase the sample size a bit (in case the table shrunk), and we still
do the reservoir sampling (in case it grew).
Using tsm_system_rows would allow specifying sample size in rows,
without determining sampling rate. But the sampling method may not be
installed, and we'd still have to determine the relation size.
This adds 'sample' option for remote servers / tables. By default, it's
set to 'true' which enables remote sampling. Setting it to 'false' uses
the old approach of fetching everything and sampling locally.
---
contrib/postgres_fdw/deparse.c | 152 ++++++++++++++++++++++++++++
contrib/postgres_fdw/option.c | 7 +-
contrib/postgres_fdw/postgres_fdw.c | 145 +++++++++++++++++++++++++-
contrib/postgres_fdw/postgres_fdw.h | 7 ++
4 files changed, 306 insertions(+), 5 deletions(-)
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index bf12eac0288..32f2c0d5fb3 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2267,6 +2267,26 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel)
appendStringInfo(buf, "::pg_catalog.regclass) / %d", BLCKSZ);
}
+/*
+ * Construct SELECT statement to acquire numbe of rows of given relation.
+ *
+ * Note: Maybe this should compare relpages and current relation size
+ * and adjust reltuples accordingly?
+ */
+void
+deparseAnalyzeTuplesSql(StringInfo buf, Relation rel)
+{
+ StringInfoData relname;
+
+ /* We'll need the remote relation name as a literal. */
+ initStringInfo(&relname);
+ deparseRelation(&relname, rel);
+
+ appendStringInfoString(buf, "SELECT reltuples FROM pg_catalog.pg_class WHERE oid = ");
+ deparseStringLiteral(buf, relname.data);
+ appendStringInfoString(buf, "::pg_catalog.regclass");
+}
+
/*
* Construct SELECT statement to acquire sample rows of given relation.
*
@@ -2328,6 +2348,138 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
deparseRelation(buf, rel);
}
+/*
+ * Construct SELECT statement to acquire sample rows of given relation,
+ * by sampling a fraction of the table using TABLESAMPLE SYSTEM.
+ *
+ * SELECT command is appended to buf, and list of columns retrieved
+ * is returned to *retrieved_attrs.
+ *
+ * Note: We could allow selecting system/bernoulli, and maybe even the
+ * optional TSM modules (especially tsm_system_rows would help).
+ */
+void
+deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac)
+{
+ Oid relid = RelationGetRelid(rel);
+ TupleDesc tupdesc = RelationGetDescr(rel);
+ int i;
+ char *colname;
+ List *options;
+ ListCell *lc;
+ bool first = true;
+
+ *retrieved_attrs = NIL;
+
+ appendStringInfoString(buf, "SELECT ");
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ /* Ignore dropped columns. */
+ if (TupleDescAttr(tupdesc, i)->attisdropped)
+ continue;
+
+ if (!first)
+ appendStringInfoString(buf, ", ");
+ first = false;
+
+ /* Use attribute name or column_name option. */
+ colname = NameStr(TupleDescAttr(tupdesc, i)->attname);
+ options = GetForeignColumnOptions(relid, i + 1);
+
+ foreach(lc, options)
+ {
+ DefElem *def = (DefElem *) lfirst(lc);
+
+ if (strcmp(def->defname, "column_name") == 0)
+ {
+ colname = defGetString(def);
+ break;
+ }
+ }
+
+ appendStringInfoString(buf, quote_identifier(colname));
+
+ *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
+ }
+
+ /* Don't generate bad syntax for zero-column relation. */
+ if (first)
+ appendStringInfoString(buf, "NULL");
+
+ /*
+ * Construct FROM clause
+ */
+ appendStringInfoString(buf, " FROM ");
+ deparseRelation(buf, rel);
+ appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac));
+}
+
+/*
+ * Construct SELECT statement to acquire sample rows of given relation,
+ * by sampling a fraction of the table using TABLESAMPLE SYSTEM.
+ *
+ * SELECT command is appended to buf, and list of columns retrieved
+ * is returned to *retrieved_attrs.
+ *
+ * Note: We could allow selecting system/bernoulli, and maybe even the
+ * optional TSM modules (especially tsm_system_rows would help).
+ */
+void
+deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel, List **retrieved_attrs, double sample_frac)
+{
+ Oid relid = RelationGetRelid(rel);
+ TupleDesc tupdesc = RelationGetDescr(rel);
+ int i;
+ char *colname;
+ List *options;
+ ListCell *lc;
+ bool first = true;
+
+ *retrieved_attrs = NIL;
+
+ appendStringInfoString(buf, "SELECT ");
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ /* Ignore dropped columns. */
+ if (TupleDescAttr(tupdesc, i)->attisdropped)
+ continue;
+
+ if (!first)
+ appendStringInfoString(buf, ", ");
+ first = false;
+
+ /* Use attribute name or column_name option. */
+ colname = NameStr(TupleDescAttr(tupdesc, i)->attname);
+ options = GetForeignColumnOptions(relid, i + 1);
+
+ foreach(lc, options)
+ {
+ DefElem *def = (DefElem *) lfirst(lc);
+
+ if (strcmp(def->defname, "column_name") == 0)
+ {
+ colname = defGetString(def);
+ break;
+ }
+ }
+
+ appendStringInfoString(buf, quote_identifier(colname));
+
+ *retrieved_attrs = lappend_int(*retrieved_attrs, i + 1);
+ }
+
+ /* Don't generate bad syntax for zero-column relation. */
+ if (first)
+ appendStringInfoString(buf, "NULL");
+
+ /*
+ * Construct FROM clause
+ */
+ appendStringInfoString(buf, " FROM ");
+ deparseRelation(buf, rel);
+ appendStringInfo(buf, " WHERE random() < %f", sample_frac);
+}
+
/*
* Construct a simple "TRUNCATE rel" statement
*/
diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c
index fc3ce6a53a2..02be48ec82c 100644
--- a/contrib/postgres_fdw/option.c
+++ b/contrib/postgres_fdw/option.c
@@ -121,7 +121,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
strcmp(def->defname, "updatable") == 0 ||
strcmp(def->defname, "truncatable") == 0 ||
strcmp(def->defname, "async_capable") == 0 ||
- strcmp(def->defname, "keep_connections") == 0)
+ strcmp(def->defname, "keep_connections") == 0 ||
+ strcmp(def->defname, "sample") == 0)
{
/* these accept only boolean values */
(void) defGetBoolean(def);
@@ -252,6 +253,10 @@ InitPgFdwOptions(void)
{"keep_connections", ForeignServerRelationId, false},
{"password_required", UserMappingRelationId, false},
+ /* sampling is available on both server and table */
+ {"sample", ForeignServerRelationId, false},
+ {"sample", ForeignTableRelationId, false},
+
/*
* sslcert and sslkey are in fact libpq options, but we repeat them
* here to allow them to appear in both foreign server context (when
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 56654844e8f..6eafe520c4e 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -4961,6 +4961,68 @@ postgresAnalyzeForeignTable(Relation relation,
return true;
}
+/*
+ * postgresCountTuplesForForeignTable
+ * Count tuples in foreign table (just get pg_class.reltuples).
+ *
+ * Note: It's unclear how accurate reltuples is, maybe size that using
+ * relpages and simple assumptions (1 tuples per page, ...)? Using
+ * tsm_system_rows wold make this somewhat unnecessary.
+ */
+static double
+postgresCountTuplesForForeignTable(Relation relation)
+{
+ ForeignTable *table;
+ UserMapping *user;
+ PGconn *conn;
+ StringInfoData sql;
+ PGresult *volatile res = NULL;
+ double reltuples = -1;
+
+ /*
+ * Now we have to get the number of pages. It's annoying that the ANALYZE
+ * API requires us to return that now, because it forces some duplication
+ * of effort between this routine and postgresAcquireSampleRowsFunc. But
+ * it's probably not worth redefining that API at this point.
+ */
+
+ /*
+ * Get the connection to use. We do the remote access as the table's
+ * owner, even if the ANALYZE was started by some other user.
+ */
+ table = GetForeignTable(RelationGetRelid(relation));
+ user = GetUserMapping(relation->rd_rel->relowner, table->serverid);
+ conn = GetConnection(user, false, NULL);
+
+ /*
+ * Construct command to get page count for relation.
+ */
+ initStringInfo(&sql);
+ deparseAnalyzeTuplesSql(&sql, relation);
+
+ /* In what follows, do not risk leaking any PGresults. */
+ PG_TRY();
+ {
+ res = pgfdw_exec_query(conn, sql.data, NULL);
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ pgfdw_report_error(ERROR, res, conn, false, sql.data);
+
+ if (PQntuples(res) != 1 || PQnfields(res) != 1)
+ elog(ERROR, "unexpected result from deparseAnalyzeSizeSql query");
+ reltuples = strtod(PQgetvalue(res, 0, 0), NULL);
+ }
+ PG_FINALLY();
+ {
+ if (res)
+ PQclear(res);
+ }
+ PG_END_TRY();
+
+ ReleaseConnection(conn);
+
+ return reltuples;
+}
+
/*
* Acquire a random sample of rows from foreign table managed by postgres_fdw.
*
@@ -4991,6 +5053,15 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
unsigned int cursor_number;
StringInfoData sql;
PGresult *volatile res = NULL;
+ ListCell *lc;
+ PgFdwConnState *state;
+ int server_version_num;
+
+ /* sampling enabled by default */
+ bool do_sample = true;
+ bool use_tablesample = true;
+ double sample_frac = -1.0;
+ double reltuples;
/* Initialize workspace state */
astate.rel = relation;
@@ -5016,7 +5087,63 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
table = GetForeignTable(RelationGetRelid(relation));
server = GetForeignServer(table->serverid);
user = GetUserMapping(relation->rd_rel->relowner, table->serverid);
- conn = GetConnection(user, false, NULL);
+ conn = GetConnection(user, false, &state);
+
+ /* We'll need server version, so fetch it now. */
+ server_version_num = GetServerVersion(conn, state);
+
+ /* disable tablesample on old remote servers */
+ if (server_version_num < 95000)
+ use_tablesample = false;
+
+ /*
+ * Should we use TABLESAMPLE to collect the remote sample?
+ */
+ foreach(lc, server->options)
+ {
+ DefElem *def = (DefElem *) lfirst(lc);
+
+ if (strcmp(def->defname, "sample") == 0)
+ {
+ do_sample = defGetBoolean(def);
+ break;
+ }
+ }
+ foreach(lc, table->options)
+ {
+ DefElem *def = (DefElem *) lfirst(lc);
+
+ if (strcmp(def->defname, "sample") == 0)
+ {
+ do_sample = defGetBoolean(def);
+ break;
+ }
+ }
+
+ if (do_sample)
+ {
+ reltuples = postgresCountTuplesForForeignTable(relation);
+
+ if ((reltuples <= 0) || (targrows >= reltuples))
+ do_sample = false;
+
+ sample_frac = targrows / reltuples;
+
+ /* Let's sample a bit more, we'll reduce the sample locally. */
+ sample_frac *= 1.25;
+
+ /* Sanity checks. */
+ sample_frac = Min(1.0, Max(0.0, sample_frac));
+
+ /*
+ * When sampling too large fraction, just read everything.
+ *
+ * XXX It's not clear where exactly the threshold is, with slow
+ * network it may be cheaper to sample even 90%.
+ */
+ if (sample_frac > 0.5)
+ do_sample = false;
+ }
/*
* Construct cursor that retrieves whole rows from remote.
@@ -5024,14 +5151,21 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
cursor_number = GetCursorNumber(conn);
initStringInfo(&sql);
appendStringInfo(&sql, "DECLARE c%u CURSOR FOR ", cursor_number);
- deparseAnalyzeSql(&sql, relation, &astate.retrieved_attrs);
+
+ if (do_sample && use_tablesample)
+ deparseAnalyzeTableSampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac);
+ else if (do_sample)
+ deparseAnalyzeLegacySampleSql(&sql, relation, &astate.retrieved_attrs, sample_frac);
+ else
+ deparseAnalyzeSql(&sql, relation, &astate.retrieved_attrs);
+
+ elog(WARNING, "SQL: %s", sql.data);
/* In what follows, do not risk leaking any PGresults. */
PG_TRY();
{
char fetch_sql[64];
int fetch_size;
- ListCell *lc;
res = pgfdw_exec_query(conn, sql.data, NULL);
if (PQresultStatus(res) != PGRES_COMMAND_OK)
@@ -5120,7 +5254,10 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
*totaldeadrows = 0.0;
/* We've retrieved all living tuples from foreign server. */
- *totalrows = astate.samplerows;
+ if (do_sample)
+ *totalrows = reltuples;
+ else
+ *totalrows = astate.samplerows;
/*
* Emit some interesting relation info
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 1687c62df2d..54b6556b20c 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -210,8 +210,15 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
List *returningList,
List **retrieved_attrs);
extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
+extern void deparseAnalyzeTuplesSql(StringInfo buf, Relation rel);
extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
List **retrieved_attrs);
+extern void deparseAnalyzeTableSampleSql(StringInfo buf, Relation rel,
+ List **retrieved_attrs,
+ double sample_frac);
+extern void deparseAnalyzeLegacySampleSql(StringInfo buf, Relation rel,
+ List **retrieved_attrs,
+ double sample_frac);
extern void deparseTruncateSql(StringInfo buf,
List *rels,
DropBehavior behavior,
--
2.34.1