On Sat, 2025-02-22 at 00:00 -0500, Corey Huinker wrote: > > Attached is the first optimization, which gets rid of the pg_class > queries entirely, instead getting the same information from the > existing queries in getTables and getIndexes.
Attached a revised version. The main changes are that the only struct it changes is RelStatsInfo, and it doesn't carry around string values. IIUC, your version carried around the string values so that there would be no conversion; it would hold the string from one result to the next. That makes sense, but it seemed to change a lot of struct fields, and have unnecessary string copying and memory usage which was not freed. Instead, I used float_to_shortest_decimal_buf(), which is what float4out() uses, which should be a consistent way to convert the float value. That meant that we couldn't use appendNamedArgument() as easily, but it wasn't helping much in that function anyway, because it was no longer a loop. I didn't measure any performance difference between your version and mine, but avoiding a few allocations couldn't hurt. It seems to save just under 20% on an unoptimized build. Regards, Jeff Davis
From 2f16b7cf941fc14e3156e8ddc536c8f93f97eb77 Mon Sep 17 00:00:00 2001 From: Corey Huinker <corey.huin...@gmail.com> Date: Fri, 21 Feb 2025 23:31:04 -0500 Subject: [PATCH v2j] Avoid unnecessary relation stats query in pg_dump. The few fields we need can be easily collected in getTables() and getIndexes() and stored in RelStatsInfo. Co-authored-by: Corey Huinker <corey.huin...@gmail.com> Co-authored-by: Jeff Davis <pg...@j-davis.com> Discussion: https://postgr.es/m/CADkLM=f0a43atd88xw4xcfayef25g-7htrhx_whv40hyocs...@mail.gmail.com --- src/bin/pg_dump/pg_dump.c | 142 +++++++++++++++----------------------- src/bin/pg_dump/pg_dump.h | 5 +- 2 files changed, 61 insertions(+), 86 deletions(-) diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index afd79287177..d119ce716b0 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -56,6 +56,7 @@ #include "common/connect.h" #include "common/int.h" #include "common/relpath.h" +#include "common/shortest_dec.h" #include "compress_io.h" #include "dumputils.h" #include "fe_utils/option_utils.h" @@ -6814,7 +6815,8 @@ getFuncs(Archive *fout) * */ static RelStatsInfo * -getRelationStatistics(Archive *fout, DumpableObject *rel, char relkind) +getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, + float reltuples, int32 relallvisible, char relkind) { if (!fout->dopt->dumpStatistics) return NULL; @@ -6839,6 +6841,9 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, char relkind) dobj->components |= DUMP_COMPONENT_STATISTICS; dobj->name = pg_strdup(rel->name); dobj->namespace = rel->namespace; + info->relpages = relpages; + info->reltuples = reltuples; + info->relallvisible = relallvisible; info->relkind = relkind; info->postponed_def = false; @@ -6874,6 +6879,8 @@ getTables(Archive *fout, int *numTables) int i_relhasindex; int i_relhasrules; int i_relpages; + int i_reltuples; + int i_relallvisible; int i_toastpages; int i_owning_tab; int i_owning_col; @@ -6924,7 +6931,7 @@ getTables(Archive *fout, int *numTables) "c.relowner, " "c.relchecks, " "c.relhasindex, c.relhasrules, c.relpages, " - "c.relhastriggers, " + "c.reltuples, c.relallvisible, c.relhastriggers, " "c.relpersistence, " "c.reloftype, " "c.relacl, " @@ -7088,6 +7095,8 @@ getTables(Archive *fout, int *numTables) i_relhasindex = PQfnumber(res, "relhasindex"); i_relhasrules = PQfnumber(res, "relhasrules"); i_relpages = PQfnumber(res, "relpages"); + i_reltuples = PQfnumber(res, "reltuples"); + i_relallvisible = PQfnumber(res, "relallvisible"); i_toastpages = PQfnumber(res, "toastpages"); i_owning_tab = PQfnumber(res, "owning_tab"); i_owning_col = PQfnumber(res, "owning_col"); @@ -7134,6 +7143,9 @@ getTables(Archive *fout, int *numTables) for (i = 0; i < ntups; i++) { + float reltuples = atof(PQgetvalue(res, i, i_reltuples)); + int32 relallvisible = atoi(PQgetvalue(res, i, i_relallvisible)); + tblinfo[i].dobj.objType = DO_TABLE; tblinfo[i].dobj.catId.tableoid = atooid(PQgetvalue(res, i, i_reltableoid)); tblinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_reloid)); @@ -7233,7 +7245,8 @@ getTables(Archive *fout, int *numTables) /* Add statistics */ if (tblinfo[i].interesting) - getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relkind); + getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relpages, + reltuples, relallvisible, tblinfo[i].relkind); /* * Read-lock target tables to make sure they aren't DROPPED or altered @@ -7499,6 +7512,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_oid, i_indrelid, i_indexname, + i_relpages, + i_reltuples, + i_relallvisible, i_parentidx, i_indexdef, i_indnkeyatts, @@ -7552,6 +7568,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) appendPQExpBufferStr(query, "SELECT t.tableoid, t.oid, i.indrelid, " "t.relname AS indexname, " + "t.relpages, t.reltuples, t.relallvisible, " "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, " "i.indkey, i.indisclustered, " "c.contype, c.conname, " @@ -7659,6 +7676,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) i_oid = PQfnumber(res, "oid"); i_indrelid = PQfnumber(res, "indrelid"); i_indexname = PQfnumber(res, "indexname"); + i_relpages = PQfnumber(res, "relpages"); + i_reltuples = PQfnumber(res, "reltuples"); + i_relallvisible = PQfnumber(res, "relallvisible"); i_parentidx = PQfnumber(res, "parentidx"); i_indexdef = PQfnumber(res, "indexdef"); i_indnkeyatts = PQfnumber(res, "indnkeyatts"); @@ -7725,6 +7745,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) char contype; char indexkind; RelStatsInfo *relstats; + int32 relpages = atoi(PQgetvalue(res, j, i_relpages)); + float reltuples = atof(PQgetvalue(res, j, i_reltuples)); + int32 relallvisible = atoi(PQgetvalue(res, j, i_relallvisible)); indxinfo[j].dobj.objType = DO_INDEX; indxinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); @@ -7759,7 +7782,8 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) indexkind = RELKIND_PARTITIONED_INDEX; contype = *(PQgetvalue(res, j, i_contype)); - relstats = getRelationStatistics(fout, &indxinfo[j].dobj, indexkind); + relstats = getRelationStatistics(fout, &indxinfo[j].dobj, relpages, + reltuples, relallvisible, indexkind); if (contype == 'p' || contype == 'u' || contype == 'x') { @@ -10383,18 +10407,6 @@ dumpComment(Archive *fout, const char *type, catalogId, subid, dumpId, NULL); } -/* - * Tabular description of the parameters to pg_restore_relation_stats() - * param_name, param_type - */ -static const char *rel_stats_arginfo[][2] = { - {"relation", "regclass"}, - {"version", "integer"}, - {"relpages", "integer"}, - {"reltuples", "real"}, - {"relallvisible", "integer"}, -}; - /* * Tabular description of the parameters to pg_restore_attribute_stats() * param_name, param_type @@ -10419,30 +10431,6 @@ static const char *att_stats_arginfo[][2] = { {"range_bounds_histogram", "text"}, }; -/* - * getRelStatsExportQuery -- - * - * Generate a query that will fetch all relation (e.g. pg_class) - * stats for a given relation. - */ -static void -getRelStatsExportQuery(PQExpBuffer query, Archive *fout, - const char *schemaname, const char *relname) -{ - resetPQExpBuffer(query); - appendPQExpBufferStr(query, - "SELECT c.oid::regclass AS relation, " - "current_setting('server_version_num') AS version, " - "c.relpages, c.reltuples, c.relallvisible " - "FROM pg_class c " - "JOIN pg_namespace n " - "ON n.oid = c.relnamespace " - "WHERE n.nspname = "); - appendStringLiteralAH(query, schemaname, fout); - appendPQExpBufferStr(query, " AND c.relname = "); - appendStringLiteralAH(query, relname, fout); -} - /* * getAttStatsExportQuery -- * @@ -10454,21 +10442,22 @@ getAttStatsExportQuery(PQExpBuffer query, Archive *fout, const char *schemaname, const char *relname) { resetPQExpBuffer(query); - appendPQExpBufferStr(query, - "SELECT c.oid::regclass AS relation, " - "s.attname," - "s.inherited," - "current_setting('server_version_num') AS version, " - "s.null_frac," - "s.avg_width," - "s.n_distinct," - "s.most_common_vals," - "s.most_common_freqs," - "s.histogram_bounds," - "s.correlation," - "s.most_common_elems," - "s.most_common_elem_freqs," - "s.elem_count_histogram,"); + appendPQExpBuffer(query, + "SELECT c.oid::regclass AS relation, " + "s.attname," + "s.inherited," + "'%u'::integer AS version, " + "s.null_frac," + "s.avg_width," + "s.n_distinct," + "s.most_common_vals," + "s.most_common_freqs," + "s.histogram_bounds," + "s.correlation," + "s.most_common_elems," + "s.most_common_elem_freqs," + "s.elem_count_histogram,", + fout->remoteVersion); if (fout->remoteVersion >= 170000) appendPQExpBufferStr(query, @@ -10521,34 +10510,21 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, * Append a formatted pg_restore_relation_stats statement. */ static void -appendRelStatsImport(PQExpBuffer out, Archive *fout, PGresult *res) +appendRelStatsImport(PQExpBuffer out, Archive *fout, const RelStatsInfo *rsinfo) { - const char *sep = ""; + const char *qualname = fmtQualifiedId(rsinfo->dobj.namespace->dobj.name, rsinfo->dobj.name); + char reltuples_str[FLOAT_SHORTEST_DECIMAL_LEN]; - if (PQntuples(res) == 0) - return; + float_to_shortest_decimal_buf(rsinfo->reltuples, reltuples_str); appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n"); - - for (int argno = 0; argno < lengthof(rel_stats_arginfo); argno++) - { - const char *argname = rel_stats_arginfo[argno][0]; - const char *argtype = rel_stats_arginfo[argno][1]; - int fieldno = PQfnumber(res, argname); - - if (fieldno < 0) - pg_fatal("relation stats export query missing field '%s'", - argname); - - if (PQgetisnull(res, 0, fieldno)) - continue; - - appendPQExpBufferStr(out, sep); - appendNamedArgument(out, fout, argname, PQgetvalue(res, 0, fieldno), argtype); - - sep = ",\n"; - } - appendPQExpBufferStr(out, "\n);\n"); + appendPQExpBuffer(out, "\t'relation', '%s'::regclass,\n", qualname); + appendPQExpBuffer(out, "\t'version', '%u'::integer,\n", + fout->remoteVersion); + appendPQExpBuffer(out, "\t'relpages', '%d'::integer,\n", rsinfo->relpages); + appendPQExpBuffer(out, "\t'reltuples', '%s'::real,\n", reltuples_str); + appendPQExpBuffer(out, "\t'relallvisible', '%d'::integer\n);\n", + rsinfo->relallvisible); } /* @@ -10643,15 +10619,11 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) tag = createPQExpBuffer(); appendPQExpBufferStr(tag, fmtId(dobj->name)); - query = createPQExpBuffer(); out = createPQExpBuffer(); - getRelStatsExportQuery(query, fout, dobj->namespace->dobj.name, - dobj->name); - res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); - appendRelStatsImport(out, fout, res); - PQclear(res); + appendRelStatsImport(out, fout, rsinfo); + query = createPQExpBuffer(); getAttStatsExportQuery(query, fout, dobj->namespace->dobj.name, dobj->name); res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index f08f5905aa3..9d6a4857c4b 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -328,7 +328,7 @@ typedef struct _tableInfo Oid owning_tab; /* OID of table owning sequence */ int owning_col; /* attr # of column owning sequence */ bool is_identity_sequence; - int relpages; /* table's size in pages (from pg_class) */ + int32 relpages; /* table's size in pages (from pg_class) */ int toastpages; /* toast table's size in pages, if any */ bool interesting; /* true if need to collect more data */ @@ -438,6 +438,9 @@ typedef struct _indexAttachInfo typedef struct _relStatsInfo { DumpableObject dobj; + int32 relpages; + float reltuples; + int32 relallvisible; char relkind; /* 'r', 'm', 'i', etc */ bool postponed_def; /* stats must be postponed into post-data */ } RelStatsInfo; -- 2.34.1