On Wed, May 12, 2021 at 05:30:26PM +0800, Julien Rouhaud wrote: > On Wed, May 12, 2021 at 10:57:25AM +0200, Pavel Stehule wrote: > > > > My second proposal can work for your example too. pg_stat_statements have > > to require any active queryid computing. And when it is not available, then > > the exception should be raised. > > > > The custom queryid can return null, and still the queryid will be computed. > > Maybe the warning can be enough. Just, if somebody use pg_stat_statements > > function, then enforce the check if queryid is computed (compute_query_id > > is true || some hook is not null), and if not then raise a warning. > > Ah I'm sorry I misunderstood your proposal. Yes, definitely adding a warning > or an error when executing pg_stat_statements() SRF would help, that's a great > idea! > > I'll wait a bit in case someone has any objection, and if not send an updated > patch!
Hearing no complaint, PFA a v2 implementing such a warning. Here's an extract from the updated regression tests: -- Check that pg_stat_statements() will complain if the configuration appears -- to be broken. SET compute_query_id = off; SELECT pg_stat_statements_reset(); pg_stat_statements_reset -------------------------- (1 row) SELECT count(*) FROM pg_stat_statements; WARNING: Query identifier calculation seems to be disabled HINT: If you don't want to use a third-party module to compute query identifiers, you may want to enable compute_query_id count ------- 0 (1 row) I'm of course open to suggestions for some better wording.
From ae0bce48f5cf1de51aec21e2b34278f780a2a04b Mon Sep 17 00:00:00 2001 From: Julien Rouhaud <julien.rouh...@free.fr> Date: Tue, 11 May 2021 15:20:45 +0800 Subject: [PATCH v2] Change compute_query_id to an enum GUC. The current approach, which requires explicitly enabling compute_query_id on top of configuring pg_stat_statements in shared_preload_libraries, has proven to be unfriendly at best, so switch the GUC to an enum defaulting to "auto", which allows third-party plugins to enable in-core query identifier calculation if they require one. While at it, also change pg_stat_statements_internal() to raise a warning if the configuration looks broken (as in the calling query doesn't have a query_id, compute_query_id is explicitly disabled and there's nothing stored in pg_stat_statements hash). A new queryIdWanted() function is provided to let plugins inform us that they require a query identifier; it will enable query identifier calculation iff compute_query_id is set to "auto". Note that if that function is called during postmaster startup (typically during process_shared_preload_libraries()) then it will be enabled globally, otherwise it will only be enabled in the session(s) that loaded a plugin calling queryIdWanted(). 
Author: Julien Rouhaud Discussion: https://postgr.es/m/35457b09-36f8-add3-1d07-6034fa585...@oss.nttdata.com --- .../expected/pg_stat_statements.out | 17 +++++++++ .../pg_stat_statements/pg_stat_statements.c | 21 ++++++++++ .../pg_stat_statements.conf | 1 - .../sql/pg_stat_statements.sql | 6 +++ doc/src/sgml/config.sgml | 9 ++++- doc/src/sgml/pgstatstatements.sgml | 13 +++++-- src/backend/commands/explain.c | 2 +- src/backend/parser/analyze.c | 4 +- src/backend/tcop/postgres.c | 2 +- src/backend/utils/misc/guc.c | 38 ++++++++++++++----- src/backend/utils/misc/postgresql.conf.sample | 2 +- src/backend/utils/misc/queryjumble.c | 15 ++++++++ src/include/utils/guc.h | 1 - src/include/utils/queryjumble.h | 12 ++++++ 14 files changed, 120 insertions(+), 23 deletions(-) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 40b5109b55..b071ffc9d5 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -1067,4 +1067,21 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; 2 (1 row) +-- Check that pg_stat_statements() will complain if the configuration appears +-- to be broken. 
+SET compute_query_id = off; +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT count(*) FROM pg_stat_statements; +WARNING: Query identifier calculation seems to be disabled +HINT: If you don't want to use a third-party module to compute query identifiers, you may want to enable compute_query_id + count +------- + 0 +(1 row) + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index f42f07622e..8f5285e09d 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -369,6 +369,12 @@ _PG_init(void) if (!process_shared_preload_libraries_in_progress) return; + /* + * Inform the postmaster that we want to enable query_id calculation if + * compute_query_id is set to auto. + */ + queryIdWanted(); + /* * Define (or redefine) custom GUC variables. */ @@ -1616,6 +1622,21 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, */ LWLockAcquire(pgss->lock, LW_SHARED); + /* + * If no query_id has been computed for the calling query and there are + * no entries stored, then there's likely a configuration error that the caller + * may not be aware of, so point it out. 
+ */ + if (pgstat_get_my_query_id() == UINT64CONST(0) && + compute_query_id == COMPUTE_QUERY_ID_OFF && + hash_get_num_entries(pgss_hash) == 0) + ereport(WARNING, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Query identifier calculation seems to be disabled"), + errhint("If you don't want to use a third-party module to" + " compute query identifiers, you may want to enable" + " compute_query_id"))); + if (showtext) { /* diff --git a/contrib/pg_stat_statements/pg_stat_statements.conf b/contrib/pg_stat_statements/pg_stat_statements.conf index e47b26040f..13346e2807 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.conf +++ b/contrib/pg_stat_statements/pg_stat_statements.conf @@ -1,2 +1 @@ shared_preload_libraries = 'pg_stat_statements' -compute_query_id = on diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index bc3b6493e6..827a8e3d18 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -437,4 +437,10 @@ SELECT ( SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; +-- Check that pg_stat_statements() will complain if the configuration appears +-- to be broken. 
+SET compute_query_id = off; +SELECT pg_stat_statements_reset(); +SELECT count(*) FROM pg_stat_statements; + DROP EXTENSION pg_stat_statements; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 45bd1f1b7e..60d8b24f5e 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7627,7 +7627,7 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; <variablelist> <varlistentry id="guc-compute-query-id" xreflabel="compute_query_id"> - <term><varname>compute_query_id</varname> (<type>boolean</type>) + <term><varname>compute_query_id</varname> (<type>enum</type>) <indexterm> <primary><varname>compute_query_id</varname> configuration parameter</primary> </indexterm> @@ -7643,7 +7643,12 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; identifier to be computed. Note that an external module can alternatively be used if the in-core query identifier computation method is not acceptable. In this case, in-core computation - must be disabled. The default is <literal>off</literal>. + must always be disabled. + Valid values are <literal>off</literal> (always disabled), + <literal>on</literal> (always enabled) and <literal>auto</literal>, + which lets modules such as <xref linkend="pgstatstatements"/> + automatically enable it. + The default is <literal>auto</literal>. </para> <note> <para> diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index bc2b6038ee..acfb134797 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -22,10 +22,15 @@ <para> The module will not track statistics unless query - identifiers are calculated. This can be done by enabling <xref - linkend="guc-compute-query-id"/> or using a third-party module that - computes its own query identifiers. Note that all statistics tracked - by this module must be reset if the query identifier method is changed. + identifiers are calculated. 
This is done automatically when this + extension is configured in <literal>shared_preload_libraries</literal> and + <xref linkend="guc-compute-query-id"/> is set to <literal>auto</literal> + (which is the default value), or always if <xref + linkend="guc-compute-query-id"/> is set to <literal>on</literal>. + You must set <xref linkend="guc-compute-query-id"/> to <literal>off</literal> + if you want to use a third-party module that computes its own query + identifiers. Note that all statistics tracked by this module must be + reset if the query identifier method is changed. </para> <para> diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 8ab7bca866..9d384234b0 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -245,7 +245,7 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt, es->summary = (summary_set) ? es->summary : es->analyze; query = castNode(Query, stmt->query); - if (compute_query_id) + if (compute_query_id == COMPUTE_QUERY_ID_ON) jstate = JumbleQuery(query, pstate->p_sourcetext); if (post_parse_analyze_hook) diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index e415bc3df0..1108f4133a 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -124,7 +124,7 @@ parse_analyze(RawStmt *parseTree, const char *sourceText, query = transformTopLevelStmt(pstate, parseTree); - if (compute_query_id) + if (compute_query_id == COMPUTE_QUERY_ID_ON) jstate = JumbleQuery(query, sourceText); if (post_parse_analyze_hook) @@ -163,7 +163,7 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText, /* make sure all is well with parameter types */ check_variable_parameters(pstate, query); - if (compute_query_id) + if (compute_query_id == COMPUTE_QUERY_ID_ON) jstate = JumbleQuery(query, sourceText); if (post_parse_analyze_hook) diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 2d6d145ecc..e4b76af557 100644 --- 
a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -704,7 +704,7 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree, query = transformTopLevelStmt(pstate, parsetree); - if (compute_query_id) + if (compute_query_id == COMPUTE_QUERY_ID_ON) jstate = JumbleQuery(query, query_string); if (post_parse_analyze_hook) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0a180341c2..835b322c29 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -101,6 +101,7 @@ #include "utils/plancache.h" #include "utils/portal.h" #include "utils/ps_status.h" +#include "utils/queryjumble.h" #include "utils/rls.h" #include "utils/snapmgr.h" #include "utils/tzparser.h" @@ -402,6 +403,23 @@ static const struct config_enum_entry backslash_quote_options[] = { {NULL, 0, false} }; +/* + * Although only "on", "off", and "auto" are documented, we accept all the + * likely variants of "on" and "off". + */ +static const struct config_enum_entry compute_query_id_options[] = { + {"auto", COMPUTE_QUERY_ID_AUTO, false}, + {"on", COMPUTE_QUERY_ID_ON, false}, + {"off", COMPUTE_QUERY_ID_OFF, false}, + {"true", COMPUTE_QUERY_ID_ON, true}, + {"false", COMPUTE_QUERY_ID_OFF, true}, + {"yes", COMPUTE_QUERY_ID_ON, true}, + {"no", COMPUTE_QUERY_ID_OFF, true}, + {"1", COMPUTE_QUERY_ID_ON, true}, + {"0", COMPUTE_QUERY_ID_OFF, true}, + {NULL, 0, false} +}; + /* * Although only "on", "off", and "partition" are documented, we * accept all the likely variants of "on" and "off". 
@@ -534,7 +552,6 @@ extern const struct config_enum_entry dynamic_shared_memory_options[]; /* * GUC option variables that are exported from this module */ -bool compute_query_id = false; bool log_duration = false; bool Debug_print_plan = false; bool Debug_print_parse = false; @@ -1441,15 +1458,6 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, - { - {"compute_query_id", PGC_SUSET, STATS_MONITORING, - gettext_noop("Compute query identifiers."), - NULL - }, - &compute_query_id, - false, - NULL, NULL, NULL - }, { {"log_parser_stats", PGC_SUSET, STATS_MONITORING, gettext_noop("Writes parser performance statistics to the server log."), @@ -4618,6 +4626,16 @@ static struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"compute_query_id", PGC_SUSET, STATS_MONITORING, + gettext_noop("Compute query identifiers."), + NULL + }, + &compute_query_id, + COMPUTE_QUERY_ID_AUTO, compute_query_id_options, + NULL, NULL, NULL + }, + { {"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Enables the planner to use constraints to optimize queries."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index efde01ee56..6e36e4c2ef 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -604,7 +604,7 @@ # - Monitoring - -#compute_query_id = off +#compute_query_id = auto #log_statement_stats = off #log_parser_stats = off #log_planner_stats = off diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c index 1bb9fe20ea..4189fdfa53 100644 --- a/src/backend/utils/misc/queryjumble.c +++ b/src/backend/utils/misc/queryjumble.c @@ -39,6 +39,9 @@ #define JUMBLE_SIZE 1024 /* query serialization buffer size */ +/* GUC parameters */ +int compute_query_id = COMPUTE_QUERY_ID_AUTO; + static uint64 compute_utility_query_id(const char *str, int query_location, int query_len); static void 
AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); @@ -131,6 +134,18 @@ JumbleQuery(Query *query, const char *querytext) return jstate; } +/* + * Enables compute_query_id if it's set to auto. + * + * Third-party plugins can use that function to inform the core that they + * require a query identifier to be computed. + */ +void queryIdWanted(void) +{ + if (compute_query_id == COMPUTE_QUERY_ID_AUTO) + compute_query_id = COMPUTE_QUERY_ID_ON; +} + /* * Compute a query identifier for the given utility query string. */ diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 24a5d9d3fb..a7c3a4958e 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -247,7 +247,6 @@ extern bool log_btree_build_stats; extern PGDLLIMPORT bool check_function_bodies; extern bool session_auth_is_superuser; -extern bool compute_query_id; extern bool log_duration; extern int log_parameter_max_length; extern int log_parameter_max_length_on_error; diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h index 83ba7339fa..578ed50a7e 100644 --- a/src/include/utils/queryjumble.h +++ b/src/include/utils/queryjumble.h @@ -52,7 +52,19 @@ typedef struct JumbleState int highest_extern_param_id; } JumbleState; +/* Values for the compute_query_id GUC */ +typedef enum +{ + COMPUTE_QUERY_ID_OFF, + COMPUTE_QUERY_ID_ON, + COMPUTE_QUERY_ID_AUTO +} ComputeQueryIdType; + +/* GUC parameters */ +extern int compute_query_id; + const char *CleanQuerytext(const char *query, int *location, int *len); JumbleState *JumbleQuery(Query *query, const char *querytext); +void queryIdWanted(void); #endif /* QUERYJUMBLE_H */ -- 2.31.1