On Wed, Mar 07, 2018 at 05:53:18PM -0800, Peter Geoghegan wrote:
> On Thu, Mar 1, 2018 at 2:47 PM, Peter Geoghegan <[email protected]> wrote:
> > No. Just an oversight. Looks like _bt_parallel_build_main() should
> > call pgstat_report_activity(), just like ParallelQueryMain().
> >
> > I'll come up with a patch soon.
>
> Attached patch has parallel CREATE INDEX propagate debug_query_string
> to workers.
Two test suites segfault under the following configuration:
cat >/tmp/max_parallel.conf <<EOF
log_line_prefix = '%m [%p:%l] %q%a '
log_connections = 'true'
log_disconnections = 'true'
log_statement = 'all'
fsync = off
authentication_timeout = '600s'
wal_sender_timeout = '18000s'
wal_level = minimal
max_wal_senders = 0
# Maximize parallelism. Many suites fail under these settings, due to plan
# changes. That is not a bug, but new crashes do qualify as bugs.
force_parallel_mode = regress
min_parallel_index_scan_size = 0
min_parallel_table_scan_size = 0
parallel_setup_cost = 0
parallel_tuple_cost = 0
EOF
make -C src/test/modules/worker_spi check TEMP_CONFIG=/tmp/max_parallel.conf #
[1]
make -C src/test/subscription check PROVE_TESTS=t/010_truncate.pl
TEMP_CONFIG=/tmp/max_parallel.conf # [2]
In both, the crash is at the strlen(debug_query_string) from $SUBJECT.
worker_spi is easy to fix by setting debug_query_string, as attached. Since
worker_spi has real query strings, I think that's an independent good. In the
other suite, logical replication apply_handle_truncate() constructs an
ExecuteTruncateGuts() call without use of a query string. Options:
1. Disable parallelism for the index build under ExecuteTruncateGuts().
Nobody will mourn a performance loss from declining parallelism for an
empty index, but I feel like this is fixing in the wrong place.
2. Make _bt_begin_parallel() and begin_parallel_vacuum() recognize the
debug_query_string==NULL case and reproduce it on the worker.
3. Require bgworkers to set debug_query_string before entering code of vacuum,
truncate, etc. Logical replication might synthesize a DDL statement, or it
might just use a constant string.
I tend to prefer (2), but (3) isn't bad. Opinions?
(With debug_query_string fixed, 010_truncate.pl next fails with "cannot take
query snapshot during a parallel operation". I'll report that separately,
probably on the thread for commit 039eb6e.)
[1]
#0 __strlen_sse2_pminub () at
../sysdeps/x86_64/multiarch/strlen-sse2-pminub.S:38
#1 0x0000000000508844 in _bt_begin_parallel (request=1,
isconcurrent=<optimized out>, buildstate=0x7ffe353692a0) at nbtsort.c:1536
#2 _bt_spools_heapscan (indexInfo=0x2446768, buildstate=0x7ffe353692a0,
index=..., heap=0x7ff995b85948) at nbtsort.c:394
#3 btbuild (heap=0x7ff995b85948, index=0x7ff995b85d78, indexInfo=0x2446768) at
nbtsort.c:326
#4 0x0000000000568d0b in index_build
(heapRelation=heapRelation@entry=0x7ff995b85948,
indexRelation=indexRelation@entry=0x7ff995b85d78,
indexInfo=indexInfo@entry=0x2446768, isreindex=isreindex@entry=false,
parallel=parallel@entry=true) at index.c:2898
#5 0x000000000056a450 in index_create
(heapRelation=heapRelation@entry=0x7ff995b85948,
indexRelationName=indexRelationName@entry=0x7ffe35369800
"pg_toast_16388_index", indexRelationId=16393,
indexRelationId@entry=0, parentIndexRelid=parentIndexRelid@entry=0,
parentConstraintId=parentConstraintId@entry=0, relFileNode=relFileNode@entry=0,
indexInfo=indexInfo@entry=0x2446768,
indexColNames=0x2445a28,
accessMethodObjectId=accessMethodObjectId@entry=403,
tableSpaceId=tableSpaceId@entry=0,
collationObjectId=collationObjectId@entry=0x7ffe35369780,
classObjectId=classObjectId@entry=0x7ffe35369790,
coloptions=coloptions@entry=0x7ffe35369770, reloptions=reloptions@entry=0,
flags=flags@entry=1, constr_flags=constr_flags@entry=0,
allow_system_table_mods=allow_system_table_mods@entry=true,
is_internal=is_internal@entry=true, constraintId=constraintId@entry=0x0) at
index.c:1217
#6 0x000000000058ad36 in create_toast_table (rel=rel@entry=0x7ff995b7db68,
toastOid=toastOid@entry=0, toastIndexOid=toastIndexOid@entry=0,
reloptions=reloptions@entry=0, lockmode=lockmode@entry=8,
check=check@entry=false) at toasting.c:310
#7 0x000000000058afe2 in CheckAndCreateToastTable (relOid=relOid@entry=16388,
reloptions=reloptions@entry=0, lockmode=lockmode@entry=8,
check=check@entry=false) at toasting.c:82
#8 0x000000000058b029 in NewRelationCreateToastTable
(relOid=relOid@entry=16388, reloptions=reloptions@entry=0) at toasting.c:71
#9 0x00000000007f3027 in ProcessUtilitySlow (pstate=pstate@entry=0x240dc40,
pstmt=pstmt@entry=0x2409358,
queryString=queryString@entry=0x24056a0 "CREATE SCHEMA \"schema4\" CREATE
TABLE \"counted\" (\t\ttype text CHECK (type IN ('total', 'delta')),
\t\tvalue\tinteger)CREATE UNIQUE INDEX \"counted_unique_total\" ON \"counted\"
(type) WHERE type = 'total'", context=context@entry=PROCESS_UTILITY_SUBCOMMAND,
params=params@entry=0x0, queryEnv=queryEnv@entry=0x0, qc=qc@entry=0x0,
dest=0xa91ea0 <donothingDR>)
at utility.c:1191
#10 0x00000000007f14b4 in standard_ProcessUtility (pstmt=0x2409358,
queryString=0x24056a0 "CREATE SCHEMA \"schema4\" CREATE TABLE \"counted\"
(\t\ttype text CHECK (type IN ('total', 'delta')), \t\tvalue\tinteger)CREATE
UNIQUE INDEX \"counted_unique_total\" ON \"counted\" (type) WHERE type =
'total'", context=PROCESS_UTILITY_SUBCOMMAND, params=0x0, queryEnv=0x0,
dest=0xa91ea0 <donothingDR>, qc=0x0) at utility.c:1071
#11 0x0000000000615ba6 in CreateSchemaCommand (stmt=stmt@entry=0x2408b40,
queryString=queryString@entry=0x24056a0 "CREATE SCHEMA \"schema4\" CREATE
TABLE \"counted\" (\t\ttype text CHECK (type IN ('total', 'delta')),
\t\tvalue\tinteger)CREATE UNIQUE INDEX \"counted_unique_total\" ON \"counted\"
(type) WHERE type = 'total'", stmt_location=0, stmt_len=0) at schemacmds.c:192
#12 0x00000000007f2365 in ProcessUtilitySlow (pstate=pstate@entry=0x24090d0,
pstmt=pstmt@entry=0x2408d98,
queryString=queryString@entry=0x24056a0 "CREATE SCHEMA \"schema4\" CREATE
TABLE \"counted\" (\t\ttype text CHECK (type IN ('total', 'delta')),
\t\tvalue\tinteger)CREATE UNIQUE INDEX \"counted_unique_total\" ON \"counted\"
(type) WHERE type = 'total'", context=context@entry=PROCESS_UTILITY_QUERY,
params=params@entry=0x0, queryEnv=queryEnv@entry=0x0,
qc=qc@entry=0x7ffe35369fd0, dest=0xa91de0 <spi_printtupDR>)
at utility.c:1125
#13 0x00000000007f14b4 in standard_ProcessUtility (pstmt=0x2408d98,
queryString=0x24056a0 "CREATE SCHEMA \"schema4\" CREATE TABLE \"counted\"
(\t\ttype text CHECK (type IN ('total', 'delta')), \t\tvalue\tinteger)CREATE
UNIQUE INDEX \"counted_unique_total\" ON \"counted\" (type) WHERE type =
'total'", context=PROCESS_UTILITY_QUERY, params=0x0, queryEnv=0x0,
dest=0xa91de0 <spi_printtupDR>, qc=0x7ffe35369fd0) at utility.c:1071
#14 0x00000000006a1ace in _SPI_execute_plan (plan=plan@entry=0x7ffe3536a050,
paramLI=paramLI@entry=0x0, snapshot=snapshot@entry=0x0,
crosscheck_snapshot=crosscheck_snapshot@entry=0x0,
read_only=read_only@entry=false, fire_triggers=fire_triggers@entry=true,
tcount=tcount@entry=0, caller_dest=caller_dest@entry=0x0) at spi.c:2443
#15 0x00000000006a2102 in SPI_execute (
src=0x24056a0 "CREATE SCHEMA \"schema4\" CREATE TABLE \"counted\" (\t\ttype
text CHECK (type IN ('total', 'delta')), \t\tvalue\tinteger)CREATE UNIQUE INDEX
\"counted_unique_total\" ON \"counted\" (type) WHERE type = 'total'",
read_only=read_only@entry=false, tcount=tcount@entry=0) at spi.c:515
#16 0x00007ff98a80592f in initialize_worker_spi (table=0x237d268) at
worker_spi.c:150
#17 worker_spi_main (main_arg=<optimized out>) at worker_spi.c:187
#18 0x0000000000753f0f in StartBackgroundWorker () at bgworker.c:820
[2]
#0 __strlen_sse2_pminub () at
../sysdeps/x86_64/multiarch/strlen-sse2-pminub.S:38
#1 0x0000000000508844 in _bt_begin_parallel (request=1,
isconcurrent=<optimized out>, buildstate=0x7ffdc868f310) at nbtsort.c:1536
#2 _bt_spools_heapscan (indexInfo=0x1c5bb48, buildstate=0x7ffdc868f310,
index=..., heap=0x7f7f33bad800) at nbtsort.c:394
#3 btbuild (heap=0x7f7f33bad800, index=0x7f7f33badd98, indexInfo=0x1c5bb48) at
nbtsort.c:326
#4 0x0000000000568d0b in index_build
(heapRelation=heapRelation@entry=0x7f7f33bad800,
indexRelation=indexRelation@entry=0x7f7f33badd98,
indexInfo=indexInfo@entry=0x1c5bb48, isreindex=isreindex@entry=true,
parallel=parallel@entry=true) at index.c:2898
#5 0x000000000056baea in reindex_index (indexId=indexId@entry=16387,
skip_constraint_checks=<optimized out>, persistence=persistence@entry=112 'p',
options=options@entry=0) at index.c:3541
#6 0x000000000056c060 in reindex_relation (relid=<optimized out>,
flags=flags@entry=1, options=options@entry=0) at index.c:3764
#7 0x000000000062e5a7 in ExecuteTruncateGuts
(explicit_rels=explicit_rels@entry=0x1c5ae60, relids=<optimized out>,
relids_logged=relids_logged@entry=0x1c5bd50,
behavior=behavior@entry=DROP_RESTRICT,
restart_seqs=<optimized out>) at tablecmds.c:1890
#8 0x000000000078c099 in apply_handle_truncate (s=0x7ffdc868f7b0) at
worker.c:1879
#9 apply_dispatch (s=s@entry=0x7ffdc868f890) at worker.c:1930
#10 0x000000000078cfbb in LogicalRepApplyLoop (last_received=23166784) at
worker.c:2155
#11 ApplyWorkerMain (main_arg=<optimized out>) at worker.c:3101
#12 0x0000000000753f0f in StartBackgroundWorker () at bgworker.c:820
#13 0x000000000076162d in do_start_bgworker (rw=0x1bdbb50) at postmaster.c:5890
#14 maybe_start_bgworkers () at postmaster.c:6115
#15 0x00000000007621ae in sigusr1_handler (postgres_signal_arg=<optimized out>)
at postmaster.c:5276
#16 <signal handler called>
#17 0x00007f7f31f1d983 in __select_nocancel () at
../sysdeps/unix/syscall-template.S:81
#18 0x00000000004881cf in ServerLoop () at postmaster.c:1706
#19 0x0000000000763885 in PostmasterMain (argc=argc@entry=4,
argv=argv@entry=0x1bb3030) at postmaster.c:1415
#20 0x000000000048a56d in main (argc=4, argv=0x1bb3030) at main.c:209
commit 753a321
Author: Noah Misch <[email protected]>
AuthorDate: Tue Oct 13 17:01:40 2020 -0700
Commit: Noah Misch <[email protected]>
CommitDate: Tue Oct 13 17:01:40 2020 -0700
worker_spi: set debug_query_string for each SPI_execute
---
src/test/modules/worker_spi/worker_spi.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/test/modules/worker_spi/worker_spi.c
b/src/test/modules/worker_spi/worker_spi.c
index 1c7b17c..daf6c1d 100644
--- a/src/test/modules/worker_spi/worker_spi.c
+++ b/src/test/modules/worker_spi/worker_spi.c
@@ -113,16 +113,17 @@ initialize_worker_spi(worktable *table)
SPI_connect();
PushActiveSnapshot(GetTransactionSnapshot());
pgstat_report_activity(STATE_RUNNING, "initializing worker_spi schema");
/* XXX could we use CREATE SCHEMA IF NOT EXISTS? */
initStringInfo(&buf);
appendStringInfo(&buf, "select count(*) from pg_namespace where nspname
= '%s'",
table->schema);
+ debug_query_string = buf.data;
ret = SPI_execute(buf.data, true, 0);
if (ret != SPI_OK_SELECT)
elog(FATAL, "SPI_execute failed: error code %d", ret);
if (SPI_processed != 1)
elog(FATAL, "not a singleton result");
@@ -138,30 +139,32 @@ initialize_worker_spi(worktable *table)
appendStringInfo(&buf,
"CREATE SCHEMA \"%s\" "
"CREATE TABLE \"%s\" ("
" type text CHECK
(type IN ('total', 'delta')), "
" value
integer)"
"CREATE UNIQUE INDEX
\"%s_unique_total\" ON \"%s\" (type) "
"WHERE type = 'total'",
table->schema, table->name,
table->name, table->name);
+ debug_query_string = buf.data;
/* set statement start time */
SetCurrentStatementStartTimestamp();
ret = SPI_execute(buf.data, false, 0);
if (ret != SPI_OK_UTILITY)
elog(FATAL, "failed to create my schema");
}
SPI_finish();
PopActiveSnapshot();
CommitTransactionCommand();
pgstat_report_activity(STATE_IDLE, NULL);
+ debug_query_string = NULL;
}
void
worker_spi_main(Datum main_arg)
{
int index = DatumGetInt32(main_arg);
worktable *table;
StringInfoData buf;
@@ -206,16 +209,17 @@ worker_spi_main(Datum main_arg)
"UPDATE %s.%s "
"SET value = %s.value + total.sum "
"FROM total WHERE type = 'total' "
"RETURNING %s.value",
table->schema, table->name,
table->schema, table->name,
table->name,
table->name);
+ debug_query_string = buf.data;
/*
* Main loop: do this until the SIGTERM handler tells us to terminate
*/
while (!got_sigterm)
{
int ret;