Hi, today's testing with master as of d253b0f6e3 yielded two clusters that stopped processing queries. Symptoms:
regression=> select application_name, state, wait_event, wait_event_type, count(1), min(pid) from pg_stat_activity group by 1,2,3,4; application_name | state | wait_event | wait_event_type | count | min ------------------+--------+--------------------------------+-----------------+-------+------ psql | active | | | 1 | 3781 psql | active | DynamicSharedMemoryControlLock | LWLock | 2 | 3272 sqlsmith::dut | active | DynamicSharedMemoryControlLock | LWLock | 9 | 2726 sqlsmith::dut | active | MessageQueueSend | IPC | 1 | 2625 sqlsmith::dut | active | BgWorkerShutdown | IPC | 1 | 3655 sqlsmith::schema | idle | ClientRead | Client | 9 | 3634 (6 rows) regression=> select application_name, state, wait_event, wait_event_type, count(1), min(pid) from pg_stat_activity group by 1,2,3,4; application_name | state | wait_event | wait_event_type | count | min ------------------+--------+--------------------------------+-----------------+-------+------- psql | active | | | 1 | 29645 sqlsmith::dut | active | DynamicSharedMemoryControlLock | LWLock | 5 | 23167 sqlsmith::schema | idle | ClientRead | Client | 5 | 1169 sqlsmith::dut | active | BgWorkerShutdown | IPC | 1 | 1178 (4 rows) Both sport the following last error in their logfiles: FATAL: cannot unpin a segment that is not pinned Below are the backtraces of the processes throwing them. regards, Andreas Backtrace on dwagon: #0 sem_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S:85 #1 0x00000000006b7c02 in PGSemaphoreLock (sema=0x7f8aa19b20d8) at pg_sema.c:310 #2 0x0000000000726b04 in LWLockAcquire (lock=0x7f8aa19b3400, mode=LW_EXCLUSIVE) at lwlock.c:1233 #3 0x0000000000710b56 in dsm_detach (seg=0x12cf6a0) at dsm.c:760 #4 0x0000000000710e6e in dsm_backend_shutdown () at dsm.c:643 #5 0x000000000071144f in shmem_exit (code=code@entry=1) at ipc.c:248 #6 0x000000000071150e in proc_exit_prepare (code=code@entry=1) at ipc.c:185 #7 0x0000000000711588 in proc_exit (code=code@entry=1) at ipc.c:102 #8 0x00000000008352ff in errfinish (dummy=dummy@entry=0) at elog.c:543 #9 0x0000000000838a8c in elog_finish (elevel=<optimized out>, fmt=0x9e8e70 "cannot unpin a segment that is not pinned") at elog.c:1378 #10 0x00000000007111ff in dsm_unpin_segment (handle=545777640) at dsm.c:917 #11 0x0000000000858205 in dsa_release_in_place (place=0x7f8aa524a178) at dsa.c:617 #12 0x0000000000710ae0 in dsm_detach (seg=0x12cf6a0) at dsm.c:734 #13 0x0000000000710e6e in dsm_backend_shutdown () at dsm.c:643 #14 0x000000000071144f in shmem_exit (code=code@entry=1) at ipc.c:248 #15 0x000000000071150e in proc_exit_prepare (code=code@entry=1) at ipc.c:185 #16 0x0000000000711588 in proc_exit (code=code@entry=1) at ipc.c:102 #17 0x00000000008352ff in errfinish (dummy=<optimized out>) at elog.c:543 #18 0x000000000073425c in ProcessInterrupts () at postgres.c:2837 #19 0x00000000004baeb5 in heapgetpage (scan=scan@entry=0x1376138, page=page@entry=0) at heapam.c:373 #20 0x00000000004bbaf0 in heapgettup_pagemode (key=0x1376550, nkeys=1, dir=ForwardScanDirection, scan=0x1376138) at heapam.c:830 #21 heap_getnext (scan=0x1376138, direction=direction@entry=ForwardScanDirection) at heapam.c:1804 #22 0x00000000004cf2be in systable_getnext (sysscan=sysscan@entry=0x13760e0) at genam.c:435 #23 0x0000000000823760 in ScanPgRelation (targetRelId=<optimized out>, indexOK=<optimized out>, force_non_historic=force_non_historic@entry=0 '\000') at relcache.c:357 #24 0x0000000000823ad3 in RelationReloadIndexInfo (relation=relation@entry=0x7f8aa52321e8) at relcache.c:2235 #25 0x0000000000827dd8 in RelationIdGetRelation (relationId=relationId@entry=2662) at relcache.c:2091 #26 0x00000000004bb088 in relation_open (relationId=relationId@entry=2662, lockmode=lockmode@entry=1) at heapam.c:1128 #27 0x00000000004cf8f6 in index_open (relationId=relationId@entry=2662, lockmode=lockmode@entry=1) at indexam.c:155 #28 0x00000000004cf226 in systable_beginscan (heapRelation=heapRelation@entry=0x7f8aa10e64a0, indexId=2662, indexOK=<optimized out>, snapshot=snapshot@entry=0x0, nkeys=1, key=key@entry=0x7ffe7b2bd260) at genam.c:340 #29 0x000000000081ef9c in SearchCatCache (cache=0x132cd10, v1=v1@entry=16528, v2=v2@entry=0, v3=v3@entry=0, v4=v4@entry=0) at catcache.c:1234 #30 0x000000000082d46e in SearchSysCache (cacheId=cacheId@entry=46, key1=key1@entry=16528, key2=key2@entry=0, key3=key3@entry=0, key4=key4@entry=0) at syscache.c:1108 #31 0x0000000000530b8c in pg_class_aclmask (table_oid=table_oid@entry=16528, roleid=roleid@entry=10, mask=mask@entry=2, how=how@entry=ACLMASK_ALL) at aclchk.c:3619 #32 0x000000000060944f in ExecCheckRTEPerms (rte=0x13702f8) at execMain.c:633 #33 ExecCheckRTPerms (rangeTable=0x1370698, ereport_on_violation=<optimized out>) at execMain.c:571 #34 0x0000000000609e1d in InitPlan (eflags=16, queryDesc=0x13a20e8) at execMain.c:812 #35 standard_ExecutorStart (queryDesc=0x13a20e8, eflags=16) at execMain.c:249 #36 0x000000000060c824 in ParallelQueryMain (seg=<optimized out>, toc=0x7f8aa524a000) at execParallel.c:849 #37 0x00000000004fa5aa in ParallelWorkerMain (main_arg=<optimized out>) at parallel.c:1109 #38 0x00000000006ba5a2 in StartBackgroundWorker () at bgworker.c:792 #39 0x00000000006c5952 in do_start_bgworker (rw=0x12ef180) at postmaster.c:5577 #40 maybe_start_bgworker () at postmaster.c:5761 #41 0x00000000006c6376 in sigusr1_handler (postgres_signal_arg=<optimized out>) at postmaster.c:5015 #42 <signal handler called> #43 0x00007f8aa45eeac3 in __select_nocancel () at ../sysdeps/unix/syscall-template.S:81 #44 0x0000000000476085 in ServerLoop () at postmaster.c:1693 #45 0x00000000006c7890 in PostmasterMain (argc=argc@entry=4, argv=argv@entry=0x12cd540) at postmaster.c:1337 #46 0x000000000047784d in main (argc=4, argv=0x12cd540) at main.c:228 Backtrace on seabisquit: #0 outNode (str=0x7fffbbf96dc0, obj=0x9) at outfuncs.c:3511 #1 0x000000000066ec2b in nodeToString (obj=0x9) at outfuncs.c:4160 #2 0x0000000000674b9b in pprint (obj=<optimized out>) at print.c:58 #3 <function called from gdb> #4 0x00007fc5bfc25457 in futex_abstimed_wait_cancelable (private=128, abstime=0x0, expected=0, futex_word=0x7fc5bc603058) at ../sysdeps/unix/sysv/linux/futex-internal.h:205 #5 do_futex_wait (sem=sem@entry=0x7fc5bc603058, abstime=0x0) at sem_waitcommon.c:111 #6 0x00007fc5bfc25504 in __new_sem_wait_slow (sem=0x7fc5bc603058, abstime=0x0) at sem_waitcommon.c:181 #7 0x00007fc5bfc25599 in __new_sem_wait (sem=sem@entry=0x7fc5bc603058) at sem_wait.c:29 #8 0x00000000006bc322 in PGSemaphoreLock (sema=0x7fc5bc603058) at pg_sema.c:310 #9 0x000000000072b72c in LWLockAcquire (lock=0x7fc5bc604400, mode=mode@entry=LW_EXCLUSIVE) at lwlock.c:1233 #10 0x00000000007156e0 in dsm_detach (seg=0x1ee36a0) at dsm.c:760 #11 0x0000000000715a0e in dsm_backend_shutdown () at dsm.c:643 #12 0x0000000000715fef in shmem_exit (code=code@entry=1) at ipc.c:248 #13 0x00000000007160ae in proc_exit_prepare (code=code@entry=1) at ipc.c:185 #14 0x0000000000716128 in proc_exit (code=code@entry=1) at ipc.c:102 #15 0x00000000008378df in errfinish (dummy=dummy@entry=0) at elog.c:543 #16 0x000000000083b06c in elog_finish (elevel=elevel@entry=20, fmt=fmt@entry=0x9eb330 "cannot unpin a segment that is not pinned") at elog.c:1378 #17 0x0000000000715dad in dsm_unpin_segment (handle=1204218611) at dsm.c:917 #18 0x000000000085a835 in dsa_release_in_place (place=0x7fc5bfe891c8) at dsa.c:617 #19 0x000000000071566f in dsm_detach (seg=0x1ee36a0) at dsm.c:734 #20 0x0000000000715a0e in dsm_backend_shutdown () at dsm.c:643 #21 0x0000000000715fef in shmem_exit (code=code@entry=1) at ipc.c:248 #22 0x00000000007160ae in proc_exit_prepare (code=code@entry=1) at ipc.c:185 #23 0x0000000000716128 in proc_exit (code=code@entry=1) at ipc.c:102 #24 0x00000000008378df in errfinish (dummy=<optimized out>) at elog.c:543 #25 0x0000000000738b9e in ProcessInterrupts () at postgres.c:2837 #26 0x00000000007393d9 in ProcessInterrupts () at postgres.c:2824 #27 0x0000000000610725 in ExecProcNode (node=node@entry=0x1fe7cb8) at execProcnode.c:396 #28 0x000000000060bfe6 in ExecutePlan (execute_once=<optimized out>, dest=0x1f6f640, direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>, operation=CMD_SELECT, use_parallel_mode=<optimized out>, planstate=0x1fe7cb8, estate=0x1faa830) at execMain.c:1646 #29 standard_ExecutorRun (queryDesc=0x1fb7ad0, direction=<optimized out>, count=0, execute_once=<optimized out>) at execMain.c:355 #30 0x000000000060f993 in ParallelQueryMain (seg=<optimized out>, toc=0x7fc5bfe89000) at execParallel.c:856 #31 0x00000000004f9eba in ParallelWorkerMain (main_arg=<optimized out>) at parallel.c:1109 #32 0x00000000006becd2 in StartBackgroundWorker () at bgworker.c:792 #33 0x00000000006ca042 in do_start_bgworker (rw=0x1f07780) at postmaster.c:5577 #34 maybe_start_bgworker () at postmaster.c:5761 #35 0x00000000006caa76 in sigusr1_handler (postgres_signal_arg=<optimized out>) at postmaster.c:5015 #36 <signal handler called> #37 0x00007fc5bf243e53 in __select_nocancel () at ../sysdeps/unix/syscall-template.S:84 #38 0x0000000000475f5d in ServerLoop () at postmaster.c:1693 #39 0x00000000006cbfa2 in PostmasterMain (argc=argc@entry=4, argv=argv@entry=0x1ee1540) at postmaster.c:1337 #40 0x0000000000477b6d in main (argc=4, argv=0x1ee1540) at main.c:228 -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers