Hi,all I analyzed the btree block where lwlock deadlock occurred, as follows:
## insert process(ginInsertValue()) 644(root blkno) | 7054(2. held LWLock:0x2aaac587ae64) ----rightlink----> xxxx(3. Acquire LWLock:0x2aaab4009564,buffer = 2119038,blkno should be 9954) | 701(1. held LWLock:0x2aaab670dfe4) The ginInsertValue() function above gets the lwlock in the order described in the README. src/backend/access/gin/README --------------------------------------------------------------- To avoid deadlocks, B-tree pages must always be locked in the same order: left to right, and bottom to top. ... ----------------------------------------------------------------- ## autovacuum process(ginScanToDelete()) 644(root blkno) | ... 9954(1. held LWLock:0x2aaab4009564) ... | 701(2. Acquire LWLock:0x2aaab670dfe4) note:according to autovacuum's core 701's parent is 9954;while insert's core shows 701's parent is 7054, rightlink of 7054 is 9954! However, ginScanToDelete() depth-first scans the btree and gets the EXCLUSIVE lock, which creates a deadlock. Is the above statement correct? If so, deadlocks should easily happen. static bool ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) { ... if (!isRoot) LockBuffer(buffer, GIN_EXCLUSIVE); ... for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = GinDataPageGetPostingItem(page, i); if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) i--; } ... } src/backend/access/gin/README ----------------------------------------------------------------- The previous paragraph's reasoning only applies to searches, and only to posting trees. To protect from inserters following a downlink to a deleted page, vacuum simply locks out all concurrent insertions to the posting tree, by holding a super-exclusive lock on the parent page of subtree with deletable pages. ... posting tree. To exclude interference with readers vacuum takes exclusive locks in a depth-first scan in left-to-right order of page tuples. ... ----------------------------------------------------------------- # stacks ## stack of insert:Acquire lock 0x2aaab4009564(blkno:9954) and hold 0x2aaab670dfe4(blkno:701), 0x2aaac587ae64(blkno:7054) ----------------------------------------------------------------- (gdb) bt #0 0x00007fe11552379b in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0 #1 0x00007fe11552382f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0 #2 0x00007fe1155238cb in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0 #3 0x000000000069d362 in PGSemaphoreLock (sema=0x2aaaaac02958) at pg_sema.c:310 #4 0x00000000007095ac in LWLockAcquire (lock=0x2aaab4009564, mode=LW_EXCLUSIVE) at lwlock.c:1233 #5 0x0000000000490a32 in ginStepRight (buffer=6713826, index=<optimized out>, lockmode=lockmode@entry=2) at ginbtree.c:174 #6 0x0000000000490c1c in ginFinishSplit (btree=btree@entry=0x7ffd81e4f950, stack=0x28eebf8, freestack=freestack@entry=1 '\001', buildStats=buildStats@entry=0x0) at ginbtree.c:701 #7 0x0000000000491396 in ginInsertValue (btree=btree@entry=0x7ffd81e4f950, stack=<optimized out>, insertdata=insertdata@entry=0x7ffd81e4f940, buildStats=buildStats@entry=0x0) at ginbtree.c:773 #8 0x000000000048fb42 in ginInsertItemPointers (index=<optimized out>, rootBlkno=rootBlkno@entry=644, items=items@entry=0x2916598, nitem=nitem@entry=353, buildStats=buildStats@entry=0x0) at gindatapage.c:1907 #9 0x000000000048a9ea in ginEntryInsert (ginstate=ginstate@entry=0x28e7ef8, attnum=<optimized out>, key=42920440, category=<optimized out>, items=0x2916598, nitem=353, buildStats=buildStats@entry=0x0) at gininsert.c:214 #10 0x0000000000496d94 in ginInsertCleanup (ginstate=ginstate@entry=0x28e7ef8, full_clean=full_clean@entry=0 '\000', fill_fsm=fill_fsm@entry=1 '\001', forceCleanup=forceCleanup@entry=0 '\000', stats=stats@entry=0x0) at ginfast.c:883 #11 0x0000000000497727 in ginHeapTupleFastInsert (ginstate=ginstate@entry=0x28e7ef8, collector=collector@entry=0x7ffd81e4fe60) at ginfast.c:448 #12 0x000000000048b209 in gininsert (index=<optimized out>, values=0x7ffd81e4ff40, isnull=0x7ffd81e50040 "", ht_ctid=0x280d98c, heapRel=<optimized out>, checkUnique=<optimized out>, indexInfo=0x28b5aa8) at gininsert.c:522 #13 0x00000000005ee8dd in ExecInsertIndexTuples (slot=slot@entry=0x28b5d58, tupleid=tupleid@entry=0x280d98c, estate=estate@entry=0x28b5288, noDupErr=noDupErr@entry=1 '\001', specConflict=specConflict@entry=0x7ffd81e5013b "", arbiterIndexes=arbiterIndexes@entry=0x28c6dd8) at execIndexing.c:386 #14 0x000000000060ccf5 in ExecInsert (canSetTag=1 '\001', estate=0x28b5288, onconflict=ONCONFLICT_UPDATE, arbiterIndexes=0x28c6dd8, planSlot=0x28b5d58, slot=0x28b5d58, mtstate=0x28b5628) at nodeModifyTable.c:564 #15 ExecModifyTable (pstate=0x28b5628) at nodeModifyTable.c:1766 #16 0x00000000005ef612 in standard_ExecutorRun (queryDesc=<optimized out>, direction=<optimized out>, count=<optimized out>, execute_once=<optimized out>) at ../../../src/include/executor/executor.h:250 #17 0x00007fe10607d15d in pgss_ExecutorRun (queryDesc=0x28cc1d8, direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at pg_stat_statements.c:889 #18 0x000000000071a7ba in ProcessQuery (plan=<optimized out>, sourceText=0x2849058 "INSERT INTO bi_dm.tdm_sncity_workorder_analytic_statistics (CITYCODE,CITYNAME,DISTRICT,DISTRICT_NAME,ZL_WERKS,ZL_WERKS_NAME,AREA_CD,AREA_NM,ZSIZE,DEPT_CD,TMALL_FLG,ORDER_DATE,DISTRIBUTE,WORKORDER,FNSH"..., params=0x28b2260, queryEnv=0x0, dest=0xc9e2a0 <donothingDR>, completionTag=0x7ffd81e507b0 "") at pquery.c:161 #19 0x000000000071a9f7 in PortalRunMulti (portal=portal@entry=0x289fdd8, isTopLevel=isTopLevel@entry=1 '\001', setHoldSnapshot=setHoldSnapshot@entry=0 '\000', dest=0xc9e2a0 <donothingDR>, dest@entry=0x284d778, altdest=0xc9e2a0 <donothingDR>, altdest@entry=0x284d778, completionTag=completionTag@entry=0x7ffd81e507b0 "") at pquery.c:1286 #20 0x000000000071b535 in PortalRun (portal=<optimized out>, count=1, isTopLevel=<optimized out>, run_once=<optimized out>, dest=0x284d778, altdest=0x284d778, completionTag=0x7ffd81e507b0 "") at pquery.c:799 #21 0x0000000000718f84 in PostgresMain (argc=<optimized out>, argv=<optimized out>, dbname=<optimized out>, username=<optimized out>) at postgres.c:1984 #22 0x000000000047ad3c in BackendRun (port=0x27cbc40) at postmaster.c:4405 #23 BackendStartup (port=0x27cbc40) at postmaster.c:4077 #24 ServerLoop () at postmaster.c:1755 #25 0x00000000006afacf in PostmasterMain (argc=argc@entry=3, argv=argv@entry=0x277ba20) at postmaster.c:1363 #26 0x000000000047bb6f in main (argc=3, argv=0x277ba20) at main.c:228 (gdb) p num_held_lwlocks $1 = 2 (gdb) p lwlocks $2 = {{lock = 0x2aaab670dfe4, mode = LW_EXCLUSIVE}, {lock = 0x2aaac587ae64, mode = LW_EXCLUSIVE}, {lock = 0x2aaaaac0a380, mode = LW_SHARED}, {lock = 0x2aaab670a2e4, mode = LW_EXCLUSIVE}, { lock = 0x2aaab670a2e4, mode = LW_EXCLUSIVE}, {lock = 0x2aaab670a2e4, mode = LW_EXCLUSIVE}, {lock = 0x2aaab670a2e4, mode = LW_EXCLUSIVE}, {lock = 0x2aaab670a2e4, mode = LW_EXCLUSIVE}, { lock = 0x2aaab670a2e4, mode = LW_EXCLUSIVE}, {lock = 0x2aaab670a2e4, mode = LW_EXCLUSIVE}, {lock = 0x2aaaaac15a80, mode = LW_EXCLUSIVE}, {lock = 0x0, mode = LW_EXCLUSIVE} <repeats 189 times>} (gdb) f 6 #6 0x0000000000490c1c in ginFinishSplit (btree=btree@entry=0x7ffd81e4f950, stack=0x28eebf8, freestack=freestack@entry=1 '\001', buildStats=buildStats@entry=0x0) at ginbtree.c:701 701 parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE); (gdb) p *stack $27 = {blkno = 701, buffer = 2758312, off = 0, iptr = {ip_blkid = {bi_hi = 5, bi_lo = 0}, ip_posid = 40884}, predictNumber = 1, parent = 0x28eea68} (gdb) p ((LWLock*) (&((&BufferDescriptors[(2758312 - 1)].bufferdesc))->content_lock)) $28 = (LWLock *) 0x2aaab670dfe4 (gdb) p *parent $29 = {blkno = 7054, buffer = 6713826, off = 0, iptr = {ip_blkid = {bi_hi = 8, bi_lo = 0}, ip_posid = 40886}, predictNumber = 1, parent = 0x28eeab8} (gdb) p ((LWLock*) (&((&BufferDescriptors[(6713826 - 1)].bufferdesc))->content_lock)) $32 = (LWLock *) 0x2aab4fa5cfe4 (gdb) p *(GinBtreeStack *)0x28eeab8 $31 = {blkno = 644, buffer = 6713825, off = 1, iptr = {ip_blkid = {bi_hi = 35, bi_lo = 0}, ip_posid = 40884}, predictNumber = 1, parent = 0x0} (gdb) p ((LWLock*) (&((&BufferDescriptors[(6713825 - 1)].bufferdesc))->content_lock)) $33 = (LWLock *) 0x2aaac587ae24 ----------------------------------------------------------------- ## stack of autovacuum:Acquire lock 0x2aaab670dfe4(blkno:701) and hold 0x2aaab4009564(blkno:9954) ----------------------------------------------------------------- #0 0x00007fe11552379b in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0 #1 0x00007fe11552382f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0 #2 0x00007fe1155238cb in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0 #3 0x000000000069d362 in PGSemaphoreLock (sema=0x2aaaaac07eb8) at pg_sema.c:310 #4 0x00000000007095ac in LWLockAcquire (lock=0x2aaab670dfe4, mode=LW_EXCLUSIVE) at lwlock.c:1233 #5 0x00000000004947ef in ginScanToDelete (gvs=gvs@entry=0x7ffd81e4d0f0, blkno=701, isRoot=isRoot@entry=0 '\000', parent=parent@entry=0x7ffd81e4c790, myoff=myoff@entry=37) at ginvacuum.c:262 #6 0x0000000000494874 in ginScanToDelete (gvs=gvs@entry=0x7ffd81e4d0f0, blkno=blkno@entry=9954, isRoot=isRoot@entry=1 '\001', parent=parent@entry=0x7ffd81e4c790, myoff=myoff@entry=0) at ginvacuum.c:277 #7 0x0000000000494ed1 in ginVacuumPostingTreeLeaves (gvs=gvs@entry=0x7ffd81e4d0f0, blkno=9954, isRoot=isRoot@entry=0 '\000') at ginvacuum.c:404 #8 0x0000000000494e21 in ginVacuumPostingTreeLeaves (gvs=gvs@entry=0x7ffd81e4d0f0, blkno=644, isRoot=isRoot@entry=1 '\001') at ginvacuum.c:372 #9 0x0000000000495540 in ginVacuumPostingTree (rootBlkno=<optimized out>, gvs=0x7ffd81e4d0f0) at ginvacuum.c:426 #10 ginbulkdelete (info=0x7ffd81e4f720, stats=<optimized out>, callback=<optimized out>, callback_state=<optimized out>) at ginvacuum.c:649 #11 0x00000000005e1194 in lazy_vacuum_index (indrel=0x3146e28, stats=stats@entry=0x28ec200, vacrelstats=vacrelstats@entry=0x28ebc28) at vacuumlazy.c:1621 #12 0x00000000005e214d in lazy_scan_heap (aggressive=<optimized out>, nindexes=<optimized out>, Irel=<optimized out>, vacrelstats=<optimized out>, options=16, onerel=0x28ec1f8) at vacuumlazy.c:1311 #13 lazy_vacuum_rel (onerel=onerel@entry=0x3144fa8, options=options@entry=99, params=params@entry=0x289f270, bstrategy=<optimized out>) at vacuumlazy.c:258 #14 0x00000000005dfc35 in vacuum_rel (relid=relid@entry=2060396948, relation=relation@entry=0x7ffd81e4fe50, options=options@entry=99, ---Type <return> to continue, or q <return> to quit--- params=params@entry=0x289f270) at vacuum.c:1463 #15 0x00000000005e0ad2 in vacuum (options=99, relation=relation@entry=0x7ffd81e4fe50, relid=<optimized out>, params=params@entry=0x289f270, va_cols=va_cols@entry=0x0, bstrategy=<optimized out>, bstrategy@entry=0x289e988, isTopLevel=isTopLevel@entry=1 '\001') at vacuum.c:306 #16 0x00000000006a01ff in autovacuum_do_vac_analyze (bstrategy=0x289e988, tab=0x289f268) at autovacuum.c:3095 #17 do_autovacuum () at autovacuum.c:2450 #18 0x00000000006a06a9 in AutoVacWorkerMain (argv=0x0, argc=0) at autovacuum.c:1693 #19 0x00000000006a0f49 in StartAutoVacWorker () at autovacuum.c:1498 #20 0x00000000006aeaea in StartAutovacuumWorker () at postmaster.c:5462 #21 sigusr1_handler (postgres_signal_arg=<optimized out>) at postmaster.c:5159 #22 <signal handler called> #23 0x00007fe11334bb83 in __select_nocancel () from /lib64/libc.so.6 #24 0x000000000047a3a3 in ServerLoop () at postmaster.c:1719 #25 0x00000000006afacf in PostmasterMain (argc=argc@entry=3, argv=argv@entry=0x277ba20) at postmaster.c:1363 #26 0x000000000047bb6f in main (argc=3, argv=0x277ba20) at main.c:228 (gdb) f 4 #4 0x00000000007095ac in LWLockAcquire (lock=0x2aaab670dfe4, mode=LW_EXCLUSIVE) at lwlock.c:1233 1233PGSemaphoreLock(proc->sem); (gdb) p num_held_lwlocks $1 = 1 (gdb) p InterruptHoldoffCount $2 = 2 (gdb) p held_lwlocks $3 = {{lock = 0x2aaab4009564, mode = LW_EXCLUSIVE}, {lock = 0x2aaaaac0aa00, mode = LW_SHARED}, {lock = 0x2aaaba8da364, mode = LW_EXCLUSIVE}, {lock = 0x2aaaaac15900, mode = LW_EXCLUSIVE}, { lock = 0x2aaab66eb064, mode = LW_EXCLUSIVE}, {lock = 0x2aaaaac15900, mode = LW_EXCLUSIVE}, {lock = 0x2aaaaac15c80, mode = LW_EXCLUSIVE}, {lock = 0x0, mode = LW_EXCLUSIVE} <repeats 193 times>} (gdb) f 8 #8 0x0000000000494e21 in ginVacuumPostingTreeLeaves (gvs=gvs@entry=0x7ffd81e4d0f0, blkno=644, isRoot=isRoot@entry=1 '\001') at ginvacuum.c:372 372 if (ginVacuumPostingTreeLeaves(gvs, children[i], FALSE)) (gdb) p i $13 = 2 (gdb) p children[1] $14 = 7054 (gdb) p children[2] $15 = 9954 #7 0x0000000000494ed1 in ginVacuumPostingTreeLeaves (gvs=gvs@entry=0x7ffd81e4d0f0, blkno=9954, isRoot=isRoot@entry=0 '\000') at ginvacuum.c:404 404 ginScanToDelete(gvs, blkno, TRUE, &root, InvalidOffsetNumber); (gdb) p buffer $16 = 2119038 (gdb) p ((LWLock*) (&((&BufferDescriptors[(2119038 - 1)].bufferdesc))->content_lock)) $17 = (LWLock *) 0x2aaab4009564 (gdb) f 5 #5 0x00000000004947ef in ginScanToDelete (gvs=gvs@entry=0x7ffd81e4d0f0, blkno=701, isRoot=isRoot@entry=0 '\000', parent=parent@entry=0x7ffd81e4c790, myoff=myoff@entry=37) at ginvacuum.c:262 262 LockBuffer(buffer, GIN_EXCLUSIVE); (gdb) p buffer $29 = 2758312 (gdb) p ((LWLock*) (&((&BufferDescriptors[(2758312 - 1)].bufferdesc))->content_lock)) $32 = (LWLock *) 0x2aaab670dfe4 -----------------------------------------------------------------