On Fri, Sep 09, 2022 at 09:06:37PM -0500, Justin Pryzby wrote: > #0 0x00007fb8a22f31f7 in raise () from /lib64/libc.so.6 > #1 0x00007fb8a22f48e8 in abort () from /lib64/libc.so.6 > #2 0x000000000098f9be in ExceptionalCondition > (conditionName=conditionName@entry=0x9fada4 "TransactionIdIsValid(xmax)", > errorType=errorType@entry=0x9ed217 "FailedAssertion", > fileName=fileName@entry=0x9fad90 "heapam_visibility.c", > lineNumber=lineNumber@entry=1353) at assert.c:69 > #3 0x00000000004fd4d7 in HeapTupleSatisfiesVacuumHorizon > (htup=htup@entry=0x7ffc225a87e0, buffer=buffer@entry=5100, > dead_after=dead_after@entry=0x7ffc225a87d0) at heapam_visibility.c:1353 > #4 0x0000000000501702 in heap_prune_satisfies_vacuum (buffer=5100, > tup=0x7ffc225a87e0, prstate=0x7ffc225a8a50) at pruneheap.c:504 > #5 heap_page_prune (relation=relation@entry=0x7fb8a50c3438, > buffer=buffer@entry=5100, vistest=vistest@entry=0xec7890 <GlobalVisDataRels>, > old_snap_xmin=<optimized out>, old_snap_ts=<optimized out>, > nnewlpdead=nnewlpdead@entry=0x7ffc225a964c, off_loc=off_loc@entry=0x0) at > pruneheap.c:351 > #6 0x0000000000502326 in heap_page_prune_opt (relation=0x7fb8a50c3438, > buffer=buffer@entry=5100) at pruneheap.c:209 > #7 0x00000000004f3ae3 in heapgetpage (sscan=sscan@entry=0x199b1d0, > page=page@entry=2892) at heapam.c:415 > #8 0x00000000004f44c2 in heapgettup_pagemode (scan=scan@entry=0x199b1d0, > dir=<optimized out>, nkeys=0, key=0x0) at heapam.c:1120 > #9 0x00000000004f5abe in heap_getnextslot (sscan=0x199b1d0, > direction=<optimized out>, slot=0x1967be8) at heapam.c:1352 > #10 0x00000000006de16b in table_scan_getnextslot (slot=0x1967be8, > direction=ForwardScanDirection, sscan=<optimized out>) at > ../../../src/include/access/tableam.h:1046 > #11 SeqNext (node=node@entry=0x1967a38) at nodeSeqscan.c:80 > #12 0x00000000006b109a in ExecScanFetch (recheckMtd=0x6de0f0 <SeqRecheck>, > accessMtd=0x6de100 <SeqNext>, node=0x1967a38) at execScan.c:133 > #13 ExecScan (node=0x1967a38, 
accessMtd=0x6de100 <SeqNext>, > recheckMtd=0x6de0f0 <SeqRecheck>) at execScan.c:199 > #14 0x00000000006add88 in ExecProcNodeInstr (node=0x1967a38) at > execProcnode.c:479 > #15 0x00000000006a6182 in ExecProcNode (node=0x1967a38) at > ../../../src/include/executor/executor.h:259 > #16 ExecutePlan (execute_once=<optimized out>, dest=0x1988448, > direction=<optimized out>, numberTuples=0, sendTuples=true, > operation=CMD_SELECT, use_parallel_mode=<optimized out>, planstate=0x1967a38, > estate=0x19677a0) > at execMain.c:1636 > #17 standard_ExecutorRun (queryDesc=0x1996e80, direction=<optimized out>, > count=0, execute_once=<optimized out>) at execMain.c:363 > #18 0x00007fb8960913bd in pgss_ExecutorRun (queryDesc=0x1996e80, > direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at > pg_stat_statements.c:1010 > #19 0x00007fb895c6f781 in explain_ExecutorRun (queryDesc=0x1996e80, > direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at > auto_explain.c:320 > #20 0x000000000084976e in PortalRunSelect (portal=portal@entry=0x18fed30, > forward=forward@entry=true, count=0, count@entry=9223372036854775807, > dest=dest@entry=0x1988448) at pquery.c:924 > #21 0x000000000084af4f in PortalRun (portal=0x18fed30, > count=9223372036854775807, isTopLevel=<optimized out>, run_once=<optimized > out>, dest=0x1988448, altdest=0x1988448, qc=0x7ffc225a9ce0) at pquery.c:768 > #22 0x000000000084679b in exec_simple_query (query_string=0x186d8a0 "SELECT > alarm_id, alarm_disregard FROM alarms WHERE alarm_ack_time IS NULL AND > alarm_clear_time IS NULL AND alarm_office = 'ETHERNET'") at postgres.c:1250 > #23 0x000000000084848a in PostgresMain (dbname=<optimized out>, > username=<optimized out>) at postgres.c:4581 > #24 0x0000000000495afe in BackendRun (port=<optimized out>, port=<optimized > out>) at postmaster.c:4504 > #25 BackendStartup (port=0x1894250) at postmaster.c:4232 > #26 ServerLoop () at postmaster.c:1806 > #27 0x00000000007b0c60 in PostmasterMain 
(argc=argc@entry=3, > argv=argv@entry=0x1868280) at postmaster.c:1478 > #28 0x00000000004976a6 in main (argc=3, argv=0x1868280) at main.c:202 > > I saved a copy of the data dir, but I'm going to have to bring the DB back > online soon. I don't know if I can get a page image easily since GDB is a bit > busted (won't show source code) and I can't remember how I fixed that last > time...
Actually gdb seems to be fine, except it doesn't work well on the corefile.

Breakpoint 4, HeapTupleSatisfiesVacuumHorizon (htup=htup@entry=0x7ffcbdc89970, buffer=buffer@entry=690, dead_after=dead_after@entry=0x7ffcbdc898fc) at heapam_visibility.c:1196
1196    {
(gdb) n
1197            HeapTupleHeader tuple = htup->t_data;
(gdb) p *tuple
$21 = {t_choice = {t_heap = {t_xmin = 3779887563, t_xmax = 133553150, t_field3 = {t_cid = 103, t_xvac = 103}}, t_datum = {datum_len_ = -515079733, datum_typmod = 133553150, datum_typeid = 103}}, t_ctid = {ip_blkid = {
      bi_hi = 0, bi_lo = 30348}, ip_posid = 11}, t_infomask2 = 8225, t_infomask = 4423, t_hoff = 32 ' ', t_bits = 0x2aaab37ebe0f "\303\377\367\377\001"}

(gdb) n
1350                    Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
(gdb) p xmax
$20 = 0
(gdb) n
1353                    Assert(TransactionIdIsValid(xmax));

(gdb) down
#3  0x00000000004f3817 in heap_page_prune_opt (relation=0x7f5c989043e0, buffer=buffer@entry=690) at pruneheap.c:209
209                             ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
(gdb) l
204                     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
205                     {

(gdb) p ((PageHeader) page)->pd_flags
$26 = 3

=>
src/include/storage/bufpage.h-#define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
src/include/storage/bufpage.h:#define PD_PAGE_FULL 0x0002 /* not enough free space for new tuple? */

(gdb) p PageGetHeapFreeSpace(page)
$28 = 180
(gdb) p minfree
$29 = 819

ts=# SELECT * FROM verify_heapam('child.alarms_null');
 blkno | offnum | attnum |                               msg
-------+--------+--------+-----------------------------------------------------------------
  2892 |      9 |        | update xid is invalid
 10336 |      9 |        | update xid is invalid
 14584 |      5 |        | update xid is invalid
 30449 |      3 |        | update xid is invalid
 30900 |      2 |        | xmin 3779946727 precedes relation freeze threshold 1:3668608554
 43078 |      2 |        | update xid is invalid
 43090 |      1 |        | update xid is invalid
(7 rows)

 child | alarms_null | table | telsasoft | permanent | heap | 357 MB |