I finally reproduced this and captured a core dump.

For some reason I needed to add an assert() rather than rely on elog(PANIC);
otherwise it still failed with ERROR and produced no core dump.

@@ -1741,4 +1743,5 @@ get_segment_by_index(dsa_area *area, dsa_segment_index 
index)
                segment = dsm_attach(handle);
+               assert (segment != NULL);
                if (segment == NULL)
-                       elog(ERROR, "dsa_area could not attach to segment");
+                       elog(PANIC, "dsa_area could not attach to segment");
                if (area->mapping_pinned)

On Mon, Dec 03, 2018 at 11:45:00AM +1300, Thomas Munro wrote:
> If anyone can reproduce this problem with a debugger, it'd be
> interesting to see the output of dsa_dump(area), and
> FreePageManagerDump(segment_map->fpm).

It looks like this will take some work; is it OK if I make a core dump
available to you?  I'm not sure how sensitive it is to recompilation, but I'm
using PG 11.1 compiled locally on CentOS 6.

/var/log/postgresql/postgresql-2019-02-05_111730.log-< 2019-02-05 11:17:31.372 
EST  >LOG:  background worker "parallel worker" (PID 17110) was terminated by 
signal 6: Aborted
/var/log/postgresql/postgresql-2019-02-05_111730.log:< 2019-02-05 11:17:31.372 
EST  >DETAIL:  Failed process was running: SELECT colcld.child c, parent p, 
array_agg(colpar.attname::text ORDER BY colpar.attnum) cols, 
array_agg(format_type(colpar.atttypid, colpar.atttypmod) ORDER BY 
colpar.attnum) AS types FROM queued_alters qa JOIN pg_attribute colpar ON 
to_regclass(qa.parent)=colpar.attrelid AND colpar.attnum>0 AND NOT 
colpar.attisdropped JOIN (SELECT *, attrelid::regclass::text AS child FROM 
pg_attribute) colcld ON to_regclass(qa.child) =colcld.attrelid AND 
colcld.attnum>0 AND NOT colcld.attisdropped WHERE colcld.attname=colpar.attname 
AND colpar.atttypid!=colcld.atttypid GROUP BY 1,2 ORDER BY parent LIKE 
'unused%', regexp_replace(colcld.child, 
'.*_((([0-9]{4}_[0-9]{2})_[0-9]{2})|(([0-9]{6})([0-9]{2})?))$', '\3\5') DESC, 
regexp_replace(colcld.child, '.*_', '') DESC LIMIT 1

(gdb) bt
#0  0x00000037b9c32495 in raise () from /lib64/libc.so.6
#1  0x00000037b9c33c75 in abort () from /lib64/libc.so.6
#2  0x00000037b9c2b60e in __assert_fail_base () from /lib64/libc.so.6
#3  0x00000037b9c2b6d0 in __assert_fail () from /lib64/libc.so.6
#4  0x00000000008c4a72 in get_segment_by_index (area=0x2788440, index=<value 
optimized out>) at dsa.c:1744
#5  0x00000000008c58e9 in get_best_segment (area=0x2788440, npages=8) at 
dsa.c:1995
#6  0x00000000008c6c99 in dsa_allocate_extended (area=0x2788440, size=32768, 
flags=0) at dsa.c:703
#7  0x000000000064c6fe in ExecParallelHashTupleAlloc (hashtable=0x27affb0, 
size=104, shared=0x7ffc6b5cfc48) at nodeHash.c:2837
#8  0x000000000064cb92 in ExecParallelHashTableInsert (hashtable=0x27affb0, 
slot=<value optimized out>, hashvalue=423104953) at nodeHash.c:1693
#9  0x000000000064cf17 in MultiExecParallelHash (node=0x27a1ed8) at 
nodeHash.c:288
#10 MultiExecHash (node=0x27a1ed8) at nodeHash.c:112
#11 0x000000000064e1f8 in ExecHashJoinImpl (pstate=0x2793038) at 
nodeHashjoin.c:290
#12 ExecParallelHashJoin (pstate=0x2793038) at nodeHashjoin.c:581
#13 0x0000000000638ce0 in ExecProcNodeInstr (node=0x2793038) at 
execProcnode.c:461
#14 0x00000000006349c7 in ExecProcNode (queryDesc=0x2782cd0, direction=<value 
optimized out>, count=0, execute_once=56) at 
../../../src/include/executor/executor.h:237
#15 ExecutePlan (queryDesc=0x2782cd0, direction=<value optimized out>, count=0, 
execute_once=56) at execMain.c:1723
#16 standard_ExecutorRun (queryDesc=0x2782cd0, direction=<value optimized out>, 
count=0, execute_once=56) at execMain.c:364
#17 0x00007f84a97c8618 in pgss_ExecutorRun (queryDesc=0x2782cd0, 
direction=ForwardScanDirection, count=0, execute_once=true) at 
pg_stat_statements.c:892
#18 0x00007f84a93357dd in explain_ExecutorRun (queryDesc=0x2782cd0, 
direction=ForwardScanDirection, count=0, execute_once=true) at 
auto_explain.c:268
#19 0x0000000000635071 in ParallelQueryMain (seg=0x268fba8, toc=0x7f84a9578000) 
at execParallel.c:1402
#20 0x0000000000508f34 in ParallelWorkerMain (main_arg=<value optimized out>) 
at parallel.c:1409
#21 0x0000000000704760 in StartBackgroundWorker () at bgworker.c:834
#22 0x000000000070e11c in do_start_bgworker () at postmaster.c:5698
#23 maybe_start_bgworkers () at postmaster.c:5911
#24 0x0000000000710786 in sigusr1_handler (postgres_signal_arg=<value optimized 
out>) at postmaster.c:5091
#25 <signal handler called>
#26 0x00000037b9ce1603 in __select_nocancel () from /lib64/libc.so.6
#27 0x000000000071300e in ServerLoop (argc=<value optimized out>, argv=<value 
optimized out>) at postmaster.c:1670
#28 PostmasterMain (argc=<value optimized out>, argv=<value optimized out>) at 
postmaster.c:1379
#29 0x000000000067e8c0 in main (argc=3, argv=0x265f960) at main.c:228

#0  0x00000037b9c32495 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x00000037b9c33c75 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x00000037b9c2b60e in __assert_fail_base () from /lib64/libc.so.6
No symbol table info available.
#3  0x00000037b9c2b6d0 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#4  0x00000000008c4a72 in get_segment_by_index (area=0x2788440, index=<value 
optimized out>) at dsa.c:1744
        handle = <value optimized out>
        segment = 0x0
        segment_map = <value optimized out>
        __func__ = "get_segment_by_index"
        __PRETTY_FUNCTION__ = "get_segment_by_index"
#5  0x00000000008c58e9 in get_best_segment (area=0x2788440, npages=8) at 
dsa.c:1995
        segment_map = <value optimized out>
        next_segment_index = <value optimized out>
        contiguous_pages = <value optimized out>
        threshold = 512
        segment_index = 10
        bin = <value optimized out>
#6  0x00000000008c6c99 in dsa_allocate_extended (area=0x2788440, size=32768, 
flags=0) at dsa.c:703
        npages = 8
        first_page = <value optimized out>
        span_pointer = 8796097199728
        pool = 0x7f84a9579730
        size_class = <value optimized out>
        start_pointer = <value optimized out>
        segment_map = <value optimized out>
        result = 140207753496128
        __func__ = "dsa_allocate_extended"
        __PRETTY_FUNCTION__ = "dsa_allocate_extended"
#7  0x000000000064c6fe in ExecParallelHashTupleAlloc (hashtable=0x27affb0, 
size=104, shared=0x7ffc6b5cfc48) at nodeHash.c:2837
        pstate = 0x7f84a9578540
        chunk_shared = <value optimized out>
        chunk = <value optimized out>
        chunk_size = 32768
        result = <value optimized out>
        curbatch = 0
#8  0x000000000064cb92 in ExecParallelHashTableInsert (hashtable=0x27affb0, 
slot=<value optimized out>, hashvalue=423104953) at nodeHash.c:1693
        hashTuple = <value optimized out>
        tuple = 0x27b00c8
        shared = <value optimized out>
        bucketno = 1577401
        batchno = 0
#9  0x000000000064cf17 in MultiExecParallelHash (node=0x27a1ed8) at 
nodeHash.c:288
        outerNode = 0x27a1ff0
        hashkeys = 0x27af110
        slot = 0x27a3d70
        econtext = 0x27a3798
        hashvalue = 423104953
        i = <value optimized out>
        pstate = 0x7f84a9578540
        hashtable = 0x27affb0
        build_barrier = 0x7f84a9578590
#10 MultiExecHash (node=0x27a1ed8) at nodeHash.c:112
No locals.
#11 0x000000000064e1f8 in ExecHashJoinImpl (pstate=0x2793038) at 
nodeHashjoin.c:290
        outerNode = 0x2792f20
        hashNode = 0x27a1ed8
        econtext = 0x2792c68
        outerTupleSlot = 0x1
        node = 0x2793038
        joinqual = 0x27ac270
        otherqual = 0x0
        hashtable = 0x27affb0
        hashvalue = 0
        batchno = 41493896
        parallel_state = 0x7f84a9578540
#12 ExecParallelHashJoin (pstate=0x2793038) at nodeHashjoin.c:581
No locals.

Justin

Reply via email to