Hi,

I took a look at this today, doing a bit of stress-testing, and I can
get it to crash with segfaults in pagetable_create (I'm not sure whether
the issue is actually there; it might just be a symptom of an issue
elsewhere).

Attached is a shell script I use to run the stress test - it uses the
'test' database, generates tables of different sizes, and then runs
queries with various parameter combinations. It takes a while to trigger
the crash, so it might depend on timing or something like that.

I've also attached two examples of backtraces. I've also seen an infinite
loop in pagetable_create, but the crashes are much more common.


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
Core was generated by `postgres: postgres test [local] SELECT                   
                     '.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  MemoryContextAllocZero (context=context@entry=0x561c0598cf40, 
size=size@entry=48) at mcxt.c:852
852             ret = context->methods->alloc(context, size);
(gdb) bt
#0  MemoryContextAllocZero (context=context@entry=0x561c0598cf40, 
size=size@entry=48) at mcxt.c:852
#1  0x0000561c0467b95c in pagetable_create (nelements=128, 
private_data=0x7f155f7ef000, ctx=0x561c0598cf40) at 
../../../src/include/lib/simplehash.h:457
#2  tbm_create_pagetable (tbm=tbm@entry=0x7f155f7ef000) at tidbitmap.c:296
#3  0x0000561c0467bae3 in tbm_get_pageentry (tbm=tbm@entry=0x7f155f7ef000, 
pageno=3779) at tidbitmap.c:1303
#4  0x0000561c0467bed5 in tbm_union_page (a=a@entry=0x7f155f7ef000, 
bpage=0x7f155f7e21b8) at tidbitmap.c:514
#5  0x0000561c0467c5a0 in tbm_union (b=0x561c059cd468, a=0x7f155f7ef000) at 
tidbitmap.c:474
#6  tbm_union (a=0x7f155f7ef000, b=0x561c059cd468) at tidbitmap.c:457
#7  0x0000561c0467cb77 in tbm_merge (tbm=0x561c059cd468, dp_tbm=<optimized 
out>, dp_pagetable=0x7f155f8d4418) at tidbitmap.c:822
#8  0x0000561c0461c80f in BitmapHeapNext (node=node@entry=0x561c05996400) at 
nodeBitmapHeapscan.c:228
#9  0x0000561c0460f611 in ExecScanFetch (recheckMtd=0x561c0461bf10 
<BitmapHeapRecheck>, accessMtd=0x561c0461bfa0 <BitmapHeapNext>, 
node=0x561c05996400)
    at execScan.c:133
#10 ExecScan (node=0x561c05996400, accessMtd=0x561c0461bfa0 <BitmapHeapNext>, 
recheckMtd=0x561c0461bf10 <BitmapHeapRecheck>) at execScan.c:182
#11 0x0000561c04615d31 in ExecProcNode (node=0x561c05996400) at 
../../../src/include/executor/executor.h:247
#12 fetch_input_tuple (aggstate=aggstate@entry=0x561c05995d98) at nodeAgg.c:589
#13 0x0000561c046188c8 in agg_retrieve_direct (aggstate=<optimized out>) at 
nodeAgg.c:2356
#14 ExecAgg (pstate=<optimized out>) at nodeAgg.c:2171
#15 0x0000561c0461f487 in ExecProcNode (node=0x561c05995d98) at 
../../../src/include/executor/executor.h:247
#16 gather_getnext (gatherstate=0x561c05995bf8) at nodeGather.c:295
#17 ExecGather (pstate=0x561c05995bf8) at nodeGather.c:227
#18 0x0000561c04615d31 in ExecProcNode (node=0x561c05995bf8) at 
../../../src/include/executor/executor.h:247
#19 fetch_input_tuple (aggstate=aggstate@entry=0x561c059955d0) at nodeAgg.c:589
#20 0x0000561c046188c8 in agg_retrieve_direct (aggstate=<optimized out>) at 
nodeAgg.c:2356
#21 ExecAgg (pstate=<optimized out>) at nodeAgg.c:2171
#22 0x0000561c04606b4b in ExecProcNode (node=0x561c059955d0) at 
../../../src/include/executor/executor.h:247
#23 ExecutePlan (execute_once=<optimized out>, dest=0x561c059c6550, 
direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>, 
    operation=CMD_SELECT, use_parallel_mode=<optimized out>, 
planstate=0x561c059955d0, estate=0x561c05995378) at execMain.c:1542
#24 standard_ExecutorRun (queryDesc=0x561c058fdac8, direction=<optimized out>, 
count=0, execute_once=<optimized out>) at execMain.c:364
#25 0x0000561c0475f39c in PortalRunSelect (portal=0x561c0593f758, 
forward=<optimized out>, count=0, dest=<optimized out>) at pquery.c:912
#26 0x0000561c04760546 in PortalRun (portal=portal@entry=0x561c0593f758, 
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, 
    run_once=run_once@entry=true, dest=dest@entry=0x561c059c6550, 
altdest=altdest@entry=0x561c059c6550, qc=0x7ffd52084c80) at pquery.c:756
#27 0x0000561c0475c33c in exec_simple_query (query_string=0x561c058dbce8 
"select count(b)from t where a between 3387 and 4027;") at postgres.c:1239
#28 0x0000561c0475dee0 in PostgresMain (argc=argc@entry=1, 
argv=argv@entry=0x7ffd520850e0, dbname=<optimized out>, username=<optimized 
out>)
    at postgres.c:4305
#29 0x0000561c046e709c in BackendRun (port=<optimized out>, port=<optimized 
out>) at postmaster.c:4488
#30 BackendStartup (port=<optimized out>) at postmaster.c:4210
#31 ServerLoop () at postmaster.c:1727
#32 0x0000561c046e7f3f in PostmasterMain (argc=<optimized out>, 
argv=0x561c058d6550) at postmaster.c:1400
#33 0x0000561c0448d970 in main (argc=3, argv=0x561c058d6550) at main.c:209


Core was generated by `postgres: postgres test [local] EXPLAIN                  
                     '.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  MemoryContextAllocZero (context=context@entry=0x561c0598ed90, 
size=size@entry=48) at mcxt.c:852
852             ret = context->methods->alloc(context, size);
(gdb) bt
#0  MemoryContextAllocZero (context=context@entry=0x561c0598ed90, 
size=size@entry=48) at mcxt.c:852
#1  0x0000561c0467b95c in pagetable_create (nelements=128, 
private_data=0x7f155f7ef000, ctx=0x561c0598ed90) at 
../../../src/include/lib/simplehash.h:457
#2  tbm_create_pagetable (tbm=tbm@entry=0x7f155f7ef000) at tidbitmap.c:296
#3  0x0000561c0467bae3 in tbm_get_pageentry (tbm=tbm@entry=0x7f155f7ef000, 
pageno=3774) at tidbitmap.c:1303
#4  0x0000561c0467bed5 in tbm_union_page (a=a@entry=0x7f155f7ef000, 
bpage=0x7f155f7e2008) at tidbitmap.c:514
#5  0x0000561c0467c5a0 in tbm_union (b=0x561c059d0c78, a=0x7f155f7ef000) at 
tidbitmap.c:474
#6  tbm_union (a=0x7f155f7ef000, b=0x561c059d0c78) at tidbitmap.c:457
#7  0x0000561c0467cb77 in tbm_merge (tbm=0x561c059d0c78, dp_tbm=<optimized 
out>, dp_pagetable=0x7f155f8d4458) at tidbitmap.c:822
#8  0x0000561c0461c80f in BitmapHeapNext (node=node@entry=0x561c059b4910) at 
nodeBitmapHeapscan.c:228
#9  0x0000561c0460f611 in ExecScanFetch (recheckMtd=0x561c0461bf10 
<BitmapHeapRecheck>, accessMtd=0x561c0461bfa0 <BitmapHeapNext>, 
node=0x561c059b4910)
    at execScan.c:133
#10 ExecScan (node=0x561c059b4910, accessMtd=0x561c0461bfa0 <BitmapHeapNext>, 
recheckMtd=0x561c0461bf10 <BitmapHeapRecheck>) at execScan.c:182
#11 0x0000561c0460ccd9 in ExecProcNodeInstr (node=0x561c059b4910) at 
execProcnode.c:466
#12 0x0000561c04615d31 in ExecProcNode (node=0x561c059b4910) at 
../../../src/include/executor/executor.h:247
#13 fetch_input_tuple (aggstate=aggstate@entry=0x561c059b42a8) at nodeAgg.c:589
#14 0x0000561c046188c8 in agg_retrieve_direct (aggstate=<optimized out>) at 
nodeAgg.c:2356
#15 ExecAgg (pstate=<optimized out>) at nodeAgg.c:2171
#16 0x0000561c0460ccd9 in ExecProcNodeInstr (node=0x561c059b42a8) at 
execProcnode.c:466
#17 0x0000561c0461f487 in ExecProcNode (node=0x561c059b42a8) at 
../../../src/include/executor/executor.h:247
#18 gather_getnext (gatherstate=0x561c059b4108) at nodeGather.c:295
#19 ExecGather (pstate=0x561c059b4108) at nodeGather.c:227
#20 0x0000561c0460ccd9 in ExecProcNodeInstr (node=0x561c059b4108) at 
execProcnode.c:466
#21 0x0000561c04615d31 in ExecProcNode (node=0x561c059b4108) at 
../../../src/include/executor/executor.h:247
#22 fetch_input_tuple (aggstate=aggstate@entry=0x561c059b3ae0) at nodeAgg.c:589
#23 0x0000561c046188c8 in agg_retrieve_direct (aggstate=<optimized out>) at 
nodeAgg.c:2356
#24 ExecAgg (pstate=<optimized out>) at nodeAgg.c:2171
#25 0x0000561c0460ccd9 in ExecProcNodeInstr (node=0x561c059b3ae0) at 
execProcnode.c:466
#26 0x0000561c04606b4b in ExecProcNode (node=0x561c059b3ae0) at 
../../../src/include/executor/executor.h:247
#27 ExecutePlan (execute_once=<optimized out>, dest=0x561c04aedec0 
<donothingDR>, direction=<optimized out>, numberTuples=0, sendTuples=<optimized 
out>, 
    operation=CMD_SELECT, use_parallel_mode=<optimized out>, 
planstate=0x561c059b3ae0, estate=0x561c059b3888) at execMain.c:1542
#28 standard_ExecutorRun (queryDesc=0x561c059b2148, direction=<optimized out>, 
count=0, execute_once=<optimized out>) at execMain.c:364
#29 0x0000561c045b2edb in ExplainOnePlan 
(plannedstmt=plannedstmt@entry=0x561c059b20b8, into=into@entry=0x0, 
es=es@entry=0x561c05904e48, 
    queryString=queryString@entry=0x561c058dbce8 "explain (analyze, costs off, 
timing off) select count(b) from t where a between 2650 and 3290;", 
    params=params@entry=0x0, queryEnv=queryEnv@entry=0x0, 
planduration=0x7ffd520847c0, bufusage=0x0) at explain.c:571
#30 0x0000561c045b3294 in ExplainOneQuery (queryEnv=0x0, params=0x0, 
    queryString=0x561c058dbce8 "explain (analyze, costs off, timing off) select 
count(b) from t where a between 2650 and 3290;", es=0x561c05904e48, 
    into=0x0, cursorOptions=<optimized out>, query=<optimized out>) at 
explain.c:397
#31 ExplainOneQuery (query=<optimized out>, cursorOptions=<optimized out>, 
into=0x0, es=0x561c05904e48, 
    queryString=0x561c058dbce8 "explain (analyze, costs off, timing off) select 
count(b) from t where a between 2650 and 3290;", params=0x0, queryEnv=0x0)
    at explain.c:361
#32 0x0000561c045b3857 in ExplainQuery (pstate=pstate@entry=0x561c058fdc80, 
stmt=stmt@entry=0x561c058dcf60, params=params@entry=0x0, 
    dest=dest@entry=0x561c058fdbf0) at explain.c:275
#33 0x0000561c04761a21 in standard_ProcessUtility (pstmt=0x561c058dd6c8, 
    queryString=0x561c058dbce8 "explain (analyze, costs off, timing off) select 
count(b) from t where a between 2650 and 3290;", 
    context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, 
dest=0x561c058fdbf0, qc=0x7ffd52084a80) at utility.c:829
#34 0x0000561c0475f046 in PortalRunUtility (portal=0x561c059445c8, 
pstmt=0x561c058dd6c8, isTopLevel=<optimized out>, setHoldSnapshot=<optimized 
out>, 
    dest=0x561c058fdbf0, qc=0x7ffd52084a80) at pquery.c:1159
#35 0x0000561c0475fc9f in FillPortalStore (portal=0x561c059445c8, 
isTopLevel=<optimized out>) at ../../../src/include/nodes/pg_list.h:248
#36 0x0000561c0476062d in PortalRun (portal=portal@entry=0x561c059445c8, 
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, 
    run_once=run_once@entry=true, dest=dest@entry=0x561c05993628, 
altdest=altdest@entry=0x561c05993628, qc=0x7ffd52084c80) at pquery.c:751
#37 0x0000561c0475c33c in exec_simple_query (
    query_string=0x561c058dbce8 "explain (analyze, costs off, timing off) 
select count(b) from t where a between 2650 and 3290;") at postgres.c:1239
#38 0x0000561c0475dee0 in PostgresMain (argc=argc@entry=1, 
argv=argv@entry=0x7ffd520850e0, dbname=<optimized out>, username=<optimized 
out>)
    at postgres.c:4305
#39 0x0000561c046e709c in BackendRun (port=<optimized out>, port=<optimized 
out>) at postmaster.c:4488
#40 BackendStartup (port=<optimized out>) at postmaster.c:4210
#41 ServerLoop () at postmaster.c:1727
#42 0x0000561c046e7f3f in PostmasterMain (argc=<optimized out>, 
argv=0x561c058d6550) at postmaster.c:1400
#43 0x0000561c0448d970 in main (argc=3, argv=0x561c058d6550) at main.c:209


Attachment: parallel-bitmap-index-scan.sh
Description: application/shellscript

Reply via email to