I wrote: > In short, my current belief is that Linux PPC64 fails when trying > to deliver a signal if there's right around 2KB of stack remaining, > even though it should be able to expand the stack and press on.
I figured I should try to remove some variables from the equation by demonstrating this claim without involving Postgres. The attached test program eats some stack space and then waits to be sent SIGUSR1. For some values of "some stack space", it dumps core: [tgl@postgresql-fedora ~]$ gcc -g -Wall -O1 stacktest.c [tgl@postgresql-fedora ~]$ ./a.out 1240000 & [1] 11796 [tgl@postgresql-fedora ~]$ kill -USR1 11796 [tgl@postgresql-fedora ~]$ signal delivered, stack base 0x7fffdc160000 top 0x7fffdc031420 (1240032 used) [1]+ Done ./a.out 1240000 [tgl@postgresql-fedora ~]$ ./a.out 1242000 & [1] 11797 [tgl@postgresql-fedora ~]$ kill -USR1 11797 [tgl@postgresql-fedora ~]$ [1]+ Segmentation fault (core dumped) ./a.out 1242000 [tgl@postgresql-fedora ~]$ uname -a Linux postgresql-fedora.novalocal 4.18.19-100.fc27.ppc64le #1 SMP Wed Nov 14 21:53:32 UTC 2018 ppc64le ppc64le ppc64le GNU/Linux I don't think any further proof is required that this is a kernel bug. Where would be a good place to file it? regards, tom lane
#include <stdio.h> #include <stdlib.h> #include <signal.h> #include <unistd.h> static char *stack_base_ptr; static char *stack_top_ptr; static volatile sig_atomic_t sig_occurred = 0; static void sigusr1_handler(int signal_arg) { sig_occurred = 1; } static void consume_stack(long stackdepth) { char stack_cur; if ((stack_base_ptr - &stack_cur) < stackdepth) consume_stack(stackdepth); else { stack_top_ptr = &stack_cur; while (!sig_occurred) ; } } int main(int argc, char **argv) { long stackdepth = atol(argv[1]); char stack_base; struct sigaction act; act.sa_handler = sigusr1_handler; sigemptyset(&act.sa_mask); act.sa_flags = 0; if (sigaction(SIGUSR1, &act, NULL) < 0) perror("sigaction"); stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL); consume_stack(stackdepth); if (sig_occurred) printf("signal delivered, stack base %p top %p (%zu used)\n", stack_base_ptr, stack_top_ptr, stack_base_ptr - stack_top_ptr); else printf("no signal delivered\n"); return 0; }