On Fri, Mar 07, 2025 at 11:45:10AM +0100, Bill Allombert wrote:
> I am still unsure whether there is a race condition in PARI or not, however if
> there is, it is clear that 2.41 makes the issue much worse.
Hello Aurélien,

I have made some progress. I have written a test program that does not use PARI/GP. You need to compile it with -O0 to hit the race condition:

  gcc pthread1.c -Wall -O0 -g -pthread -o pthread1

Under gdb I get:

  Thread 8898 "pthread1" received signal SIGSEGV, Segmentation fault.
  [Switching to Thread 0x7fffefda26c0 (LWP 3255602)]
  __longjmp_cancel () at ../sysdeps/x86_64/__longjmp.S:147
  warning: 147 ../sysdeps/x86_64/__longjmp.S: Aucun fichier ou dossier de ce nom
  (gdb) bt
  #0 __longjmp_cancel () at ../sysdeps/x86_64/__longjmp.S:147
  #1 0xc8c15fa86bc00faf in ?? ()
  Backtrace stopped: Cannot access memory at address 0xc8c15fa86bc00faf

The critical section seems to be:

  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,NULL);
  for(long u=1; u<10000*mq->input;u++);
  pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,NULL);

I also attach pthread2.c, which is nearly identical. This one (also only with glibc 2.41) exits prematurely without printing "Done!". This is very strange.

So I am still uncertain whether this is a bug in my test-suite or in the libc. I will gladly take any advice.

Cheers,
--
Bill. <ballo...@debian.org>

Imagine a large red swirl here.
/* Copyright (C) 2013  The PARI group.

This file is part of the PARI/GP package.

PARI/GP is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version. It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY WHATSOEVER.

Check the License for details. You should have received a copy of it, along
with the package; see the file 'COPYING'. If not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */

/* Stand-alone test program (no PARI/GP dependency) exercising thread
 * cancellation: a pool of worker threads runs a busy loop with asynchronous
 * cancellation enabled, and the main thread cancels and joins the whole pool
 * as soon as the first result is collected.  This is repeated 999 times.
 *
 * Build as stated in the accompanying message (-O0 is required to hit the
 * problem, presumably because an optimizing build removes the empty loop):
 *
 *   gcc pthread1.c -Wall -O0 -g -pthread -o pthread1
 *
 * The program prints "Done!" when every round completed. */

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct mt_state { long workid; };
struct pari_thread { void * data; };   /* data: start argument handed to the worker */
struct pari_mt { struct mt_state mt; };

/* Per-worker mailbox. */
struct mt_queue
{
  long no;                /* index of this queue in mt_pstate.mq */
  long input, output;     /* input: nonzero while a job is posted;
                             output: nonzero once the worker has finished it */
  long workid;            /* id recorded by the submitter for the posted job */
  long avma;              /* set to 1 by the worker once it has started */
  pthread_cond_t cond;    /* signalled (under mut) when input is posted */
  pthread_mutex_t mut;
  pthread_cond_t *pcond;  /* shared mt_pstate.pcond / mt_pstate.pmut: used by  */
  pthread_mutex_t *pmut;  /* the workers to report start-up and completion     */
};

/* State of the whole pool. */
struct mt_pstate
{
  pthread_t *th;            /* n thread handles */
  struct pari_thread *pth;  /* n start arguments */
  struct mt_queue *mq;      /* n mailboxes */
  long n, nbint, last;      /* n: pool size; nbint: workers already given their
                               first job; last: mailbox used by the next submit */
  long pending;             /* jobs submitted and not yet collected */
  pthread_cond_t pcond;
  pthread_mutex_t pmut;
};

static __thread long mt_thread_no = -1;
static struct mt_pstate *pari_mt;

/* LOCK(m) { ... } UNLOCK(m); runs the braced statement with m held. */
#define LOCK(x) pthread_mutex_lock(x); do
#define UNLOCK(x) while(0); pthread_mutex_unlock(x)

/* Stop the test with a diagnostic if a pthread call returned an error code,
 * so that a resource failure is not mistaken for the crash under study. */
static void
check_rc(int rc, const char *what)
{
  if (rc != 0)
  {
    fprintf(stderr, "%s failed with error %d\n", what, rc);
    exit(EXIT_FAILURE);
  }
}

/* Allocate an array of n elements of the given size; exit on failure. */
static void *
alloc_array(long n, size_t size)
{
  void *p = calloc((size_t) n, size);
  if (!p && n)
  {
    fprintf(stderr, "out of memory\n");
    exit(EXIT_FAILURE);
  }
  return p;
}

void
mt_thread_init(void) { mt_thread_no = 0; }

/* Per-thread initialisation; returns the data pointer stored in t. */
void *
pari_thread_start(struct pari_thread *t)
{
  mt_thread_init();
  return t->data;
}

static void
pari_mt_init(void) { pari_mt = NULL; }

/* Cleanup handler that intentionally does nothing. */
static void
mt_queue_cleanup(void *arg) { (void) arg; }

/* Cleanup handler: release the mutex passed as argument. */
static void
mt_queue_unlock(void *arg)
{ pthread_mutex_unlock((pthread_mutex_t*) arg); }

/* Worker thread body.  arg is the struct pari_thread whose data field points
 * to this worker's struct mt_queue.  Never returns normally: the loop only
 * ends when the thread is cancelled. */
static void*
mt_queue_run(void *arg)
{
  void *args = pari_thread_start((struct pari_thread*) arg);
  struct mt_queue *mq = (struct mt_queue *) args;
  mt_thread_no = mq->no;
  pthread_cleanup_push(mt_queue_cleanup,NULL);
  /* Tell the submitter that this worker is up. */
  LOCK(mq->pmut)
  {
    mq->avma = 1;
    pthread_cond_signal(mq->pcond);
  } UNLOCK(mq->pmut);
  for(;;)
  {
    long done;
    /* Wait for a job.  If cancelled inside pthread_cond_wait, the handler
     * releases mq->mut. */
    LOCK(&mq->mut)
    {
      pthread_cleanup_push(mt_queue_unlock, &mq->mut);
      while(!mq->input)
        pthread_cond_wait(&mq->cond, &mq->mut);
      pthread_cleanup_pop(0);
    } UNLOCK(&mq->mut);
    done = -1;
    /* Section under test: busy loop with asynchronous cancellation enabled.
     * Kept exactly as in the original reproducer. */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,NULL);
    for(long u=1; u<10000*mq->input;u++);
    pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,NULL);
    /* Publish the result and wake the collector. */
    LOCK(mq->pmut)
    {
      mq->input = 0;
      mq->output = done;
      pthread_cond_signal(mq->pcond);
    } UNLOCK(mq->pmut);
  }
  /* Not reached; needed to pair lexically with the push above. */
  pthread_cleanup_pop(1);
  return NULL;
}

/* Return the index of the first mailbox with a nonzero output, or -1. */
static long
mt_queue_check(struct mt_pstate *mt)
{
  long i;
  for(i=0; i<mt->n; i++)
  {
    struct mt_queue *mq = mt->mq+i;
    if (mq->output) return i;
  }
  return -1;
}

/* Collect one result.
 * While some worker has not yet received a first job, only select the next
 * mailbox and return 0.  Otherwise block until a worker has produced an
 * output, clear it, store its work id in *workid (if workid is not NULL)
 * and return the output value.  *pending receives the number of jobs still
 * outstanding. */
static long
mtpthread_queue_get(long *workid, long *pending)
{
  struct mt_pstate *mt = pari_mt;
  struct mt_queue *mq;
  long done = 0;
  long last;
  if (mt->nbint<mt->n)
  {
    mt->last = mt->nbint;
    *pending = mt->pending;
    return 0;
  }
  LOCK(&mt->pmut)
  {
    while ((last = mt_queue_check(mt)) < 0)
    {
      pthread_cond_wait(&mt->pcond, &mt->pmut);
    }
  } UNLOCK(&mt->pmut);
  mq = mt->mq+last;
  done = mq->output;
  mq->output = 0;
  if (workid) *workid = mq->workid;
  mt->last = last;
  mt->pending--;
  *pending = mt->pending;
  return done;
}

/* Post job `work` (tagged `workid`) to the mailbox selected by the previous
 * mtpthread_queue_get.  work == 0 means "nothing more to submit": it only
 * marks every worker as initialised. */
static void
mtpthread_queue_submit(long workid, long work)
{
  struct mt_pstate *mt = pari_mt;
  struct mt_queue *mq = mt->mq+mt->last;
  if (!work) { mt->nbint=mt->n; return; }
  if (mt->nbint<mt->n)
  {
    mt->nbint++;
    /* First job for this worker: wait until it has reported start-up. */
    LOCK(mq->pmut)
    {
      while(!mq->avma)
        pthread_cond_wait(mq->pcond, mq->pmut);
    } UNLOCK(mq->pmut);
  }
  LOCK(&mq->mut)
  {
    mq->output = 0;
    mq->workid = workid;
    mq->input = work;
    pthread_cond_signal(&mq->cond);
  } UNLOCK(&mq->mut);
  mt->pending++;
}

/* Cancel and join every worker, then release all resources of the pool,
 * including the shared condition variable and mutex. */
void
mt_queue_reset(void)
{
  struct mt_pstate *mt = pari_mt;
  long i;
  for (i=0; i<mt->n; i++)
    check_rc(pthread_cancel(mt->th[i]), "pthread_cancel");
  for (i=0; i<mt->n; i++)
    check_rc(pthread_join(mt->th[i],NULL), "pthread_join");
  pari_mt = NULL;
  for (i=0;i<mt->n;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    pthread_cond_destroy(&mq->cond);
    pthread_mutex_destroy(&mq->mut);
  }
  /* All workers are joined, so nothing references the shared pair any more. */
  pthread_cond_destroy(&mt->pcond);
  pthread_mutex_destroy(&mt->pmut);
  free(mt->mq);
  free(mt->pth);
  free(mt->th);
  free(mt);
}

/* Create a pool of `lim` workers and install it as the current pool.
 * `pt` is not used by this test program. */
void
mt_queue_start_lim(struct pari_mt *pt, long lim)
{
  struct mt_pstate *mt = (struct mt_pstate*) alloc_array(1, sizeof(*mt));
  long i;
  (void) pt;
  mt->mq  = (struct mt_queue *) alloc_array(lim, sizeof(*mt->mq));
  mt->th  = (pthread_t *) alloc_array(lim, sizeof(*mt->th));
  mt->pth = (struct pari_thread *) alloc_array(lim, sizeof(*mt->pth));
  mt->pending = 0;
  mt->n = lim;
  mt->nbint = 0;
  mt->last = 0;
  check_rc(pthread_cond_init(&mt->pcond,NULL), "pthread_cond_init");
  check_rc(pthread_mutex_init(&mt->pmut,NULL), "pthread_mutex_init");
  for (i=0;i<lim;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    mq->no     = i;
    mq->avma = 0;
    mq->input = 0;
    mq->output = 0;
    mq->pcond  = &mt->pcond;
    mq->pmut   = &mt->pmut;
    check_rc(pthread_cond_init(&mq->cond,NULL), "pthread_cond_init");
    check_rc(pthread_mutex_init(&mq->mut,NULL), "pthread_mutex_init");
    mt->pth[i].data = (void*)mq;
  }
  /* A failed creation would leave th[i] unset and make the later
   * cancel/join undefined, so stop immediately in that case. */
  for (i=0;i<lim;i++)
    check_rc(pthread_create(&mt->th[i],NULL, &mt_queue_run, (void*)&mt->pth[i]),
             "pthread_create");
  pari_mt = mt;
}

/* Run 999 rounds: start 20 workers, submit jobs until the first result comes
 * back (workers always report -1, which ends the round), then cancel the
 * pool while the other workers are still busy. */
int
main(void)
{
  struct pari_mt pt;
  long i, j, k, workid = 0, pending = 0;
  pari_mt_init();
  for (k = 1; k<1000; k++)
  {
    long nmax = 40;
    mt_queue_start_lim(&pt,20);
    for (i = 1; i <= nmax || pending;i++)
    {
      long s = i <= nmax ? i: 0;
      mtpthread_queue_submit(i, s);
      j = mtpthread_queue_get(&workid, &pending);
      if (j<0) break;
    }
    mt_queue_reset();
  }
  printf("Done!\n");
  return 0;
}
/* Copyright (C) 2013  The PARI group.

This file is part of the PARI/GP package.

PARI/GP is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version. It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY WHATSOEVER.

Check the License for details. You should have received a copy of it, along
with the package; see the file 'COPYING'. If not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */

/* Stand-alone test program (no PARI/GP dependency) exercising thread
 * cancellation: a pool of worker threads runs a busy loop with asynchronous
 * cancellation enabled, and the main thread cancels and joins the whole pool
 * as soon as the first result is collected.  This is repeated 999 times.
 *
 * This variant does NOT install the outer cleanup handler in mt_queue_run
 * (the push/pop pair is disabled); the accompanying message reports that it
 * exits prematurely without printing "Done!".
 *
 * The program prints "Done!" when every round completed. */

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct mt_state { long workid; };
struct pari_thread { void * data; };   /* data: start argument handed to the worker */
struct pari_mt { struct mt_state mt; };

/* Per-worker mailbox. */
struct mt_queue
{
  long no;                /* index of this queue in mt_pstate.mq */
  long input, output;     /* input: nonzero while a job is posted;
                             output: nonzero once the worker has finished it */
  long workid;            /* id recorded by the submitter for the posted job */
  long avma;              /* set to 1 by the worker once it has started */
  pthread_cond_t cond;    /* signalled (under mut) when input is posted */
  pthread_mutex_t mut;
  pthread_cond_t *pcond;  /* shared mt_pstate.pcond / mt_pstate.pmut: used by  */
  pthread_mutex_t *pmut;  /* the workers to report start-up and completion     */
};

/* State of the whole pool. */
struct mt_pstate
{
  pthread_t *th;            /* n thread handles */
  struct pari_thread *pth;  /* n start arguments */
  struct mt_queue *mq;      /* n mailboxes */
  long n, nbint, last;      /* n: pool size; nbint: workers already given their
                               first job; last: mailbox used by the next submit */
  long pending;             /* jobs submitted and not yet collected */
  pthread_cond_t pcond;
  pthread_mutex_t pmut;
};

static __thread long mt_thread_no = -1;
static struct mt_pstate *pari_mt;

/* LOCK(m) { ... } UNLOCK(m); runs the braced statement with m held. */
#define LOCK(x) pthread_mutex_lock(x); do
#define UNLOCK(x) while(0); pthread_mutex_unlock(x)

/* Stop the test with a diagnostic if a pthread call returned an error code,
 * so that a resource failure is not mistaken for the problem under study. */
static void
check_rc(int rc, const char *what)
{
  if (rc != 0)
  {
    fprintf(stderr, "%s failed with error %d\n", what, rc);
    exit(EXIT_FAILURE);
  }
}

/* Allocate an array of n elements of the given size; exit on failure. */
static void *
alloc_array(long n, size_t size)
{
  void *p = calloc((size_t) n, size);
  if (!p && n)
  {
    fprintf(stderr, "out of memory\n");
    exit(EXIT_FAILURE);
  }
  return p;
}

void
mt_thread_init(void) { mt_thread_no = 0; }

/* Per-thread initialisation; returns the data pointer stored in t. */
void *
pari_thread_start(struct pari_thread *t)
{
  mt_thread_init();
  return t->data;
}

static void
pari_mt_init(void) { pari_mt = NULL; }

/* Cleanup handler that intentionally does nothing.  Not referenced in this
 * variant, where the push that would install it is disabled. */
static void
mt_queue_cleanup(void *arg) { (void) arg; }

/* Cleanup handler: release the mutex passed as argument. */
static void
mt_queue_unlock(void *arg)
{ pthread_mutex_unlock((pthread_mutex_t*) arg); }

/* Worker thread body.  arg is the struct pari_thread whose data field points
 * to this worker's struct mt_queue.  Never returns normally: the loop only
 * ends when the thread is cancelled. */
static void*
mt_queue_run(void *arg)
{
  void *args = pari_thread_start((struct pari_thread*) arg);
  struct mt_queue *mq = (struct mt_queue *) args;
  mt_thread_no = mq->no;
  /* Deliberately disabled in this variant:
     pthread_cleanup_push(mt_queue_cleanup,NULL); */
  /* Tell the submitter that this worker is up. */
  LOCK(mq->pmut)
  {
    mq->avma = 1;
    pthread_cond_signal(mq->pcond);
  } UNLOCK(mq->pmut);
  for(;;)
  {
    long done;
    /* Wait for a job.  If cancelled inside pthread_cond_wait, the handler
     * releases mq->mut. */
    LOCK(&mq->mut)
    {
      pthread_cleanup_push(mt_queue_unlock, &mq->mut);
      while(!mq->input)
        pthread_cond_wait(&mq->cond, &mq->mut);
      pthread_cleanup_pop(0);
    } UNLOCK(&mq->mut);
    done = -1;
    /* Section under test: busy loop with asynchronous cancellation enabled.
     * Kept exactly as in the original reproducer. */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,NULL);
    for(long u=1; u<10000*mq->input;u++);
    pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,NULL);
    /* Publish the result and wake the collector. */
    LOCK(mq->pmut)
    {
      mq->input = 0;
      mq->output = done;
      pthread_cond_signal(mq->pcond);
    } UNLOCK(mq->pmut);
  }
  /* Deliberately disabled together with the matching push:
     pthread_cleanup_pop(1); */
  return NULL;
}

/* Return the index of the first mailbox with a nonzero output, or -1. */
static long
mt_queue_check(struct mt_pstate *mt)
{
  long i;
  for(i=0; i<mt->n; i++)
  {
    struct mt_queue *mq = mt->mq+i;
    if (mq->output) return i;
  }
  return -1;
}

/* Collect one result.
 * While some worker has not yet received a first job, only select the next
 * mailbox and return 0.  Otherwise block until a worker has produced an
 * output, clear it, store its work id in *workid (if workid is not NULL)
 * and return the output value.  *pending receives the number of jobs still
 * outstanding. */
static long
mtpthread_queue_get(long *workid, long *pending)
{
  struct mt_pstate *mt = pari_mt;
  struct mt_queue *mq;
  long done = 0;
  long last;
  if (mt->nbint<mt->n)
  {
    mt->last = mt->nbint;
    *pending = mt->pending;
    return 0;
  }
  LOCK(&mt->pmut)
  {
    while ((last = mt_queue_check(mt)) < 0)
    {
      pthread_cond_wait(&mt->pcond, &mt->pmut);
    }
  } UNLOCK(&mt->pmut);
  mq = mt->mq+last;
  done = mq->output;
  mq->output = 0;
  if (workid) *workid = mq->workid;
  mt->last = last;
  mt->pending--;
  *pending = mt->pending;
  return done;
}

/* Post job `work` (tagged `workid`) to the mailbox selected by the previous
 * mtpthread_queue_get.  work == 0 means "nothing more to submit": it only
 * marks every worker as initialised. */
static void
mtpthread_queue_submit(long workid, long work)
{
  struct mt_pstate *mt = pari_mt;
  struct mt_queue *mq = mt->mq+mt->last;
  if (!work) { mt->nbint=mt->n; return; }
  if (mt->nbint<mt->n)
  {
    mt->nbint++;
    /* First job for this worker: wait until it has reported start-up. */
    LOCK(mq->pmut)
    {
      while(!mq->avma)
        pthread_cond_wait(mq->pcond, mq->pmut);
    } UNLOCK(mq->pmut);
  }
  LOCK(&mq->mut)
  {
    mq->output = 0;
    mq->workid = workid;
    mq->input = work;
    pthread_cond_signal(&mq->cond);
  } UNLOCK(&mq->mut);
  mt->pending++;
}

/* Cancel and join every worker, then release all resources of the pool,
 * including the shared condition variable and mutex. */
void
mt_queue_reset(void)
{
  struct mt_pstate *mt = pari_mt;
  long i;
  for (i=0; i<mt->n; i++)
    check_rc(pthread_cancel(mt->th[i]), "pthread_cancel");
  for (i=0; i<mt->n; i++)
    check_rc(pthread_join(mt->th[i],NULL), "pthread_join");
  pari_mt = NULL;
  for (i=0;i<mt->n;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    pthread_cond_destroy(&mq->cond);
    pthread_mutex_destroy(&mq->mut);
  }
  /* All workers are joined, so nothing references the shared pair any more. */
  pthread_cond_destroy(&mt->pcond);
  pthread_mutex_destroy(&mt->pmut);
  free(mt->mq);
  free(mt->pth);
  free(mt->th);
  free(mt);
}

/* Create a pool of `lim` workers and install it as the current pool.
 * `pt` is not used by this test program. */
void
mt_queue_start_lim(struct pari_mt *pt, long lim)
{
  struct mt_pstate *mt = (struct mt_pstate*) alloc_array(1, sizeof(*mt));
  long i;
  (void) pt;
  mt->mq  = (struct mt_queue *) alloc_array(lim, sizeof(*mt->mq));
  mt->th  = (pthread_t *) alloc_array(lim, sizeof(*mt->th));
  mt->pth = (struct pari_thread *) alloc_array(lim, sizeof(*mt->pth));
  mt->pending = 0;
  mt->n = lim;
  mt->nbint = 0;
  mt->last = 0;
  check_rc(pthread_cond_init(&mt->pcond,NULL), "pthread_cond_init");
  check_rc(pthread_mutex_init(&mt->pmut,NULL), "pthread_mutex_init");
  for (i=0;i<lim;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    mq->no     = i;
    mq->avma = 0;
    mq->input = 0;
    mq->output = 0;
    mq->pcond  = &mt->pcond;
    mq->pmut   = &mt->pmut;
    check_rc(pthread_cond_init(&mq->cond,NULL), "pthread_cond_init");
    check_rc(pthread_mutex_init(&mq->mut,NULL), "pthread_mutex_init");
    mt->pth[i].data = (void*)mq;
  }
  /* A failed creation would leave th[i] unset and make the later
   * cancel/join undefined, so stop immediately in that case. */
  for (i=0;i<lim;i++)
    check_rc(pthread_create(&mt->th[i],NULL, &mt_queue_run, (void*)&mt->pth[i]),
             "pthread_create");
  pari_mt = mt;
}

/* Run 999 rounds: start 20 workers, submit jobs until the first result comes
 * back (workers always report -1, which ends the round), then cancel the
 * pool while the other workers are still busy. */
int
main(void)
{
  struct pari_mt pt;
  long i, j, k, workid = 0, pending = 0;
  pari_mt_init();
  for (k = 1; k<1000; k++)
  {
    long nmax = 40;
    mt_queue_start_lim(&pt,20);
    for (i = 1; i <= nmax || pending;i++)
    {
      long s = i <= nmax ? i: 0;
      mtpthread_queue_submit(i, s);
      j = mtpthread_queue_get(&workid, &pending);
      if (j<0) break;
    }
    mt_queue_reset();
  }
  printf("Done!\n");
  return 0;
}