On Fri, Mar 07, 2025 at 11:45:10AM +0100, Bill Allombert wrote:
> I am still unsure whether there is a race condition in PARI or not, however if
> there is, it is clear that 2.41 makes the issue much worse.

Hello Aurélien,

I have made some progress. I have made a test program that does not use PARI/GP.
You need to compile with -O0 to hit the race condition.

gcc  pthread1.c -Wall -O0 -g -pthread -o pthread1

Under gdb I get

Thread 8898 "pthread1" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffefda26c0 (LWP 3255602)]
__longjmp_cancel () at ../sysdeps/x86_64/__longjmp.S:147
warning: 147    ../sysdeps/x86_64/__longjmp.S: Aucun fichier ou dossier de ce nom
(gdb) bt
#0  __longjmp_cancel () at ../sysdeps/x86_64/__longjmp.S:147
#1  0xc8c15fa86bc00faf in ?? ()
Backtrace stopped: Cannot access memory at address 0xc8c15fa86bc00faf

The critical section seems to be

    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,NULL);
    for(long u=1; u<10000*mq->input;u++);
    pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,NULL);

I also attach pthread2.c, which is nearly identical.
This one (also only with glibc 2.41) exits prematurely without printing "Done".
This is very strange.

So I am still uncertain if this is a bug in my test-suite or in the libc.
I will gladly take any advice.

Cheers,
-- 
Bill. <ballo...@debian.org>

Imagine a large red swirl here. 
/* Copyright (C) 2013  The PARI group.

This file is part of the PARI/GP package.

PARI/GP is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version. It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY WHATSOEVER.

Check the License for details. You should have received a copy of it, along
with the package; see the file 'COPYING'. If not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Small state record embedded in struct pari_mt.  None of the visible code
   reads or writes it. */
struct mt_state
{
  long workid; /* presumably a work-item identifier -- unused in the visible code */
};

/* Start-up descriptor handed to each worker thread as its pthread argument. */
struct pari_thread
{
  void * data; /* opaque payload returned by pari_thread_start();
                  mt_queue_start_lim() stores the worker's struct mt_queue here */
};

/* Caller-side handle passed to mt_queue_start_lim(), which does not use it
   in the visible code. */
struct pari_mt
{
  struct mt_state mt; /* embedded state record (never accessed here) */
};

/* Per-worker mailbox: one job slot going in (input) and one result slot
   coming out (output), plus the synchronisation objects guarding them. */
struct mt_queue
{
  long no;              /* index of this queue in the pool; copied into mt_thread_no by the worker */
  long input, output;   /* input: non-zero while a job is posted (also the job size);
                           output: non-zero once a result is ready to be collected */
  long workid;          /* identifier of the job last submitted to this queue */
  long avma;            /* 0 until the worker thread has started, then 1
                           (name presumably inherited from PARI -- only used as a flag here) */
  pthread_cond_t cond;  /* signalled by the submitter when input is set */
  pthread_mutex_t mut;  /* held around the worker's wait on input and the submitter's post */
  pthread_cond_t *pcond;  /* points to the pool-wide condition variable (mt_pstate.pcond) */
  pthread_mutex_t *pmut;  /* points to the pool-wide mutex (mt_pstate.pmut) */
};


/* State of one worker pool; the active pool is reachable through pari_mt. */
struct mt_pstate
{
  pthread_t *th;            /* array of n thread handles */
  struct pari_thread *pth;  /* array of n start-up descriptors, one per thread */
  struct mt_queue *mq;      /* array of n per-worker queues */
  long n, nbint, last;      /* n: number of workers;
                               nbint: workers that already received a first job (capped at n);
                               last: index of the queue the next submit will use */
  long pending;             /* jobs submitted and not yet collected */
  pthread_cond_t pcond;     /* signalled by workers on start-up and on job completion */
  pthread_mutex_t pmut;     /* mutex paired with pcond */
};

/* Thread-local worker index: -1 by default, set to 0 by mt_thread_init() and
   then to the queue index by mt_queue_run(). */
static __thread long mt_thread_no = -1;
/* Currently active worker pool, or NULL when none is registered. */
static struct mt_pstate *pari_mt;

/* Usage:  LOCK(m) { body } UNLOCK(m);
   which expands to
     pthread_mutex_lock(m); do { body } while(0); pthread_mutex_unlock(m);
   The two macros must always be used as a pair around a braced block.
   A `break` inside the body still reaches the unlock; a `return` or `goto`
   out of the body would skip it. */
#define LOCK(x) pthread_mutex_lock(x); do
#define UNLOCK(x) while(0); pthread_mutex_unlock(x)

/* Mark the calling thread as initialised by setting its thread-local
   index to 0. */
void
mt_thread_init(void)
{
  mt_thread_no = 0;
}

/* Per-thread start-up hook: initialise the thread-local state first, then
   hand back the payload stored in the descriptor T. */
void *
pari_thread_start(struct pari_thread *t)
{
  void *payload;

  mt_thread_init();
  payload = t->data;
  return payload;
}

/* Reset the module state so that no worker pool is registered. */
static void
pari_mt_init(void)
{ pari_mt = NULL; }

/* Cancellation cleanup handler that deliberately does nothing; ARG is
   ignored.  mt_queue_run() registers it as its outermost handler. */
static void
mt_queue_cleanup(void *arg)
{ (void) arg; }

/* Cancellation cleanup handler: release the mutex that ARG points to. */
static void
mt_queue_unlock(void *arg)
{
  pthread_mutex_t *mutex = (pthread_mutex_t*) arg;
  pthread_mutex_unlock(mutex);
}

/* Worker thread entry point (the start routine given to pthread_create()).
   ARG points to this worker's struct pari_thread, whose payload is the
   worker's struct mt_queue.  The for(;;) loop has no exit, so the thread
   only terminates by being cancelled; the statements after the loop are
   never reached but keep the cleanup push/pop pair lexically balanced. */
static void*
mt_queue_run(void *arg)
{
  void *args = pari_thread_start((struct pari_thread*) arg);
  struct mt_queue *mq = (struct mt_queue *) args;
  mt_thread_no = mq->no;
  /* Outermost cleanup handler (a no-op).  It stays registered for the whole
     loop, including the asynchronous-cancellation window below. */
  pthread_cleanup_push(mt_queue_cleanup,NULL);
  /* Announce that this worker has started: mtpthread_queue_submit() waits
     on pcond until avma is non-zero. */
  LOCK(mq->pmut)
  {
    mq->avma = 1;
    pthread_cond_signal(mq->pcond);
  } UNLOCK(mq->pmut);
  for(;;)
  {
    long done;
    /* Sleep until a job is posted, i.e. until input becomes non-zero. */
    LOCK(&mq->mut)
    {
      /* If the thread is cancelled while inside pthread_cond_wait(), this
         handler releases mq->mut. */
      pthread_cleanup_push(mt_queue_unlock, &mq->mut);
      while(!mq->input)
        pthread_cond_wait(&mq->cond, &mq->mut);
      /* 0: remove the handler without running it; UNLOCK releases the mutex. */
      pthread_cleanup_pop(0);
    } UNLOCK(&mq->mut);
    done = -1; /* fixed non-zero result; main() ends a round on a negative result */
    /* Simulated work: an empty counting loop whose length is proportional to
       input.  Asynchronous cancellation is enabled only around this loop, so
       a pthread_cancel() request may be acted upon at any point inside it.
       NOTE(review): the loop has no side effects, so it presumably only
       survives when compiled without optimisation -- confirm. */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,NULL);
    for(long u=1; u<10000*mq->input;u++);
    pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,NULL);
    /* Publish the result and wake the thread blocked in
       mtpthread_queue_get(). */
    LOCK(mq->pmut)
    {
      mq->input = 0;
      mq->output = done;
      pthread_cond_signal(mq->pcond);
    } UNLOCK(mq->pmut);
  }
  pthread_cleanup_pop(1);
  return NULL;
}

/* Scan the pool's queues in index order and return the index of the first
   one whose output is non-zero, or -1 when no result is available. */
static long
mt_queue_check(struct mt_pstate *mt)
{
  long idx = 0;
  while (idx < mt->n)
  {
    if (mt->mq[idx].output) return idx;
    idx++;
  }
  return -1;
}

/* Collect one result from the pool registered in pari_mt.
   While some worker has never been given a job (nbint < n) nothing is
   collected: `last` is pointed at the next unused worker, *PENDING is set
   and 0 is returned.  Otherwise the call blocks until some queue has a
   non-zero output, clears it, stores the job identifier in *WORKID (when
   WORKID is not NULL), decrements the pending count, stores it in *PENDING
   and returns the result value. */
static long
mtpthread_queue_get(long *workid, long *pending)
{
  struct mt_pstate *pool = pari_mt;
  struct mt_queue *q;
  long result, idx;

  if (pool->nbint < pool->n)
  { /* warm-up phase: make the next submit target a fresh worker */
    pool->last = pool->nbint;
    *pending = pool->pending;
    return 0;
  }
  /* wait, under the pool-wide mutex, for any worker to publish a result */
  pthread_mutex_lock(&pool->pmut);
  for (;;)
  {
    idx = mt_queue_check(pool);
    if (idx >= 0) break;
    pthread_cond_wait(&pool->pcond, &pool->pmut);
  }
  pthread_mutex_unlock(&pool->pmut);
  q = &pool->mq[idx];
  result = q->output;
  q->output = 0;
  if (workid) *workid = q->workid;
  pool->last = idx; /* the worker that just finished receives the next job */
  pool->pending -= 1;
  *pending = pool->pending;
  return result;
}

/* Post job WORKID of size WORK to the queue selected by `last` in the pool
   registered in pari_mt.
   WORK == 0 means there is nothing to submit: nbint is forced to n, so the
   following mtpthread_queue_get() calls block for results, and the function
   returns.  The first time a given worker is used, the call waits until
   that worker has signalled that it is running (avma != 0). */
static void
mtpthread_queue_submit(long workid, long work)
{
  struct mt_pstate *pool = pari_mt;
  struct mt_queue *q = &pool->mq[pool->last];

  if (work == 0)
  {
    pool->nbint = pool->n;
    return;
  }
  if (pool->nbint < pool->n)
  { /* first job for this worker: make sure its thread has started */
    pool->nbint++;
    pthread_mutex_lock(q->pmut);
    while (q->avma == 0)
      pthread_cond_wait(q->pcond, q->pmut);
    pthread_mutex_unlock(q->pmut);
  }
  /* hand the job over and wake the worker waiting on its own condition */
  pthread_mutex_lock(&q->mut);
  q->output = 0;
  q->workid = workid;
  q->input = work;
  pthread_cond_signal(&q->cond);
  pthread_mutex_unlock(&q->mut);
  pool->pending++;
}

/* Tear down the worker pool registered in pari_mt: cancel every worker,
   join them all, destroy the synchronisation objects and free the pool.
   Does nothing when no pool is registered.
   The pool-wide condition variable and mutex are destroyed as well, to
   match their initialisation in mt_queue_start_lim(). */
void
mt_queue_reset(void)
{
  struct mt_pstate *mt = pari_mt;
  long i;
  if (!mt) return; /* no active pool: nothing to tear down */
  /* request cancellation of all workers first, then wait for each one, so
     that the workers wind down concurrently */
  for (i=0; i<mt->n; i++)
    pthread_cancel(mt->th[i]);
  for (i=0; i<mt->n; i++)
    pthread_join(mt->th[i],NULL);
  pari_mt = NULL;
  /* every worker has been joined, so nothing uses these objects any more */
  for (i=0;i<mt->n;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    pthread_cond_destroy(&mq->cond);
    pthread_mutex_destroy(&mq->mut);
  }
  pthread_cond_destroy(&mt->pcond);
  pthread_mutex_destroy(&mt->pmut);
  free(mt->mq);
  free(mt->pth);
  free(mt->th);
  free(mt);
}

/* Create a pool of LIM worker threads and register it in pari_mt.
   PT is accepted for interface compatibility and is not used.
   Each worker gets its own queue (job slot, result slot, condition variable
   and mutex) and shares the pool-wide condition variable and mutex.
   On allocation or thread-creation failure a message is written to stderr
   and the process exits with EXIT_FAILURE; continuing would leave
   uninitialised thread handles for mt_queue_reset() to cancel and join. */
void
mt_queue_start_lim(struct pari_mt *pt, long lim)
{
  struct mt_pstate *mt =
         (struct mt_pstate*) malloc(sizeof(struct mt_pstate));
  long i;
  (void) pt;
  if (!mt)
  {
    fprintf(stderr, "mt_queue_start_lim: out of memory\n");
    exit(EXIT_FAILURE);
  }
  mt->mq  = (struct mt_queue *) malloc(sizeof(*mt->mq)*lim);
  mt->th  = (pthread_t *) malloc(sizeof(*mt->th)*lim);
  mt->pth = (struct pari_thread *) malloc(sizeof(*mt->pth)*lim);
  /* the arrays are only dereferenced when lim > 0, so a NULL result for an
     empty pool is not an error */
  if (lim > 0 && (!mt->mq || !mt->th || !mt->pth))
  {
    fprintf(stderr, "mt_queue_start_lim: out of memory\n");
    exit(EXIT_FAILURE);
  }
  mt->pending = 0;
  mt->n = lim;
  mt->nbint = 0;
  mt->last = 0;
  pthread_cond_init(&mt->pcond,NULL);
  pthread_mutex_init(&mt->pmut,NULL);
  /* fully initialise every queue before any thread is started */
  for (i=0;i<lim;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    mq->no     = i;
    mq->avma   = 0;
    mq->input  = 0;
    mq->output = 0;
    mq->pcond  = &mt->pcond;
    mq->pmut   = &mt->pmut;
    pthread_cond_init(&mq->cond,NULL);
    pthread_mutex_init(&mq->mut,NULL);
    mt->pth[i].data = (void*)mq;
  }
  for (i=0;i<lim;i++)
  {
    int rc = pthread_create(&mt->th[i],NULL, &mt_queue_run, (void*)&mt->pth[i]);
    if (rc)
    {
      fprintf(stderr, "mt_queue_start_lim: pthread_create failed (error %d)\n", rc);
      exit(EXIT_FAILURE);
    }
  }
  pari_mt = mt;
}

/* Driver: 999 rounds of "start a pool of 20 workers, feed it jobs until one
   result comes back negative (or jobs and pending work run out), then tear
   the pool down".  Since workers always report -1, each round ends at the
   first collected result, so most workers are cancelled while still busy. */
int
main(void)
{
  struct pari_mt pt;
  long workid = 0, pending = 0;
  long round;

  pari_mt_init();
  for (round = 1; round < 1000; round++)
  {
    const long nmax = 40;
    long job = 1;

    mt_queue_start_lim(&pt, 20);
    while (job <= nmax || pending)
    {
      /* past nmax there is nothing left to submit: a size of 0 says so */
      long work = (job > nmax) ? 0 : job;
      mtpthread_queue_submit(job, work);
      if (mtpthread_queue_get(&workid, &pending) < 0) break;
      job++;
    }
    mt_queue_reset();
  }
  printf("Done!\n");
  return 0;
}
/* Copyright (C) 2013  The PARI group.

This file is part of the PARI/GP package.

PARI/GP is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version. It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY WHATSOEVER.

Check the License for details. You should have received a copy of it, along
with the package; see the file 'COPYING'. If not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Small state record embedded in struct pari_mt.  None of the visible code
   reads or writes it. */
struct mt_state
{
  long workid; /* presumably a work-item identifier -- unused in the visible code */
};

/* Start-up descriptor handed to each worker thread as its pthread argument. */
struct pari_thread
{
  void * data; /* opaque payload returned by pari_thread_start();
                  mt_queue_start_lim() stores the worker's struct mt_queue here */
};

/* Caller-side handle passed to mt_queue_start_lim(), which does not use it
   in the visible code. */
struct pari_mt
{
  struct mt_state mt; /* embedded state record (never accessed here) */
};

/* Per-worker mailbox: one job slot going in (input) and one result slot
   coming out (output), plus the synchronisation objects guarding them. */
struct mt_queue
{
  long no;              /* index of this queue in the pool; copied into mt_thread_no by the worker */
  long input, output;   /* input: non-zero while a job is posted (also the job size);
                           output: non-zero once a result is ready to be collected */
  long workid;          /* identifier of the job last submitted to this queue */
  long avma;            /* 0 until the worker thread has started, then 1
                           (name presumably inherited from PARI -- only used as a flag here) */
  pthread_cond_t cond;  /* signalled by the submitter when input is set */
  pthread_mutex_t mut;  /* held around the worker's wait on input and the submitter's post */
  pthread_cond_t *pcond;  /* points to the pool-wide condition variable (mt_pstate.pcond) */
  pthread_mutex_t *pmut;  /* points to the pool-wide mutex (mt_pstate.pmut) */
};


/* State of one worker pool; the active pool is reachable through pari_mt. */
struct mt_pstate
{
  pthread_t *th;            /* array of n thread handles */
  struct pari_thread *pth;  /* array of n start-up descriptors, one per thread */
  struct mt_queue *mq;      /* array of n per-worker queues */
  long n, nbint, last;      /* n: number of workers;
                               nbint: workers that already received a first job (capped at n);
                               last: index of the queue the next submit will use */
  long pending;             /* jobs submitted and not yet collected */
  pthread_cond_t pcond;     /* signalled by workers on start-up and on job completion */
  pthread_mutex_t pmut;     /* mutex paired with pcond */
};

/* Thread-local worker index: -1 by default, set to 0 by mt_thread_init() and
   then to the queue index by mt_queue_run(). */
static __thread long mt_thread_no = -1;
/* Currently active worker pool, or NULL when none is registered. */
static struct mt_pstate *pari_mt;

/* Usage:  LOCK(m) { body } UNLOCK(m);
   which expands to
     pthread_mutex_lock(m); do { body } while(0); pthread_mutex_unlock(m);
   The two macros must always be used as a pair around a braced block.
   A `break` inside the body still reaches the unlock; a `return` or `goto`
   out of the body would skip it. */
#define LOCK(x) pthread_mutex_lock(x); do
#define UNLOCK(x) while(0); pthread_mutex_unlock(x)

/* Mark the calling thread as initialised by setting its thread-local
   index to 0. */
void
mt_thread_init(void)
{
  mt_thread_no = 0;
}

/* Per-thread start-up hook: initialise the thread-local state first, then
   hand back the payload stored in the descriptor T. */
void *
pari_thread_start(struct pari_thread *t)
{
  void *payload;

  mt_thread_init();
  payload = t->data;
  return payload;
}

/* Reset the module state so that no worker pool is registered. */
static void
pari_mt_init(void)
{ pari_mt = NULL; }

/* Cancellation cleanup handler that deliberately does nothing; ARG is
   ignored.  In this variant of the program its registration in
   mt_queue_run() is commented out, so nothing calls it. */
static void
mt_queue_cleanup(void *arg)
{ (void) arg; }

/* Cancellation cleanup handler: release the mutex that ARG points to. */
static void
mt_queue_unlock(void *arg)
{
  pthread_mutex_t *mutex = (pthread_mutex_t*) arg;
  pthread_mutex_unlock(mutex);
}

/* Worker thread entry point (the start routine given to pthread_create()).
   ARG points to this worker's struct pari_thread, whose payload is the
   worker's struct mt_queue.  The for(;;) loop has no exit, so the thread
   only terminates by being cancelled; the final return is never reached.
   In this variant the outer no-op cleanup handler is disabled (see the two
   commented-out lines), so no cleanup handler is registered while
   asynchronous cancellation is enabled. */
static void*
mt_queue_run(void *arg)
{
  void *args = pari_thread_start((struct pari_thread*) arg);
  struct mt_queue *mq = (struct mt_queue *) args;
  mt_thread_no = mq->no;
  /* Outer handler intentionally disabled in this variant: */
  //pthread_cleanup_push(mt_queue_cleanup,NULL);
  /* Announce that this worker has started: mtpthread_queue_submit() waits
     on pcond until avma is non-zero. */
  LOCK(mq->pmut)
  {
    mq->avma = 1;
    pthread_cond_signal(mq->pcond);
  } UNLOCK(mq->pmut);
  for(;;)
  {
    long done;
    /* Sleep until a job is posted, i.e. until input becomes non-zero. */
    LOCK(&mq->mut)
    {
      /* If the thread is cancelled while inside pthread_cond_wait(), this
         handler releases mq->mut. */
      pthread_cleanup_push(mt_queue_unlock, &mq->mut);
      while(!mq->input)
        pthread_cond_wait(&mq->cond, &mq->mut);
      /* 0: remove the handler without running it; UNLOCK releases the mutex. */
      pthread_cleanup_pop(0);
    } UNLOCK(&mq->mut);
    done = -1; /* fixed non-zero result; main() ends a round on a negative result */
    /* Simulated work: an empty counting loop whose length is proportional to
       input.  Asynchronous cancellation is enabled only around this loop, so
       a pthread_cancel() request may be acted upon at any point inside it.
       NOTE(review): the loop has no side effects, so it presumably only
       survives when compiled without optimisation -- confirm. */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,NULL);
    for(long u=1; u<10000*mq->input;u++);
    pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,NULL);
    /* Publish the result and wake the thread blocked in
       mtpthread_queue_get(). */
    LOCK(mq->pmut)
    {
      mq->input = 0;
      mq->output = done;
      pthread_cond_signal(mq->pcond);
    } UNLOCK(mq->pmut);
  }
  /* Matching pop for the disabled push above: */
  //pthread_cleanup_pop(1);
  return NULL;
}

/* Scan the pool's queues in index order and return the index of the first
   one whose output is non-zero, or -1 when no result is available. */
static long
mt_queue_check(struct mt_pstate *mt)
{
  long idx = 0;
  while (idx < mt->n)
  {
    if (mt->mq[idx].output) return idx;
    idx++;
  }
  return -1;
}

/* Collect one result from the pool registered in pari_mt.
   While some worker has never been given a job (nbint < n) nothing is
   collected: `last` is pointed at the next unused worker, *PENDING is set
   and 0 is returned.  Otherwise the call blocks until some queue has a
   non-zero output, clears it, stores the job identifier in *WORKID (when
   WORKID is not NULL), decrements the pending count, stores it in *PENDING
   and returns the result value. */
static long
mtpthread_queue_get(long *workid, long *pending)
{
  struct mt_pstate *pool = pari_mt;
  struct mt_queue *q;
  long result, idx;

  if (pool->nbint < pool->n)
  { /* warm-up phase: make the next submit target a fresh worker */
    pool->last = pool->nbint;
    *pending = pool->pending;
    return 0;
  }
  /* wait, under the pool-wide mutex, for any worker to publish a result */
  pthread_mutex_lock(&pool->pmut);
  for (;;)
  {
    idx = mt_queue_check(pool);
    if (idx >= 0) break;
    pthread_cond_wait(&pool->pcond, &pool->pmut);
  }
  pthread_mutex_unlock(&pool->pmut);
  q = &pool->mq[idx];
  result = q->output;
  q->output = 0;
  if (workid) *workid = q->workid;
  pool->last = idx; /* the worker that just finished receives the next job */
  pool->pending -= 1;
  *pending = pool->pending;
  return result;
}

/* Post job WORKID of size WORK to the queue selected by `last` in the pool
   registered in pari_mt.
   WORK == 0 means there is nothing to submit: nbint is forced to n, so the
   following mtpthread_queue_get() calls block for results, and the function
   returns.  The first time a given worker is used, the call waits until
   that worker has signalled that it is running (avma != 0). */
static void
mtpthread_queue_submit(long workid, long work)
{
  struct mt_pstate *pool = pari_mt;
  struct mt_queue *q = &pool->mq[pool->last];

  if (work == 0)
  {
    pool->nbint = pool->n;
    return;
  }
  if (pool->nbint < pool->n)
  { /* first job for this worker: make sure its thread has started */
    pool->nbint++;
    pthread_mutex_lock(q->pmut);
    while (q->avma == 0)
      pthread_cond_wait(q->pcond, q->pmut);
    pthread_mutex_unlock(q->pmut);
  }
  /* hand the job over and wake the worker waiting on its own condition */
  pthread_mutex_lock(&q->mut);
  q->output = 0;
  q->workid = workid;
  q->input = work;
  pthread_cond_signal(&q->cond);
  pthread_mutex_unlock(&q->mut);
  pool->pending++;
}

/* Tear down the worker pool registered in pari_mt: cancel every worker,
   join them all, destroy the synchronisation objects and free the pool.
   Does nothing when no pool is registered.
   The pool-wide condition variable and mutex are destroyed as well, to
   match their initialisation in mt_queue_start_lim(). */
void
mt_queue_reset(void)
{
  struct mt_pstate *mt = pari_mt;
  long i;
  if (!mt) return; /* no active pool: nothing to tear down */
  /* request cancellation of all workers first, then wait for each one, so
     that the workers wind down concurrently */
  for (i=0; i<mt->n; i++)
    pthread_cancel(mt->th[i]);
  for (i=0; i<mt->n; i++)
    pthread_join(mt->th[i],NULL);
  pari_mt = NULL;
  /* every worker has been joined, so nothing uses these objects any more */
  for (i=0;i<mt->n;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    pthread_cond_destroy(&mq->cond);
    pthread_mutex_destroy(&mq->mut);
  }
  pthread_cond_destroy(&mt->pcond);
  pthread_mutex_destroy(&mt->pmut);
  free(mt->mq);
  free(mt->pth);
  free(mt->th);
  free(mt);
}

/* Create a pool of LIM worker threads and register it in pari_mt.
   PT is accepted for interface compatibility and is not used.
   Each worker gets its own queue (job slot, result slot, condition variable
   and mutex) and shares the pool-wide condition variable and mutex.
   On allocation or thread-creation failure a message is written to stderr
   and the process exits with EXIT_FAILURE; continuing would leave
   uninitialised thread handles for mt_queue_reset() to cancel and join. */
void
mt_queue_start_lim(struct pari_mt *pt, long lim)
{
  struct mt_pstate *mt =
         (struct mt_pstate*) malloc(sizeof(struct mt_pstate));
  long i;
  (void) pt;
  if (!mt)
  {
    fprintf(stderr, "mt_queue_start_lim: out of memory\n");
    exit(EXIT_FAILURE);
  }
  mt->mq  = (struct mt_queue *) malloc(sizeof(*mt->mq)*lim);
  mt->th  = (pthread_t *) malloc(sizeof(*mt->th)*lim);
  mt->pth = (struct pari_thread *) malloc(sizeof(*mt->pth)*lim);
  /* the arrays are only dereferenced when lim > 0, so a NULL result for an
     empty pool is not an error */
  if (lim > 0 && (!mt->mq || !mt->th || !mt->pth))
  {
    fprintf(stderr, "mt_queue_start_lim: out of memory\n");
    exit(EXIT_FAILURE);
  }
  mt->pending = 0;
  mt->n = lim;
  mt->nbint = 0;
  mt->last = 0;
  pthread_cond_init(&mt->pcond,NULL);
  pthread_mutex_init(&mt->pmut,NULL);
  /* fully initialise every queue before any thread is started */
  for (i=0;i<lim;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    mq->no     = i;
    mq->avma   = 0;
    mq->input  = 0;
    mq->output = 0;
    mq->pcond  = &mt->pcond;
    mq->pmut   = &mt->pmut;
    pthread_cond_init(&mq->cond,NULL);
    pthread_mutex_init(&mq->mut,NULL);
    mt->pth[i].data = (void*)mq;
  }
  for (i=0;i<lim;i++)
  {
    int rc = pthread_create(&mt->th[i],NULL, &mt_queue_run, (void*)&mt->pth[i]);
    if (rc)
    {
      fprintf(stderr, "mt_queue_start_lim: pthread_create failed (error %d)\n", rc);
      exit(EXIT_FAILURE);
    }
  }
  pari_mt = mt;
}

/* Driver: 999 rounds of "start a pool of 20 workers, feed it jobs until one
   result comes back negative (or jobs and pending work run out), then tear
   the pool down".  Since workers always report -1, each round ends at the
   first collected result, so most workers are cancelled while still busy. */
int
main(void)
{
  struct pari_mt pt;
  long workid = 0, pending = 0;
  long round;

  pari_mt_init();
  for (round = 1; round < 1000; round++)
  {
    const long nmax = 40;
    long job = 1;

    mt_queue_start_lim(&pt, 20);
    while (job <= nmax || pending)
    {
      /* past nmax there is nothing left to submit: a size of 0 says so */
      long work = (job > nmax) ? 0 : job;
      mtpthread_queue_submit(job, work);
      if (mtpthread_queue_get(&workid, &pending) < 0) break;
      job++;
    }
    mt_queue_reset();
  }
  printf("Done!\n");
  return 0;
}

Reply via email to