On Mon, Mar 10, 2025 at 12:24:43PM +0100, Bill Allombert wrote:
> On Fri, Mar 07, 2025 at 11:45:10AM +0100, Bill Allombert wrote:
> > I am still unsure wether there is a race condition in PARI or not, however 
> > if
> > there is, it is clear that 2.41 makes the issue much worse.
> 
> Hello Aurélien,
> 
> I have made some progress. I have made a test program that does not use 
> PARI/GP.
> You need to compile with -O0 to hit the race condition.
> 
> So I am still uncertain if this is a bug in my test-suite or in the libc.
> I will gladly take any advice.

In that spirit, please find a third one, which fails with both libc versions,
but in a different way.

About 10% of the time, it hangs in 'exit' after all threads have terminated.

Thread 1 "pthread" received signal SIGINT, Interrupt.
futex_wait (private=0, expected=2, futex_word=0x7ffff7ffda28 
<_rtld_global+2568>) at ../sysdeps/nptl/futex-internal.h:146
146     ../sysdeps/nptl/futex-internal.h: Aucun fichier ou dossier de ce type.
(gdb) bt
#0  futex_wait (private=0, expected=2, futex_word=0x7ffff7ffda28 
<_rtld_global+2568>) at ../sysdeps/nptl/futex-internal.h:146
#1  __GI___lll_lock_wait (futex=futex@entry=0x7ffff7ffda28 <_rtld_global+2568>, 
private=0) at ./nptl/lowlevellock.c:49
#2  0x00007ffff7e544aa in lll_mutex_lock_optimized (mutex=0x7ffff7ffda28 
<_rtld_global+2568>) at ./nptl/pthread_mutex_lock.c:48
#3  ___pthread_mutex_lock (mutex=0x7ffff7ffda28 <_rtld_global+2568>) at 
./nptl/pthread_mutex_lock.c:128
#4  0x00007ffff7fcf6f7 in _dl_fini () at ./elf/dl-fini.c:49
#5  0x00007ffff7e0655d in __run_exit_handlers (status=0, listp=0x7ffff7f9a820 
<__exit_funcs>,
    run_list_atexit=run_list_atexit@entry=true, run_dtors=run_dtors@entry=true) 
at ./stdlib/exit.c:116
#6  0x00007ffff7e0669a in __GI_exit (status=<optimized out>) at 
./stdlib/exit.c:146
#7  0x0000555555555c66 in main ()

I am puzzled.

Cheers,
Bill.
/* Copyright (C) 2013  The PARI group.

This file is part of the PARI/GP package.

PARI/GP is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version. It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY WHATSOEVER.

Check the License for details. You should have received a copy of it, along
with the package; see the file 'COPYING'. If not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>
#include <unistd.h>

/* Jump buffer armed by setjmp() in main(). mtpthread_queue_get() and main()
   longjmp() to it when a worker result is negative. */
jmp_buf buf;

/* State record embedded in struct pari_mt. No code visible in this file
   reads or writes workid. */
struct mt_state
{
  long workid;
};

/* Start-up argument handed to a worker thread through pthread_create(). */
struct pari_thread
{
  /* Returned unchanged by pari_thread_start(); mt_queue_start_lim() stores
     a pointer to the worker's struct mt_queue here. */
  void * data;
};

/* Handle type taken by mt_queue_start_lim(), which does not use it in this
   file; main() only declares one and passes its address. */
struct pari_mt
{
  struct mt_state mt;
};

/* Mailbox shared between the main thread and one worker thread. */
struct mt_queue
{
  /* Index of this queue in mt_pstate.mq; the worker copies it into
     mt_thread_no. */
  long no;
  /* Task handed to the worker; 0 means "nothing to do". */
  long input;
  /* Result posted by the worker; 0 means "no result yet". Workers in this
     file only ever post -1. */
  long output;
  /* Caller-supplied identifier of the task most recently submitted. */
  long workid;
  /* 0 after initialisation, set to 1 by the worker once it is running;
     mtpthread_queue_submit() waits for it before the first task. */
  long avma;
  /* Signalled by mtpthread_queue_submit() when input has been filled in. */
  pthread_cond_t cond;
  /* Mutex paired with cond. */
  pthread_mutex_t mut;
  /* Point at the pcond / pmut members of the owning struct mt_pstate. */
  pthread_cond_t *pcond;
  pthread_mutex_t *pmut;
};


/* Bookkeeping for one pool of worker threads. */
struct mt_pstate
{
  /* Arrays with one entry per worker: thread handles, start-up arguments
     and mailboxes. */
  pthread_t *th;
  struct pari_thread *pth;
  struct mt_queue *mq;
  /* Number of workers in the pool. */
  long n;
  /* Number of workers that have been handed their first task; while it is
     below n, mtpthread_queue_get() returns without waiting. */
  long nbint;
  /* Index of the mailbox the next mtpthread_queue_submit() will use. */
  long last;
  /* Tasks submitted whose result has not been collected yet. */
  long pending;
  /* Shared condition/mutex pair: workers signal pcond under pmut whenever
     they update their mailbox for the main thread. */
  pthread_cond_t pcond;
  pthread_mutex_t pmut;
};

/* Per-thread worker index. It keeps its initial -1 on a thread that never
   runs mt_thread_init() (main() does not call it); worker threads overwrite
   it with their queue number in mt_queue_run(). */
static __thread long mt_thread_no = -1;
/* The worker pool currently in use, or NULL when none is active. */
static struct mt_pstate *pari_mt;

/* Bracket a critical section. Written as
     LOCK(m) { ... } UNLOCK(m);
   the pair expands to
     pthread_mutex_lock(m); do { ... } while(0); pthread_mutex_unlock(m);
   The expansion is several statements, so the pair must not form the
   unbraced body of an if/else/loop. A break or continue placed directly in
   the block leaves the do/while and still reaches the unlock; return, goto
   or longjmp out of the block would skip it. */
#define LOCK(x) pthread_mutex_lock(x); do
#define UNLOCK(x) while(0); pthread_mutex_unlock(x)

/* Mark the calling thread as a worker by giving it a non-negative index.
   mt_queue_run() replaces this 0 with the number of its own queue. */
void
mt_thread_init(void)
{
  mt_thread_no = 0;
}

/* Per-thread start-up hook: initialise the thread-local worker index, then
   hand back the payload stored in the start-up argument t. */
void *
pari_thread_start(struct pari_thread *t)
{
  void *payload;

  mt_thread_init();
  payload = t->data;
  return payload;
}

/* Report a failure from a worker thread and terminate that thread.
 *
 * On a thread whose mt_thread_no is negative (one that never ran
 * mt_thread_init()) this is a no-op. On a worker it posts -1 in the
 * worker's mailbox, wakes the main thread and calls pthread_exit() with
 * (void*)-1, so it does not return.
 *
 * mt_queue_run() calls this while the cancel type is asynchronous.
 * pthread_mutex_lock(), pthread_cond_signal() and pthread_exit() are not
 * async-cancel-safe, so a cancellation delivered in the middle of them
 * could leave the shared mutex, or locks internal to libc, held by a dead
 * thread. Cancellation is therefore disabled first;
 * pthread_setcancelstate() is one of the few functions POSIX guarantees to
 * be async-cancel-safe. The state is never restored because the thread
 * exits. */
static void
mt_err_recover(void)
{
  struct mt_pstate *mt;
  struct mt_queue *mq;
  int oldstate;

  if (mt_thread_no < 0) return;
  /* A cancel arriving before this call still ends the thread cleanly: no
     lock is held yet. */
  pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
  mt = pari_mt;
  mq = mt->mq+mt_thread_no;
  LOCK(mq->pmut)
  {
    mq->output = -1;
    pthread_cond_signal(mq->pcond);
  } UNLOCK(mq->pmut);
  pthread_exit((void*)-1);
}

/* Start with no active worker pool. */
static void
pari_mt_init(void)
{
  pari_mt = NULL;
}

/* Cancellation cleanup handler installed by mt_queue_run() for the whole
   lifetime of a worker. It has nothing to release; arg is ignored. */
static void
mt_queue_cleanup(void *arg)
{
  (void)arg; /* silence the unused-parameter warning */
}

/* Cancellation cleanup handler: release the mutex passed as arg. It is
   installed around pthread_cond_wait() in mt_queue_run(). */
static void
mt_queue_unlock(void *arg)
{
  pthread_mutex_t *held = arg;

  pthread_mutex_unlock(held);
}

/* Thread entry point of a worker.
 *
 * arg is the struct pari_thread prepared by mt_queue_start_lim(); its data
 * member is this worker's struct mt_queue. The worker announces itself,
 * then loops: wait for a task in mq->input, "run" it with asynchronous
 * cancellation enabled, and post a result in mq->output.
 *
 * In this reproducer the "work" is a busy loop followed by an
 * unconditional mt_err_recover(). Since mt_thread_no is non-negative on a
 * worker, mt_err_recover() posts -1 and calls pthread_exit(), so the
 * statements after that call are not reached; they show the shape of the
 * normal completion path. */
static void*
mt_queue_run(void *arg)
{
  void *args = pari_thread_start((struct pari_thread*) arg);
  struct mt_queue *mq = (struct mt_queue *) args;
  /* Replace the 0 set by mt_thread_init() with this worker's queue index. */
  mt_thread_no = mq->no;
  /* Handler is a no-op; it is paired with the pthread_cleanup_pop(1) after
     the endless loop below. */
  pthread_cleanup_push(mt_queue_cleanup,NULL);
  /* Tell mtpthread_queue_submit() that this worker is up. */
  LOCK(mq->pmut)
  {
    mq->avma = 1;
    pthread_cond_signal(mq->pcond);
  } UNLOCK(mq->pmut);
  for(;;)
  {
    long done;
    /* Sleep until a task arrives. pthread_cond_wait() is a cancellation
       point and returns with the mutex held, so a handler that unlocks
       mq->mut is installed for the duration of the wait. */
    LOCK(&mq->mut)
    {
      pthread_cleanup_push(mt_queue_unlock, &mq->mut);
      while(!mq->input)
        pthread_cond_wait(&mq->cond, &mq->mut);
      pthread_cleanup_pop(0);
    } UNLOCK(&mq->mut);
    /* From here until the matching DEFERRED call the thread can be
       cancelled at any instruction. */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,NULL);
    /* Stand-in for real work. The loop has an empty body, so an optimising
       compiler may drop it -- presumably why the timing only shows up in
       unoptimised builds. */
    for(long u=1; u<100000;u++);
    /* NOTE(review): this runs with asynchronous cancellation still enabled.
       mt_err_recover() takes a mutex, signals a condition and calls
       pthread_exit(); none of these is async-cancel-safe, so the call is
       only safe if a cancel cannot be delivered while they execute. */
    mt_err_recover();
    done = -1;
    pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED,NULL);
    /* Post the result, mark the mailbox free and wake the main thread. */
    LOCK(mq->pmut)
    {
      mq->input = 0;
      mq->output = done;
      pthread_cond_signal(mq->pcond);
    } UNLOCK(mq->pmut);
  }
  /* Not reached (the loop above never ends), but required: it closes the
     block opened by pthread_cleanup_push() at the top. */
  pthread_cleanup_pop(1);
  return NULL;
}

/* Return the index of the first mailbox of mt holding a result (nonzero
   output), or -1 when none has one. mtpthread_queue_get() calls this with
   mt->pmut held. */
static long
mt_queue_check(struct mt_pstate *mt)
{
  long slot = 0;

  while (slot < mt->n)
  {
    if (mt->mq[slot].output != 0)
      return slot;
    slot++;
  }
  return -1;
}

/* Collect one result from the pool.
 *
 * While fewer than n workers have been given a task, nothing is awaited:
 * the next unused worker is selected for the following submit, *pending is
 * updated and 0 is returned. Otherwise the call blocks until some worker
 * has posted a result, clears that mailbox and
 *   - stores the task id in *workid (when workid is non-NULL),
 *   - on a negative result, longjmp()s to buf and does not return,
 *   - else selects that worker for the next submit, decrements the pending
 *     count, copies it to *pending and returns the result. */
static long
mtpthread_queue_get(long *workid, long *pending)
{
  struct mt_pstate *mt = pari_mt;
  struct mt_queue *slot;
  long ready, result;

  if (mt->nbint < mt->n)
  { /* start-up phase: keep feeding workers that are still idle */
    mt->last = mt->nbint;
    *pending = mt->pending;
    return 0;
  }
  LOCK(&mt->pmut)
  {
    for (;;)
    {
      ready = mt_queue_check(mt);
      if (ready >= 0) break; /* leaves the for loop only */
      pthread_cond_wait(&mt->pcond, &mt->pmut);
    }
  } UNLOCK(&mt->pmut);
  slot = &mt->mq[ready];
  result = slot->output;
  slot->output = 0;
  if (workid)
    *workid = slot->workid;
  if (result < 0)
    longjmp(buf, 1); /* pmut has already been released above */
  mt->last = ready;
  mt->pending -= 1;
  *pending = mt->pending;
  return result;
}

/* Hand task `work`, tagged with `workid`, to the worker selected by the
 * previous mtpthread_queue_get() (pari_mt->last).
 *
 * work == 0 means "nothing more to submit": no task is queued and nbint is
 * raised to n, so that later mtpthread_queue_get() calls wait for results.
 * The first time a worker is used, the call first waits until that worker
 * has flagged itself as running. */
static void
mtpthread_queue_submit(long workid, long work)
{
  struct mt_pstate *mt = pari_mt;
  struct mt_queue *slot;

  if (work == 0)
  {
    mt->nbint = mt->n;
    return;
  }
  slot = &mt->mq[mt->last];
  if (mt->nbint < mt->n)
  {
    mt->nbint++;
    /* the worker sets avma and signals pcond once it has started */
    LOCK(slot->pmut)
    {
      while (slot->avma == 0)
        pthread_cond_wait(slot->pcond, slot->pmut);
    } UNLOCK(slot->pmut);
  }
  LOCK(&slot->mut)
  {
    slot->output = 0;
    slot->workid = workid;
    slot->input  = work;
    pthread_cond_signal(&slot->cond);
  } UNLOCK(&slot->mut);
  mt->pending++;
}

/* Tear down the active worker pool: cancel and join every worker, destroy
 * all synchronisation objects and free the pool's memory. On return
 * pari_mt is NULL. Does nothing when no pool is active.
 *
 * Return values of the pthread calls are deliberately ignored: this is
 * best-effort teardown and some workers may already have exited on their
 * own. */
void
mt_queue_reset(void)
{
  struct mt_pstate *mt = pari_mt;
  long i;

  if (!mt) return;
  /* Request cancellation of all workers first, then wait for each, so that
     they wind down concurrently. */
  for (i=0; i<mt->n; i++)
    pthread_cancel(mt->th[i]);
  for (i=0; i<mt->n; i++)
    pthread_join(mt->th[i],NULL);
  pari_mt = NULL;
  for (i=0;i<mt->n;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    pthread_cond_destroy(&mq->cond);
    pthread_mutex_destroy(&mq->mut);
  }
  /* The shared pair is initialised in mt_queue_start_lim() and must be
     released too; every worker has been joined, so no thread can still be
     waiting on it. */
  pthread_cond_destroy(&mt->pcond);
  pthread_mutex_destroy(&mt->pmut);
  free(mt->mq);
  free(mt->pth);
  free(mt->th);
  free(mt);
}

void
mt_queue_start_lim(struct pari_mt *pt, long lim)
{
  struct mt_pstate *mt =
         (struct mt_pstate*) malloc(sizeof(struct mt_pstate));
  long i;
  mt->mq  = (struct mt_queue *) malloc(sizeof(*mt->mq)*lim);
  mt->th  = (pthread_t *) malloc(sizeof(*mt->th)*lim);
  mt->pth = (struct pari_thread *) malloc(sizeof(*mt->pth)*lim);
  mt->pending = 0;
  mt->n = lim;
  mt->nbint = 0;
  mt->last = 0;
  pthread_cond_init(&mt->pcond,NULL);
  pthread_mutex_init(&mt->pmut,NULL);
  for (i=0;i<lim;i++)
  {
    struct mt_queue *mq = mt->mq+i;
    mq->no     = i;
    mq->avma   = 0;
    mq->input  = 0;
    mq->output = 0;
    mq->pcond  = &mt->pcond;
    mq->pmut   = &mt->pmut;
    pthread_cond_init(&mq->cond,NULL);
    pthread_mutex_init(&mq->mut,NULL);
    mt->pth[i].data = (void*)mq;
  }
  for (i=0;i<lim;i++)
    pthread_create(&mt->th[i],NULL, &mt_queue_run, (void*)&mt->pth[i]);
  pari_mt = mt;
}

/* Driver of the reproducer.
 *
 * Starts a pool of 10 workers and submits tasks 1..19, collecting a result
 * after each submit, then keeps collecting until nothing is pending. If a
 * negative result is reported, control comes back through longjmp() to the
 * setjmp() below: the pool is torn down, "Done!" is printed and the process
 * exits with status 0. Otherwise the pool is torn down after the loop and
 * "Exit!" is printed. */
int
main(void)
{
  struct pari_mt pt;
  long i, j, workid = 0, pending = 0;

  pari_mt_init();
  if (setjmp(buf) != 0)
  { /* arrived here via longjmp(); locals changed since setjmp are not used */
    mt_queue_reset();
    printf("Done!\n");
    exit(0);
  }

  mt_queue_start_lim(&pt,10);
  i = 1;
  while (i < 20 || pending)
  {
    /* once the 19 tasks are out, submit 0 ("no more work") while draining */
    long s = (i < 20) ? i : 0;

    mtpthread_queue_submit(i, s);
    j = mtpthread_queue_get(&workid, &pending);
    if (j < 0)
      longjmp(buf,1);
    /* debug trace: printf("%ld: %ld->%ld (%ld)\n",s,workid,j,pending); */
    i++;
  }
  mt_queue_reset();
  printf("Exit!\n");
  return 0;
}

Reply via email to