On Mon, Nov 8, 2010 at 2:33 PM, Arun R Bharadwaj <a...@linux.vnet.ibm.com> wrote: > diff --git a/Makefile.objs b/Makefile.objs > index cd5a24b..3b7ec27 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -9,6 +9,7 @@ qobject-obj-y += qerror.o > > block-obj-y = cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o > block-obj-y += nbd.o block.o aio.o aes.o osdep.o qemu-config.o > +block-obj-$(CONFIG_POSIX) += qemu-thread.o > block-obj-$(CONFIG_POSIX) += posix-aio-compat.o > block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o > > @@ -124,7 +125,6 @@ endif > common-obj-y += $(addprefix ui/, $(ui-obj-y)) > > common-obj-y += iov.o acl.o > -common-obj-$(CONFIG_THREAD) += qemu-thread.o > common-obj-y += notify.o event_notifier.o > common-obj-y += qemu-timer.o
This change makes CONFIG_THREAD unused. The ./configure code that sets CONFIG_THREAD=y should be removed. > diff --git a/posix-aio-compat.c b/posix-aio-compat.c > index 7b862b5..00b2a4e 100644 > --- a/posix-aio-compat.c > +++ b/posix-aio-compat.c > @@ -29,7 +29,32 @@ > #include "block_int.h" > > #include "block/raw-posix-aio.h" > +#include "qemu-thread.h" > > +#define MAX_GLOBAL_THREADS 64 > +#define MIN_GLOBAL_THREADS 8 > + > +QemuMutex aiocb_mutex; This variable should be static since it isn't used externally. > + > +typedef struct ThreadletQueue > +{ > + QemuMutex lock; > + QemuCond cond; > + int max_threads; > + int min_threads; > + int cur_threads; > + int idle_threads; > + QTAILQ_HEAD(, ThreadletWork) request_list; > +} ThreadletQueue; > + > +typedef struct ThreadletWork > +{ > + QTAILQ_ENTRY(ThreadletWork) node; > + void (*func)(struct ThreadletWork *work); > +} ThreadletWork; > + > +static ThreadletQueue globalqueue; > +static int globalqueue_init; > > struct qemu_paiocb { > BlockDriverAIOCB common; > @@ -44,13 +69,13 @@ struct qemu_paiocb { > int ev_signo; > off_t aio_offset; > > - QTAILQ_ENTRY(qemu_paiocb) node; > int aio_type; > ssize_t ret; > int active; > struct qemu_paiocb *next; > > int async_context_id; > + ThreadletWork work; > }; > > typedef struct PosixAioState { > @@ -58,64 +83,169 @@ typedef struct PosixAioState { > struct qemu_paiocb *first_aio; > } PosixAioState; > > +static void *threadlet_worker(void *data) > +{ > + ThreadletQueue *queue = data; > > -static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; > -static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; > -static pthread_t thread_id; > -static pthread_attr_t attr; > -static int max_threads = 64; > -static int cur_threads = 0; > -static int idle_threads = 0; > -static QTAILQ_HEAD(, qemu_paiocb) request_list; > + qemu_mutex_lock(&queue->lock); > + while (1) { > + ThreadletWork *work; > + int ret = 0; > + > + while (QTAILQ_EMPTY(&queue->request_list) && > + (ret != ETIMEDOUT)) { > + /* wait for cond to be signalled or broadcast for 1000s */ > + ret = qemu_cond_timedwait((&queue->cond), > + &(queue->lock), 10*100000); > + } > > -#ifdef CONFIG_PREADV > -static int preadv_present = 1; > -#else > -static int preadv_present = 0; > -#endif > + assert(queue->idle_threads != 0); > + if (QTAILQ_EMPTY(&queue->request_list)) { > + if (queue->cur_threads > queue->min_threads) { > + /* We retain the minimum number of threads */ > + break; > + } > + } else { > + work = QTAILQ_FIRST(&queue->request_list); > + QTAILQ_REMOVE(&queue->request_list, work, node); > > -static void die2(int err, const char *what) > -{ > - fprintf(stderr, "%s failed: %s\n", what, strerror(err)); > - abort(); > + queue->idle_threads--; > + qemu_mutex_unlock(&queue->lock); > + > + /* execute the work function */ > + work->func(work); > + > + qemu_mutex_lock(&queue->lock); > + queue->idle_threads++; > + } > + } > + > + queue->idle_threads--; > + queue->cur_threads--; > + qemu_mutex_unlock(&queue->lock); > + > + return NULL; > } > > -static void die(const char *what) > +static void spawn_threadlet(ThreadletQueue *queue) > { > - die2(errno, what); > + QemuThread thread; > + > + queue->cur_threads++; > + queue->idle_threads++; > + > + qemu_thread_create(&thread, threadlet_worker, queue); > } > > -static void mutex_lock(pthread_mutex_t *mutex) > +/** > + * submit_work_to_queue: Submit a new task to a private queue to be > + * executed asynchronously. > + * @queue: Per-subsystem private queue to which the new task needs > + * to be submitted. > + * @work: Contains information about the task that needs to be submitted. > + */ > +static void submit_work_to_queue(ThreadletQueue *queue, ThreadletWork *work) > { > - int ret = pthread_mutex_lock(mutex); > - if (ret) die2(ret, "pthread_mutex_lock"); > + qemu_mutex_lock(&queue->lock); > + if (queue->idle_threads == 0 && queue->cur_threads < queue->max_threads) > { > + spawn_threadlet(queue); > + } else { > + qemu_cond_signal(&queue->cond); > + } > + QTAILQ_INSERT_TAIL(&queue->request_list, work, node); > + qemu_mutex_unlock(&queue->lock); > } > > -static void mutex_unlock(pthread_mutex_t *mutex) > +static void threadlet_queue_init(ThreadletQueue *queue, > + int max_threads, int min_threads); > + > +/** > + * submit_work: Submit to the global queue a new task to be executed > + * asynchronously. > + * @work: Contains information about the task that needs to be submitted. > + */ > +static void submit_work(ThreadletWork *work) > { > - int ret = pthread_mutex_unlock(mutex); > - if (ret) die2(ret, "pthread_mutex_unlock"); > + if (!globalqueue_init) { > + threadlet_queue_init(&globalqueue, MAX_GLOBAL_THREADS, > + MIN_GLOBAL_THREADS); > + globalqueue_init = 1; > + } > + > + submit_work_to_queue(&globalqueue, work); > } > > -static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, > - struct timespec *ts) > +/** > + * dequeue_work_on_queue: Cancel a task queued on a Queue. > + * @queue: The queue containing the task to be cancelled. > + * @work: Contains the information of the task that needs to be cancelled. > + * > + * Returns: 0 if the task is successfully cancelled. > + * 1 otherwise. > + */ > +static int dequeue_work_on_queue(ThreadletQueue *queue, ThreadletWork *work) > { > - int ret = pthread_cond_timedwait(cond, mutex, ts); > - if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait"); > + ThreadletWork *ret_work; > + int ret = 1; > + > + qemu_mutex_lock(&queue->lock); > + QTAILQ_FOREACH(ret_work, &(queue->request_list), node) { > + if (ret_work == work) { > + QTAILQ_REMOVE(&queue->request_list, ret_work, node); > + ret = 0; > + break; > + } > + } > + qemu_mutex_unlock(&queue->lock); > + > return ret; > } > > -static void cond_signal(pthread_cond_t *cond) > +/** > + * dequeue_work: Cancel a task queued on the global queue. > + * @work: Contains the information of the task that needs to be cancelled. > + * > + * Returns: 0 if the task is successfully cancelled. > + * 1 otherwise. > + */ > +static int dequeue_work(ThreadletWork *work) > +{ > + return dequeue_work_on_queue(&globalqueue, work); > +} > + > +/** > + * threadlet_queue_init: Initialize a threadlet queue. > + * @queue: The threadlet queue to be initialized. > + * @max_threads: Maximum number of threads processing the queue. > + * @min_threads: Minimum number of threads processing the queue. > + */ > +static void threadlet_queue_init(ThreadletQueue *queue, > + int max_threads, int min_threads) > +{ > + queue->cur_threads = 0; > + queue->idle_threads = 0; > + queue->max_threads = max_threads; > + queue->min_threads = min_threads; > + QTAILQ_INIT(&queue->request_list); > + qemu_mutex_init(&queue->lock); > + qemu_cond_init(&queue->cond); > +} > + > +#ifdef CONFIG_PREADV > +static int preadv_present = 1; > +#else > +static int preadv_present = 0; > +#endif > + > +static void die2(int err, const char *what) > { > - int ret = pthread_cond_signal(cond); > - if (ret) die2(ret, "pthread_cond_signal"); > + fprintf(stderr, "%s failed: %s\n", what, strerror(err)); > + abort(); > } > > -static void thread_create(pthread_t *thread, pthread_attr_t *attr, > - void *(*start_routine)(void*), void *arg) > +static void die(const char *what) > { > - int ret = pthread_create(thread, attr, start_routine, arg); > - if (ret) die2(ret, "pthread_create"); > + die2(errno, what); > } > > static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) > @@ -301,106 +431,59 @@ static ssize_t handle_aiocb_rw(struct qemu_paiocb > *aiocb) > return nbytes; > } > > -static void *aio_thread(void *unused) > +static void aio_thread(ThreadletWork *work) > { > pid_t pid; > + struct qemu_paiocb *aiocb = container_of(work, struct qemu_paiocb, work); > + ssize_t ret = 0; > > pid = getpid(); > + aiocb->active = 1; aiocb->active needs to be assigned with aiocb_mutex held and then released in order for this memory write to be visible to other threads after this line of code. > > - while (1) { > - struct qemu_paiocb *aiocb; > - ssize_t ret = 0; > - qemu_timeval tv; > - struct timespec ts; > - > - qemu_gettimeofday(&tv); > - ts.tv_sec = tv.tv_sec + 10; > - ts.tv_nsec = 0; > - > - mutex_lock(&lock); > - > - while (QTAILQ_EMPTY(&request_list) && > - !(ret == ETIMEDOUT)) { > - ret = cond_timedwait(&cond, &lock, &ts); > - } > - > - if (QTAILQ_EMPTY(&request_list)) > - break; > - > - aiocb = QTAILQ_FIRST(&request_list); > - QTAILQ_REMOVE(&request_list, aiocb, node); > - aiocb->active = 1; > - idle_threads--; > - mutex_unlock(&lock); > - > - switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { > - case QEMU_AIO_READ: > - case QEMU_AIO_WRITE: > - ret = handle_aiocb_rw(aiocb); > - break; > - case QEMU_AIO_FLUSH: > - ret = handle_aiocb_flush(aiocb); > - break; > - case QEMU_AIO_IOCTL: > - ret = handle_aiocb_ioctl(aiocb); > - break; > - default: > - fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); > - ret = -EINVAL; > - break; > - } > - > - mutex_lock(&lock); > - aiocb->ret = ret; > - idle_threads++; > - mutex_unlock(&lock); > - > - if (kill(pid, aiocb->ev_signo)) die("kill failed"); > + switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) { > + case QEMU_AIO_READ: > + case QEMU_AIO_WRITE: > + ret = handle_aiocb_rw(aiocb); > + break; > + case QEMU_AIO_FLUSH: > + ret = handle_aiocb_flush(aiocb); > + break; > + case QEMU_AIO_IOCTL: > + ret = handle_aiocb_ioctl(aiocb); > + break; > + default: > + fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); > + ret = -EINVAL; > + break; > } > > - idle_threads--; > - cur_threads--; > - mutex_unlock(&lock); > - > - return NULL; > -} > - > -static void spawn_thread(void) > -{ > - sigset_t set, oldset; > - > - cur_threads++; > - idle_threads++; > - > - /* block all signals */ > - if (sigfillset(&set)) die("sigfillset"); > - if (sigprocmask(SIG_SETMASK, &set, &oldset)) die("sigprocmask"); > + qemu_mutex_lock(&aiocb_mutex); > + aiocb->ret = ret; > + qemu_mutex_unlock(&aiocb_mutex); > > - thread_create(&thread_id, &attr, aio_thread, NULL); > - > - if (sigprocmask(SIG_SETMASK, &oldset, NULL)) die("sigprocmask restore"); > + if (kill(pid, aiocb->ev_signo)) { > + die("kill failed"); > + } > } > > static void qemu_paio_submit(struct qemu_paiocb *aiocb) > { > + qemu_mutex_lock(&aiocb_mutex); > aiocb->ret = -EINPROGRESS; > + qemu_mutex_unlock(&aiocb_mutex); > aiocb->active = 0; > - mutex_lock(&lock); > - if (idle_threads == 0 && cur_threads < max_threads) > - spawn_thread(); > - QTAILQ_INSERT_TAIL(&request_list, aiocb, node); > - mutex_unlock(&lock); > - cond_signal(&cond); > + > + aiocb->work.func = aio_thread; > + submit_work(&aiocb->work); > } > > static ssize_t qemu_paio_return(struct qemu_paiocb *aiocb) > { > ssize_t ret; > > - mutex_lock(&lock); > + qemu_mutex_lock(&aiocb_mutex); > ret = aiocb->ret; > - mutex_unlock(&lock); > - > + qemu_mutex_unlock(&aiocb_mutex); > return ret; > } > > @@ -536,20 +619,20 @@ static void paio_cancel(BlockDriverAIOCB *blockacb) > struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb; > int active = 0; > > - mutex_lock(&lock); > if (!acb->active) { > - QTAILQ_REMOVE(&request_list, acb, node); > - acb->ret = -ECANCELED; > + if (!dequeue_work(&acb->work)) { > + acb->ret = -ECANCELED; > + } else { > + active = 1; > + } > } else if (acb->ret == -EINPROGRESS) { > active = 1; > } > - mutex_unlock(&lock); > > if (active) { > /* fail safe: if the aio could not be canceled, we wait for > it */ > - while (qemu_paio_error(acb) == EINPROGRESS) > - ; > + active = qemu_paio_error(acb); > } > > paio_remove(acb); acb->ret is not being consistently accessed with aiocb_mutex held. We don't wait for the work item to complete if it is active. This changes the semantics of paio_cancel() and will break callers who expect the request to be cancelled/completed when paio_cancel() returns. Also, we go ahead and free the acb for a running request which is dangerous because it may be reused and corrupted. I think both the active variable and field in qemu_paiocb are unnecessary because dequeue_work() already deals with inactive work items. If dequeue_work() was unsuccessful you need to wait until ret != -EINPROGRESS. Stefan