Sergey Fedorov <sergey.fedo...@linaro.org> writes:

> From: Sergey Fedorov <serge.f...@gmail.com>
>
> This patch is based on ideas found in the work of KONRAD Frederic [1],
> Alex Bennée [2], and Alvise Rigo [3].
>
> This mechanism allows an operation to be performed safely in a quiescent
> state. Quiescent state means: (1) no vCPU is running and (2) the BQL in
> system-mode or 'exclusive_lock' in user-mode emulation is held while
> performing the operation. This functionality is required e.g. for
> performing a translation buffer flush safely in multi-threaded user-mode
> emulation.

<snip>

>
>  QemuCond qemu_work_cond;
> +QemuCond qemu_safe_work_cond;
> +QemuCond qemu_exclusive_cond;
> +
> +static int safe_work_pending;
> +
> +void wait_safe_cpu_work(void)
> +{
> +    while (atomic_mb_read(&safe_work_pending) > 0) {
> +        qemu_cond_wait(&qemu_safe_work_cond, qemu_get_cpu_work_mutex());
> +    }
> +}
Testing in system mode I got a deadlock with one thread sitting here (I
assume after doing its work) and the other threads all waiting on the
exclusive condition. I think we need this:

void wait_safe_cpu_work(void)
{
    while (atomic_mb_read(&safe_work_pending) > 0) {
        /*
         * If there is pending safe work and no pending threads we
         * need to signal another thread to start its work.
         */
        if (tcg_pending_threads == 0) {
            qemu_cond_signal(&qemu_exclusive_cond);
        }
        qemu_cond_wait(&qemu_safe_work_cond, qemu_get_cpu_work_mutex());
    }
}

>
>  static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
>  {
> @@ -91,9 +103,18 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
>      cpu->queued_work_last = wi;
>      wi->next = NULL;
>      wi->done = false;
> +    if (wi->safe) {
> +        atomic_inc(&safe_work_pending);
> +    }
>      qemu_mutex_unlock(&cpu->work_mutex);
>
> -    qemu_cpu_kick(cpu);
> +    if (!wi->safe) {
> +        qemu_cpu_kick(cpu);
> +    } else {
> +        CPU_FOREACH(cpu) {
> +            qemu_cpu_kick(cpu);
> +        }
> +    }
>  }
>
>  void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> @@ -108,6 +129,7 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
>      wi.func = func;
>      wi.data = data;
>      wi.free = false;
> +    wi.safe = false;
>
>      queue_work_on_cpu(cpu, &wi);
>      while (!atomic_mb_read(&wi.done)) {
> @@ -131,6 +153,20 @@ void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
>      wi->func = func;
>      wi->data = data;
>      wi->free = true;
> +    wi->safe = false;
> +
> +    queue_work_on_cpu(cpu, wi);
> +}
> +
> +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
> +{
> +    struct qemu_work_item *wi;
> +
> +    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    wi->func = func;
> +    wi->data = data;
> +    wi->free = true;
> +    wi->safe = true;
>
>      queue_work_on_cpu(cpu, wi);
>  }
> @@ -150,9 +186,20 @@ void process_queued_cpu_work(CPUState *cpu)
>          if (!cpu->queued_work_first) {
>              cpu->queued_work_last = NULL;
>          }
> +        if (wi->safe) {
> +            while (tcg_pending_threads) {
> +                qemu_cond_wait(&qemu_exclusive_cond,
> +                               qemu_get_cpu_work_mutex());
> +            }
> +        }
>          qemu_mutex_unlock(&cpu->work_mutex);
>          wi->func(cpu, wi->data);
>          qemu_mutex_lock(&cpu->work_mutex);
> +        if (wi->safe) {
> +            if (!atomic_dec_fetch(&safe_work_pending)) {
> +                qemu_cond_broadcast(&qemu_safe_work_cond);
> +            }
> +        }
>          if (wi->free) {
>              g_free(wi);
>          } else {
> diff --git a/cpus.c b/cpus.c
> index 282d7e399902..b7122043f650 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -903,6 +903,8 @@ void qemu_init_cpu_loop(void)
>      qemu_cond_init(&qemu_cpu_cond);
>      qemu_cond_init(&qemu_pause_cond);
>      qemu_cond_init(&qemu_work_cond);
> +    qemu_cond_init(&qemu_safe_work_cond);
> +    qemu_cond_init(&qemu_exclusive_cond);
>      qemu_cond_init(&qemu_io_proceeded_cond);
>      qemu_mutex_init(&qemu_global_mutex);
>
> @@ -926,6 +928,20 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
>  {
>  }
>
> +/* called with qemu_global_mutex held */
> +static inline void tcg_cpu_exec_start(CPUState *cpu)
> +{
> +    tcg_pending_threads++;
> +}
> +
> +/* called with qemu_global_mutex held */
> +static inline void tcg_cpu_exec_end(CPUState *cpu)
> +{
> +    if (--tcg_pending_threads) {
> +        qemu_cond_broadcast(&qemu_exclusive_cond);

Is the broadcast a bit excessive here? Shouldn't we just wake up one
thread with a qemu_cond_signal?
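Something like this, say (just a sketch; it keeps the decrement test
exactly as posted and only swaps the wake-up call):

static inline void tcg_cpu_exec_end(CPUState *cpu)
{
    if (--tcg_pending_threads) {
        /* Wake a single waiter rather than the whole herd; the waiters
         * contend for the same lock and run their work serially anyway. */
        qemu_cond_signal(&qemu_exclusive_cond);
    }
}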
> +    }
> +}
> +
>  static void qemu_wait_io_event_common(CPUState *cpu)
>  {
>      if (cpu->stop) {
> @@ -950,6 +966,8 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
>      CPU_FOREACH(cpu) {
>          qemu_wait_io_event_common(cpu);
>      }
> +
> +    wait_safe_cpu_work();
>  }
>
>  static void qemu_kvm_wait_io_event(CPUState *cpu)
> @@ -1485,7 +1503,9 @@ static void tcg_exec_all(void)
>                            (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
>
>          if (cpu_can_run(cpu)) {
> +            tcg_cpu_exec_start(cpu);
>              r = tcg_cpu_exec(cpu);
> +            tcg_cpu_exec_end(cpu);
>              if (r == EXCP_DEBUG) {
>                  cpu_handle_guest_debug(cpu);
>                  break;
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 8d5c7dbcf5a9..1f0272beb362 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -405,12 +405,22 @@ extern int singlestep;
>
>  /* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */
>  extern CPUState *tcg_current_cpu;
> +extern int tcg_pending_threads;
>  extern bool exit_request;
>
>  /**
>   * qemu_work_cond - condition to wait for CPU work items completion
>   */
>  extern QemuCond qemu_work_cond;
> +/**
> + * qemu_safe_work_cond - condition to wait for safe CPU work items completion
> + */
> +extern QemuCond qemu_safe_work_cond;
> +/**
> + * qemu_exclusive_cond - condition to wait for all TCG threads to be out of
> + * the guest code execution loop
> + */
> +extern QemuCond qemu_exclusive_cond;
>
>  /**
>   * qemu_get_cpu_work_mutex() - get the mutex which protects CPU work execution
> @@ -423,5 +433,9 @@ QemuMutex *qemu_get_cpu_work_mutex(void);
>   * @cpu: The CPU which work queue to process.
>   */
>  void process_queued_cpu_work(CPUState *cpu);
> +/**
> + * wait_safe_cpu_work() - wait until all safe CPU work items have been processed
> + */
> +void wait_safe_cpu_work(void);
>
>  #endif
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index c19a673f9f68..ab67bf2ba19f 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -238,6 +238,7 @@ struct qemu_work_item {
>      void *data;
>      int done;
>      bool free;
> +    bool safe;
>  };
>
>  /**
> @@ -632,6 +633,19 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
>  void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
>
>  /**
> + * async_safe_run_on_cpu:
> + * @cpu: The vCPU to run on.
> + * @func: The function to be executed.
> + * @data: Data to pass to the function.
> + *
> + * Schedules the function @func for execution on the vCPU @cpu asynchronously
> + * and in quiescent state. Quiescent state means: (1) all other vCPUs are
> + * halted and (2) #qemu_global_mutex (a.k.a. BQL) in system-mode or
> + * #exclusive_lock in user-mode emulation is held while @func is executing.
> + */
> +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data);
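As an aside, for anyone wondering how this gets used: the translation
buffer flush from the commit message would presumably be scheduled along
these lines (a minimal sketch; the do_tb_flush helper is hypothetical,
not part of this patch):

static void do_tb_flush(CPUState *cpu, void *data)
{
    /* Called in the quiescent state: no other vCPU is executing guest
     * code and the BQL/exclusive_lock is held, so dropping every TB is
     * safe. */
    tb_flush(cpu);
}

/* ... then, from any vCPU thread, instead of flushing inline: */
async_safe_run_on_cpu(cpu, do_tb_flush, NULL);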
> +
> +/**
>   * qemu_get_cpu:
>   * @index: The CPUState@cpu_index value of the CPU to obtain.
>   *
> diff --git a/linux-user/main.c b/linux-user/main.c
> index fce61d5a35fc..d0ff5f9976e5 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -110,18 +110,17 @@ int cpu_get_pic_interrupt(CPUX86State *env)
>     which requires quite a lot of per host/target work.  */
>  static QemuMutex cpu_list_mutex;
>  static QemuMutex exclusive_lock;
> -static QemuCond exclusive_cond;
>  static QemuCond exclusive_resume;
>  static bool exclusive_pending;
> -static int tcg_pending_threads;
>
>  void qemu_init_cpu_loop(void)
>  {
>      qemu_mutex_init(&cpu_list_mutex);
>      qemu_mutex_init(&exclusive_lock);
> -    qemu_cond_init(&exclusive_cond);
>      qemu_cond_init(&exclusive_resume);
>      qemu_cond_init(&qemu_work_cond);
> +    qemu_cond_init(&qemu_safe_work_cond);
> +    qemu_cond_init(&qemu_exclusive_cond);
>  }
>
>  /* Make sure everything is in a consistent state for calling fork(). */
> @@ -148,9 +147,10 @@ void fork_end(int child)
>          exclusive_pending = false;
>          qemu_mutex_init(&exclusive_lock);
>          qemu_mutex_init(&cpu_list_mutex);
> -        qemu_cond_init(&exclusive_cond);
>          qemu_cond_init(&exclusive_resume);
>          qemu_cond_init(&qemu_work_cond);
> +        qemu_cond_init(&qemu_safe_work_cond);
> +        qemu_cond_init(&qemu_exclusive_cond);
>          qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>          gdbserver_fork(thread_cpu);
>      } else {
> @@ -190,7 +190,7 @@ static inline void start_exclusive(void)
>          }
>      }
>      while (tcg_pending_threads) {
> -        qemu_cond_wait(&exclusive_cond, &exclusive_lock);
> +        qemu_cond_wait(&qemu_exclusive_cond, &exclusive_lock);
>      }
>  }
>
> @@ -219,10 +219,11 @@ static inline void cpu_exec_end(CPUState *cpu)
>      cpu->running = false;
>      tcg_pending_threads--;
>      if (!tcg_pending_threads) {
> -        qemu_cond_signal(&exclusive_cond);
> +        qemu_cond_broadcast(&qemu_exclusive_cond);
>      }
>      exclusive_idle();
>      process_queued_cpu_work(cpu);
> +    wait_safe_cpu_work();
>      qemu_mutex_unlock(&exclusive_lock);
>  }

--
Alex Bennée