Re: [Qemu-devel] [PATCH v2 4/5] block/nvme: add support for write zeros

2019-06-05 Thread Fam Zheng
On Wed, 04/17 22:53, Maxim Levitsky wrote:
> Signed-off-by: Maxim Levitsky 
> ---
>  block/nvme.c | 69 +++-
>  block/trace-events   |  1 +
>  include/block/nvme.h | 19 +++-
>  3 files changed, 87 insertions(+), 2 deletions(-)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index 0b1da54574..35b925899f 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -109,6 +109,8 @@ typedef struct {
>  uint64_t max_transfer;
>  bool plugged;
>  
> +bool supports_write_zeros;
> +
>  CoMutex dma_map_lock;
>  CoQueue dma_flush_queue;
>  
> @@ -457,6 +459,10 @@ static void nvme_identify(BlockDriverState *bs, int 
> namespace, Error **errp)
>  s->max_transfer = MIN_NON_ZERO(s->max_transfer,
>s->page_size / sizeof(uint64_t) * s->page_size);
>  
> +
> +

Too many blank lines here.

> +s->supports_write_zeros = (idctrl->oncs & NVME_ONCS_WRITE_ZEROS) != 0;
> +
>  memset(resp, 0, 4096);
>  
>  cmd.cdw10 = 0;
> @@ -469,6 +475,11 @@ static void nvme_identify(BlockDriverState *bs, int 
> namespace, Error **errp)
>  s->nsze = le64_to_cpu(idns->nsze);
>  lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)];
>  
> +if (NVME_ID_NS_DLFEAT_WRITE_ZEROS(idns->dlfeat) &&
> +NVME_ID_NS_DLFEAT_READ_BEHAVIOR(idns->dlfeat) ==
> +NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROS)
> +bs->supported_write_flags |= BDRV_REQ_MAY_UNMAP;
> +
>  if (lbaf->ms) {
>  error_setg(errp, "Namespaces with metadata are not yet supported");
>  goto out;
> @@ -763,6 +774,8 @@ static int nvme_file_open(BlockDriverState *bs, QDict 
> *options, int flags,
>  int ret;
>  BDRVNVMeState *s = bs->opaque;
>  
> +bs->supported_write_flags = BDRV_REQ_FUA;
> +
>  opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>  qemu_opts_absorb_qdict(opts, options, &error_abort);
>  device = qemu_opt_get(opts, NVME_BLOCK_OPT_DEVICE);
> @@ -791,7 +804,6 @@ static int nvme_file_open(BlockDriverState *bs, QDict 
> *options, int flags,
>  goto fail;
>  }
>  }
> -bs->supported_write_flags = BDRV_REQ_FUA;
>  return 0;
>  fail:
>  nvme_close(bs);
> @@ -1080,6 +1092,58 @@ static coroutine_fn int nvme_co_flush(BlockDriverState 
> *bs)
>  }
>  
>  
> +static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
> +int64_t offset, int bytes, BdrvRequestFlags flags)
> +{
> +BDRVNVMeState *s = bs->opaque;
> +NVMeQueuePair *ioq = s->queues[1];
> +NVMeRequest *req;
> +
> +if (!s->supports_write_zeros) {
> +return -ENOTSUP;
> +}

Declaring local variables below statements is not allowed per the coding style.
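
Something along these lines (an untested sketch, just reshuffling the patch's
own declarations) would keep every declaration ahead of the first statement:

    static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
        int64_t offset, int bytes, BdrvRequestFlags flags)
    {
        BDRVNVMeState *s = bs->opaque;
        NVMeQueuePair *ioq = s->queues[1];
        NVMeRequest *req;
        /* NLB is a 0's based 16-bit field */
        uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF;
        NvmeCmd cmd = {
            .opcode = NVME_CMD_WRITE_ZEROS,
            .nsid = cpu_to_le32(s->nsid),
            .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF),
            .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF),
        };
        NVMeCoData data = {
            .ctx = bdrv_get_aio_context(bs),
            .ret = -EINPROGRESS,
        };

        if (!s->supports_write_zeros) {
            return -ENOTSUP;
        }
        ...
    }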

> +
> +uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF;
> +
> +NvmeCmd cmd = {
> +.opcode = NVME_CMD_WRITE_ZEROS,
> +.nsid = cpu_to_le32(s->nsid),
> +.cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF),
> +.cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF),
> +};
> +
> +NVMeCoData data = {
> +.ctx = bdrv_get_aio_context(bs),
> +.ret = -EINPROGRESS,
> +};
> +
> +if (flags & BDRV_REQ_MAY_UNMAP) {
> +cdw12 |= (1 << 25);
> +}
> +
> +if (flags & BDRV_REQ_FUA) {
> +cdw12 |= (1 << 30);
> +}
> +
> +cmd.cdw12 = cpu_to_le32(cdw12);
> +
> +trace_nvme_write_zeros(s, offset, bytes, flags);
> +assert(s->nr_queues > 1);
> +req = nvme_get_free_req(ioq);
> +assert(req);
> +
> +nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
> +
> +data.co = qemu_coroutine_self();
> +while (data.ret == -EINPROGRESS) {
> +qemu_coroutine_yield();
> +}
> +
> +trace_nvme_rw_done(s, true, offset, bytes, data.ret);
> +return data.ret;
> +}
> +
> +
>  static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
> BlockReopenQueue *queue, Error **errp)
>  {
> @@ -1184,6 +1248,9 @@ static BlockDriver bdrv_nvme = {
>  
>  .bdrv_co_preadv   = nvme_co_preadv,
>  .bdrv_co_pwritev  = nvme_co_pwritev,
> +
> +.bdrv_co_pwrite_zeroes= nvme_co_pwrite_zeroes,
> +
>  .bdrv_co_flush_to_disk= nvme_co_flush,
>  .bdrv_reopen_prepare  = nvme_reopen_prepare,
>  
> diff --git a/block/trace-events b/block/trace-events
> index 7335a42540..943a58569f 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -144,6 +144,7 @@ nvme_submit_command_raw(int c0, int c1, int c2, int c3, 
> int c4, int c5, int c6,
>  nvme_handle_event(void *s) "s %p"
>  nvme_poll_cb(void *s) "s %p"
>  nvme_prw_aligned(void *s, int is_write, uint64_t offset, uint64_t bytes, int 
> flags, int niov) "s %p is_write %d offset %"PRId64" bytes %"PRId64" flags %d 
> niov %d"
> +nvme_write_zeros(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p 
> offset %"PRId64" bytes %"PRId64" flags %d"
>  nvme_qiov_unaligned(const vo

Re: [Qemu-devel] [PATCH v3 06/10] hw/s390x: Replace global smp variables with machine smp properties

2019-06-05 Thread Eduardo Habkost
On Sun, May 19, 2019 at 04:54:24AM +0800, Like Xu wrote:
> The global smp variables in s390x are replaced with smp machine properties.
> 
> A local variable of the same name would be introduced in the declaration
> phase if it's used widely in the context OR replace it on the spot if it's
> only used once. No semantic changes.
> 
> Signed-off-by: Like Xu 

CCing s390 maintainers.

Any objections to merging this through the Machine Core tree?
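
To illustrate the replacement pattern the commit message describes (sketch
only, not from the patch):

    /* replace on the spot when the global is only used once ... */
    for (i = 0; i < machine->smp.cpus; i++) {   /* was: smp_cpus */
        ...
    }

    /* ... or introduce a local of the same name when it is used repeatedly */
    unsigned int max_cpus = ms->smp.max_cpus;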

> ---
>  hw/s390x/s390-virtio-ccw.c | 3 ++-
>  hw/s390x/sclp.c| 2 +-
>  target/s390x/cpu.c | 3 +++
>  target/s390x/excp_helper.c | 5 +
>  4 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index ed1fe7a93e..692ad6e372 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -83,7 +83,7 @@ static void s390_init_cpus(MachineState *machine)
>  /* initialize possible_cpus */
>  mc->possible_cpu_arch_ids(machine);
>  
> -for (i = 0; i < smp_cpus; i++) {
> +for (i = 0; i < machine->smp.cpus; i++) {
>  s390x_new_cpu(machine->cpu_type, i, &error_fatal);
>  }
>  }
> @@ -410,6 +410,7 @@ static CpuInstanceProperties 
> s390_cpu_index_to_props(MachineState *ms,
>  static const CPUArchIdList *s390_possible_cpu_arch_ids(MachineState *ms)
>  {
>  int i;
> +unsigned int max_cpus = ms->smp.max_cpus;
>  
>  if (ms->possible_cpus) {
>  g_assert(ms->possible_cpus && ms->possible_cpus->len == max_cpus);
> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
> index 4510a800cb..fac7c3bb6c 100644
> --- a/hw/s390x/sclp.c
> +++ b/hw/s390x/sclp.c
> @@ -64,7 +64,7 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb)
>  prepare_cpu_entries(sclp, read_info->entries, &cpu_count);
>  read_info->entries_cpu = cpu_to_be16(cpu_count);
>  read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries));
> -read_info->highest_cpu = cpu_to_be16(max_cpus - 1);
> +read_info->highest_cpu = cpu_to_be16(machine->smp.max_cpus - 1);
>  
>  read_info->ibc_val = cpu_to_be32(s390_get_ibc_val());
>  
> diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
> index b1df63d82c..f1e5c0d9c3 100644
> --- a/target/s390x/cpu.c
> +++ b/target/s390x/cpu.c
> @@ -37,6 +37,7 @@
>  #include "hw/qdev-properties.h"
>  #ifndef CONFIG_USER_ONLY
>  #include "hw/hw.h"
> +#include "hw/boards.h"
>  #include "sysemu/arch_init.h"
>  #include "sysemu/sysemu.h"
>  #endif
> @@ -193,6 +194,8 @@ static void s390_cpu_realizefn(DeviceState *dev, Error 
> **errp)
>  }
>  
>  #if !defined(CONFIG_USER_ONLY)
> +MachineState *ms = MACHINE(qdev_get_machine());
> +unsigned int max_cpus = ms->smp.max_cpus;
>  if (cpu->env.core_id >= max_cpus) {
>  error_setg(&err, "Unable to add CPU with core-id: %" PRIu32
> ", maximum core-id: %d", cpu->env.core_id,
> diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c
> index 3a467b72c5..1c6938effc 100644
> --- a/target/s390x/excp_helper.c
> +++ b/target/s390x/excp_helper.c
> @@ -31,6 +31,7 @@
>  #ifndef CONFIG_USER_ONLY
>  #include "sysemu/sysemu.h"
>  #include "hw/s390x/s390_flic.h"
> +#include "hw/boards.h"
>  #endif
>  
>  void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t 
> code,
> @@ -300,6 +301,10 @@ static void do_ext_interrupt(CPUS390XState *env)
>  g_assert(cpu_addr < S390_MAX_CPUS);
>  lowcore->cpu_addr = cpu_to_be16(cpu_addr);
>  clear_bit(cpu_addr, env->emergency_signals);
> +#ifndef CONFIG_USER_ONLY
> +MachineState *ms = MACHINE(qdev_get_machine());
> +unsigned int max_cpus = ms->smp.max_cpus;
> +#endif
>  if (bitmap_empty(env->emergency_signals, max_cpus)) {
>  env->pending_int &= ~INTERRUPT_EMERGENCY_SIGNAL;
>  }
> -- 
> 2.21.0
> 
> 

-- 
Eduardo



Re: [Qemu-devel] [PATCH v3 07/10] hw/i386: Replace global smp variables with machine smp properties

2019-06-05 Thread Eduardo Habkost
On Sun, May 19, 2019 at 04:54:25AM +0800, Like Xu wrote:
> The global smp variables in i386 are replaced with smp machine properties.
> To avoid calling qdev_get_machine() as much as possible, some related functions
> for ACPI data generation are refactored. No semantic changes.
> 
> A local variable of the same name would be introduced in the declaration
> phase if it's used widely in the context OR replace it on the spot if it's
> only used once. No semantic changes.
> 
> Signed-off-by: Like Xu 

Reviewed-by: Eduardo Habkost 

-- 
Eduardo



Re: [Qemu-devel] [PATCH v3 09/10] hw: Replace global smp variables with MachineState for all remaining archs

2019-06-05 Thread Eduardo Habkost
On Sun, May 19, 2019 at 04:54:27AM +0800, Like Xu wrote:
> The global smp variables in alpha/hppa/mips/openrisc/sparc*/xtensa codes
> are replaced with smp properties from MachineState.
> 
> A local variable of the same name would be introduced in the declaration
> phase if it's used widely in the context OR replace it on the spot if it's
> only used once. No semantic changes.
> 
> Signed-off-by: Like Xu 
> Reviewed-by: Alistair Francis 

CCing the maintainers for those machines.  Any objections to merging this
through the Machine Core tree?

> ---
>  hw/alpha/dp264.c   | 1 +
>  hw/hppa/machine.c  | 2 ++
>  hw/mips/boston.c   | 2 +-
>  hw/mips/mips_malta.c   | 2 ++
>  hw/openrisc/openrisc_sim.c | 1 +
>  hw/sparc/sun4m.c   | 2 ++
>  hw/sparc64/sun4u.c | 4 ++--
>  hw/xtensa/sim.c| 2 +-
>  hw/xtensa/xtfpga.c | 1 +
>  9 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/alpha/dp264.c b/hw/alpha/dp264.c
> index 0347eb897c..9dfb835013 100644
> --- a/hw/alpha/dp264.c
> +++ b/hw/alpha/dp264.c
> @@ -63,6 +63,7 @@ static void clipper_init(MachineState *machine)
>  char *palcode_filename;
>  uint64_t palcode_entry, palcode_low, palcode_high;
>  uint64_t kernel_entry, kernel_low, kernel_high;
> +unsigned int smp_cpus = machine->smp.cpus;
>  
>  /* Create up to 4 cpus.  */
>  memset(cpus, 0, sizeof(cpus));
> diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
> index 416e67bab1..662838d83b 100644
> --- a/hw/hppa/machine.c
> +++ b/hw/hppa/machine.c
> @@ -72,6 +72,7 @@ static void machine_hppa_init(MachineState *machine)
>  MemoryRegion *ram_region;
>  MemoryRegion *cpu_region;
>  long i;
> +unsigned int smp_cpus = machine->smp.cpus;
>  
>  ram_size = machine->ram_size;
>  
> @@ -242,6 +243,7 @@ static void machine_hppa_init(MachineState *machine)
>  
>  static void hppa_machine_reset(MachineState *ms)
>  {
> +unsigned int smp_cpus = ms->smp.cpus;
>  int i;
>  
>  qemu_devices_reset();
> diff --git a/hw/mips/boston.c b/hw/mips/boston.c
> index a8b29f62f5..ccbfac54ef 100644
> --- a/hw/mips/boston.c
> +++ b/hw/mips/boston.c
> @@ -460,7 +460,7 @@ static void boston_mach_init(MachineState *machine)
>  
>  object_property_set_str(OBJECT(s->cps), machine->cpu_type, "cpu-type",
>  &err);
> -object_property_set_int(OBJECT(s->cps), smp_cpus, "num-vp", &err);
> +object_property_set_int(OBJECT(s->cps), machine->smp.cpus, "num-vp", 
> &err);
>  object_property_set_bool(OBJECT(s->cps), true, "realized", &err);
>  
>  if (err != NULL) {
> diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
> index 5fe9512c24..ead5976d1a 100644
> --- a/hw/mips/mips_malta.c
> +++ b/hw/mips/mips_malta.c
> @@ -1095,6 +1095,8 @@ static int64_t load_kernel (void)
>  
>  static void malta_mips_config(MIPSCPU *cpu)
>  {
> +MachineState *ms = MACHINE(qdev_get_machine());
> +unsigned int smp_cpus = ms->smp.cpus;
>  CPUMIPSState *env = &cpu->env;
>  CPUState *cs = CPU(cpu);
>  
> diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c
> index 0a906d815e..8d828e78ee 100644
> --- a/hw/openrisc/openrisc_sim.c
> +++ b/hw/openrisc/openrisc_sim.c
> @@ -131,6 +131,7 @@ static void openrisc_sim_init(MachineState *machine)
>  qemu_irq *cpu_irqs[2];
>  qemu_irq serial_irq;
>  int n;
> +unsigned int smp_cpus = machine->smp.cpus;
>  
>  for (n = 0; n < smp_cpus; n++) {
>  cpu = OPENRISC_CPU(cpu_create(machine->cpu_type));
> diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
> index 07d126aea8..5c3739f2ef 100644
> --- a/hw/sparc/sun4m.c
> +++ b/hw/sparc/sun4m.c
> @@ -852,6 +852,8 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef,
>  FWCfgState *fw_cfg;
>  DeviceState *dev;
>  SysBusDevice *s;
> +unsigned int smp_cpus = machine->smp.cpus;
> +unsigned int max_cpus = machine->smp.max_cpus;
>  
>  /* init CPUs */
>  for(i = 0; i < smp_cpus; i++) {
> diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
> index 399f2d73c8..0807f274bf 100644
> --- a/hw/sparc64/sun4u.c
> +++ b/hw/sparc64/sun4u.c
> @@ -678,8 +678,8 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
>  &FW_CFG_IO(dev)->comb_iomem);
>  
>  fw_cfg = FW_CFG(dev);
> -fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
> -fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
> +fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)machine->smp.cpus);
> +fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus);
>  fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
>  fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
>  fw_cfg_add_i64(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_entry);
> diff --git a/hw/xtensa/sim.c b/hw/xtensa/sim.c
> index 12c7437398..a4eef76fbc 100644
> --- a/hw/xtensa/sim.c
> +++ b/hw/xtensa/sim.c
> @@ -60,7 +60,7 @

Re: [Qemu-devel] [PATCH v3 08/10] hw/arm: Replace global smp variables with machine smp properties

2019-06-05 Thread Eduardo Habkost
On Sun, May 19, 2019 at 04:54:26AM +0800, Like Xu wrote:
> The global smp variables in arm are replaced with smp machine properties.
> The init_cpus() and *_create_rpu() are refactored to pass MachineState.
> 
> A local variable of the same name would be introduced in the declaration
> phase if it's used widely in the context OR replace it on the spot if it's
> only used once. No semantic changes.
> 
> Signed-off-by: Like Xu 
> Reviewed-by: Alistair Francis 

CCing arm maintainers.  Any objection to merging this through the
Machine Core tree?

> ---
>  hw/arm/fsl-imx6.c  |  6 +-
>  hw/arm/fsl-imx6ul.c|  6 +-
>  hw/arm/fsl-imx7.c  |  7 +--
>  hw/arm/highbank.c  |  1 +
>  hw/arm/mcimx6ul-evk.c  |  2 +-
>  hw/arm/mcimx7d-sabre.c |  2 +-
>  hw/arm/raspi.c |  4 ++--
>  hw/arm/realview.c  |  1 +
>  hw/arm/sabrelite.c |  2 +-
>  hw/arm/vexpress.c  | 16 ++--
>  hw/arm/virt.c  |  8 +++-
>  hw/arm/xlnx-zynqmp.c   | 16 ++--
>  target/arm/cpu.c   |  8 +++-
>  13 files changed, 56 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
> index 7b7b97f74c..ed772d5bd9 100644
> --- a/hw/arm/fsl-imx6.c
> +++ b/hw/arm/fsl-imx6.c
> @@ -23,6 +23,7 @@
>  #include "qapi/error.h"
>  #include "qemu-common.h"
>  #include "hw/arm/fsl-imx6.h"
> +#include "hw/boards.h"
>  #include "sysemu/sysemu.h"
>  #include "chardev/char.h"
>  #include "qemu/error-report.h"
> @@ -33,11 +34,12 @@
>  
>  static void fsl_imx6_init(Object *obj)
>  {
> +MachineState *ms = MACHINE(qdev_get_machine());
>  FslIMX6State *s = FSL_IMX6(obj);
>  char name[NAME_SIZE];
>  int i;
>  
> -for (i = 0; i < MIN(smp_cpus, FSL_IMX6_NUM_CPUS); i++) {
> +for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX6_NUM_CPUS); i++) {
>  snprintf(name, NAME_SIZE, "cpu%d", i);
>  object_initialize_child(obj, name, &s->cpu[i], sizeof(s->cpu[i]),
>  "cortex-a9-" TYPE_ARM_CPU, &error_abort, 
> NULL);
> @@ -93,9 +95,11 @@ static void fsl_imx6_init(Object *obj)
>  
>  static void fsl_imx6_realize(DeviceState *dev, Error **errp)
>  {
> +MachineState *ms = MACHINE(qdev_get_machine());
>  FslIMX6State *s = FSL_IMX6(dev);
>  uint16_t i;
>  Error *err = NULL;
> +unsigned int smp_cpus = ms->smp.cpus;
>  
>  if (smp_cpus > FSL_IMX6_NUM_CPUS) {
>  error_setg(errp, "%s: Only %d CPUs are supported (%d requested)",
> diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
> index 4b56bfa8d1..74b8ecbbb6 100644
> --- a/hw/arm/fsl-imx6ul.c
> +++ b/hw/arm/fsl-imx6ul.c
> @@ -21,6 +21,7 @@
>  #include "qemu-common.h"
>  #include "hw/arm/fsl-imx6ul.h"
>  #include "hw/misc/unimp.h"
> +#include "hw/boards.h"
>  #include "sysemu/sysemu.h"
>  #include "qemu/error-report.h"
>  
> @@ -28,11 +29,12 @@
>  
>  static void fsl_imx6ul_init(Object *obj)
>  {
> +MachineState *ms = MACHINE(qdev_get_machine());
>  FslIMX6ULState *s = FSL_IMX6UL(obj);
>  char name[NAME_SIZE];
>  int i;
>  
> -for (i = 0; i < MIN(smp_cpus, FSL_IMX6UL_NUM_CPUS); i++) {
> +for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX6UL_NUM_CPUS); i++) {
>  snprintf(name, NAME_SIZE, "cpu%d", i);
>  object_initialize_child(obj, name, &s->cpu[i], sizeof(s->cpu[i]),
>  "cortex-a7-" TYPE_ARM_CPU, &error_abort, 
> NULL);
> @@ -156,10 +158,12 @@ static void fsl_imx6ul_init(Object *obj)
>  
>  static void fsl_imx6ul_realize(DeviceState *dev, Error **errp)
>  {
> +MachineState *ms = MACHINE(qdev_get_machine());
>  FslIMX6ULState *s = FSL_IMX6UL(dev);
>  int i;
>  qemu_irq irq;
>  char name[NAME_SIZE];
> +unsigned int smp_cpus = ms->smp.cpus;
>  
>  if (smp_cpus > FSL_IMX6UL_NUM_CPUS) {
>  error_setg(errp, "%s: Only %d CPUs are supported (%d requested)",
> diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
> index 7663ad6861..71cc414de6 100644
> --- a/hw/arm/fsl-imx7.c
> +++ b/hw/arm/fsl-imx7.c
> @@ -23,6 +23,7 @@
>  #include "qemu-common.h"
>  #include "hw/arm/fsl-imx7.h"
>  #include "hw/misc/unimp.h"
> +#include "hw/boards.h"
>  #include "sysemu/sysemu.h"
>  #include "qemu/error-report.h"
>  
> @@ -30,12 +31,12 @@
>  
>  static void fsl_imx7_init(Object *obj)
>  {
> +MachineState *ms = MACHINE(qdev_get_machine());
>  FslIMX7State *s = FSL_IMX7(obj);
>  char name[NAME_SIZE];
>  int i;
>  
> -
> -for (i = 0; i < MIN(smp_cpus, FSL_IMX7_NUM_CPUS); i++) {
> +for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX7_NUM_CPUS); i++) {
>  snprintf(name, NAME_SIZE, "cpu%d", i);
>  object_initialize_child(obj, name, &s->cpu[i], sizeof(s->cpu[i]),
>  ARM_CPU_TYPE_NAME("cortex-a7"), &error_abort,
> @@ -155,11 +156,13 @@ static void fsl_imx7_init(Object *obj)
>  
>  static void fsl_imx7_realize(DeviceState *dev, Error **errp)
>  {
> +MachineState *ms = MACHINE(qdev_get_machine());
>  FslIMX7State *

Re: [Qemu-devel] [PATCH v3 00/10] Refactor cpu topo into machine properties

2019-06-05 Thread Eduardo Habkost
On Sun, May 19, 2019 at 04:54:18AM +0800, Like Xu wrote:
> This patch series make existing cores/threads/sockets into machine
> properties and get rid of global smp_* variables they use currently.
> 
> The purpose of getting rid of the globals is to disentangle layer violations.
> Let's do it one step at a time, replacing the smp_foo globals with
> qdev_get_machine() in as few calls as possible and deferring other related
> refactoring efforts.

Thanks, and sorry for taking so long to review this.  This
version looks good, I'm planning to queue it.  I will just wait
for a few days to hear from the maintainers of the machines that
are touched by the series.

> 
> ==changelog==
> 
> v3:
> 
> - rephrase commit messages
> - s/of/of present/ for CpuTopology comment
> - drop redundant arguments such as cpu_type
> - use ms instead of macs in migration context
> - rebase to commit 1b46b4daa6
> 
> v2: https://patchwork.ozlabs.org/cover/1095727/
> 
> - pass MachineState via call chain with trivial fixups
> - replace smp_cpus directly at places if it's only used once
> - s/topo/smp/ and drop smp_ prefix inside CpuTopology structure
> - add more commit messages to explaining what patch does
> - fix Patchew build failure for xen usage
> - use macs rather than ms in migration context for MigrationState
> - cleanup unrelated and redundant changes
> - spilt OpenRISC and RISC-V related patches
> 
> v1: https://patchwork.kernel.org/cover/10876667/
> 
> Like Xu (10):
>   hw/boards: Add struct CpuTopology to MachineState
>   machine: Refactor smp-related call chains to pass MachineState
>   general: Replace global smp variables with smp machine properties
>   hw/ppc: Replace global smp variables with machine smp properties
>   hw/riscv: Replace global smp variables with machine smp properties
>   hw/s390x: Replace global smp variables with machine smp properties
>   hw/i386: Replace global smp variables with machine smp properties
>   hw/arm: Replace global smp variables with machine smp properties
>   hw: Replace global smp variables with MachineState for all remaining archs
>   vl.c: Replace smp global variables with smp machine properties
> 
>  accel/kvm/kvm-all.c  |  4 +--
>  backends/hostmem.c   |  6 +++--
>  cpus.c   |  6 +++--
>  exec.c   |  3 ++-
>  gdbstub.c|  4 +++
>  hw/alpha/dp264.c |  1 +
>  hw/arm/fsl-imx6.c|  6 -
>  hw/arm/fsl-imx6ul.c  |  6 -
>  hw/arm/fsl-imx7.c|  7 +++--
>  hw/arm/highbank.c|  1 +
>  hw/arm/mcimx6ul-evk.c|  2 +-
>  hw/arm/mcimx7d-sabre.c   |  2 +-
>  hw/arm/raspi.c   |  4 +--
>  hw/arm/realview.c|  1 +
>  hw/arm/sabrelite.c   |  2 +-
>  hw/arm/vexpress.c| 16 +++-
>  hw/arm/virt.c| 10 ++--
>  hw/arm/xlnx-zynqmp.c | 16 +++-
>  hw/cpu/core.c|  4 ++-
>  hw/hppa/machine.c|  4 ++-
>  hw/i386/acpi-build.c | 13 ++
>  hw/i386/kvmvapic.c   |  7 +++--
>  hw/i386/pc.c | 33 ++--
>  hw/i386/xen/xen-hvm.c|  4 +++
>  hw/mips/boston.c |  2 +-
>  hw/mips/mips_malta.c | 24 +
>  hw/openrisc/openrisc_sim.c   |  1 +
>  hw/ppc/e500.c|  3 +++
>  hw/ppc/mac_newworld.c|  3 ++-
>  hw/ppc/mac_oldworld.c|  3 ++-
>  hw/ppc/pnv.c |  9 ---
>  hw/ppc/prep.c|  4 +--
>  hw/ppc/spapr.c   | 37 ++
>  hw/ppc/spapr_rtas.c  |  4 ++-
>  hw/riscv/sifive_e.c  |  6 +++--
>  hw/riscv/sifive_plic.c   |  3 +++
>  hw/riscv/sifive_u.c  |  6 +++--
>  hw/riscv/spike.c |  2 ++
>  hw/riscv/virt.c  |  1 +
>  hw/s390x/s390-virtio-ccw.c   |  9 ---
>  hw/s390x/sclp.c  |  2 +-
>  hw/smbios/smbios.c   | 26 +++
>  hw/sparc/sun4m.c |  2 ++
>  hw/sparc64/sun4u.c   |  4 +--
>  hw/xtensa/sim.c  |  2 +-
>  hw/xtensa/xtfpga.c   |  1 +
>  include/hw/boards.h  | 19 --
>  include/hw/firmware/smbios.h |  5 ++--
>  include/hw/i386/pc.h |  2 +-
>  migration/postcopy-ram.c |  8 +-
>  numa.c   |  1 +
>  qmp.c|  2 +-
>  target/arm/cpu.c |  8 +-
>  target/i386/cpu.c|  4 ++-
>  target/openrisc/sys_helper.c |  6 -
>  target/s390x/cpu.c   |  3 +++
>  target/s390x/excp_helper.c   |  5 
>  tcg/tcg.c| 13 +-
>  vl.c | 50 +++-
>  59 files changed, 301 insertions(+), 141 deletions(-)
> 
> -- 
> 2.21.0
> 
> 

-- 
Eduardo



Re: [Qemu-devel] [PATCH v4 07/11] hmat acpi: Build Memory Side Cache Information Structure(s) in ACPI HMAT

2019-06-05 Thread Tao Xu

On 6/5/2019 8:12 PM, Igor Mammedov wrote:

On Wed, 5 Jun 2019 14:04:10 +0800
Tao Xu  wrote:


On 6/4/2019 11:04 PM, Igor Mammedov wrote:

On Wed,  8 May 2019 14:17:22 +0800
Tao Xu  wrote:
   

...

+
+/* SMBIOS Handles */
+/* TBD: set smbios handles */
+build_append_int_noprefix(table_data, 0, 2 * n);

Is memory side cache structure useful at all without pointing to SMBIOS entries?
   

They are not useful yet, and the kernel 5.1 HMAT sysfs doesn't show
SMBIOS entries. We can update it if it is useful in the future.


In that case I'd suggest to drop it for now until this table is properly
populated and ready for consumption. (i.e. drop this patch and corresponding
CLI 9/11 patch).



But the kernel HMAT code can read the other Memory Side Cache Information
fields even without SMBIOS entries, and the host HMAT tables also lack SMBIOS
handles; they show the Number of SMBIOS handles (n) as 0. So I am wondering
whether it would be better to set "SMBIOS handles (n)" to 0, remove the TODO,
and add a comment explaining why it is set to 0?
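
If the field is kept, one way to encode that decision might be (sketch only,
reusing the helper the patch already calls):

    /*
     * Number of SMBIOS handles (n).
     * Deliberately 0: Linux (as of 5.1) does not consume the handles and
     * the host HMAT exposes 0 here as well, so no handle list follows.
     */
    build_append_int_noprefix(table_data, 0, 2);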




Re: [Qemu-devel] [PATCH v3 10/10] vl.c: Replace smp global variables with smp machine properties

2019-06-05 Thread Eduardo Habkost
On Sun, May 19, 2019 at 04:54:28AM +0800, Like Xu wrote:
> The global smp variables in vl.c are completely replaced with machine 
> properties.
> 
> From this commit, the smp_cpus/smp_cores/smp_threads/max_cpus are deprecated
> and only machine properties within MachineState are fully applied and enabled.
> 
> Signed-off-by: Like Xu 
> Reviewed-by: Alistair Francis 

Reviewed-by: Eduardo Habkost 

-- 
Eduardo



Re: [Qemu-devel] [PATCH v9 5/6] ppc: spapr: Enable FWNMI capability

2019-06-05 Thread David Gibson
On Wed, May 29, 2019 at 11:10:49AM +0530, Aravinda Prasad wrote:
> Enable the KVM capability KVM_CAP_PPC_FWNMI so that
> the KVM causes guest exit with NMI as exit reason
> when it encounters a machine check exception on the
> address belonging to a guest. Without this capability
> enabled, KVM redirects machine check exceptions to
> guest's 0x200 vector.
> 
> This patch also deals with the case when a guest with
> the KVM_CAP_PPC_FWNMI capability enabled is attempted
> to migrate to a host that does not support this
> capability.
> 
> Signed-off-by: Aravinda Prasad 
> ---
>  hw/ppc/spapr.c |1 +
>  hw/ppc/spapr_caps.c|   24 
>  hw/ppc/spapr_rtas.c|   18 ++
>  include/hw/ppc/spapr.h |4 +++-
>  target/ppc/kvm.c   |   19 +++
>  target/ppc/kvm_ppc.h   |   12 
>  6 files changed, 77 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index c97f6a6..e8a77636 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4364,6 +4364,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
> void *data)
>  smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>  smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>  smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> +smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;

You need to turn this back off by default for the older machine types.
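
i.e. the usual pattern of overriding the default in the previous machine
type's class options, roughly (sketch; hook and compat names assumed):

    static void spapr_machine_4_0_class_options(MachineClass *mc)
    {
        SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);

        spapr_machine_4_1_class_options(mc);
        compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
        smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
    }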

>  spapr_caps_add_properties(smc, &error_abort);
>  smc->irq = &spapr_irq_dual;
>  smc->dr_phb_enabled = true;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 31b4661..ef9e612 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -479,6 +479,20 @@ static void cap_ccf_assist_apply(SpaprMachineState 
> *spapr, uint8_t val,
>  }
>  }
>  
> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> +Error **errp)
> +{
> +if (!val) {
> +return; /* Disabled by default */
> +}
> +
> +if (tcg_enabled()) {
> +error_setg(errp, "No fwnmi support in TCG, try 
> cap-fwnmi-mce=off");
> +} else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
> +error_setg(errp, "Requested fwnmi capability not support by 
> KVM");
> +}
> +}
> +
>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>  [SPAPR_CAP_HTM] = {
>  .name = "htm",
> @@ -578,6 +592,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>  .type = "bool",
>  .apply = cap_ccf_assist_apply,
>  },
> +[SPAPR_CAP_FWNMI_MCE] = {
> +.name = "fwnmi-mce",
> +.description = "Handle fwnmi machine check exceptions",
> +.index = SPAPR_CAP_FWNMI_MCE,
> +.get = spapr_cap_get_bool,
> +.set = spapr_cap_set_bool,
> +.type = "bool",
> +.apply = cap_fwnmi_mce_apply,
> +},
>  };
>  
>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> @@ -717,6 +740,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
> SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
>  
>  void spapr_caps_init(SpaprMachineState *spapr)
>  {
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index e0bdfc8..91a7ab9 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -49,6 +49,7 @@
>  #include "hw/ppc/fdt.h"
>  #include "target/ppc/mmu-hash64.h"
>  #include "target/ppc/mmu-book3s-v3.h"
> +#include "kvm_ppc.h"
>  
>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> uint32_t token, uint32_t nargs,
> @@ -358,6 +359,7 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>target_ulong args,
>uint32_t nret, target_ulong rets)
>  {
> +int ret;
>  hwaddr rtas_addr = spapr_get_rtas_addr();
>  
>  if (!rtas_addr) {
> @@ -365,6 +367,22 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>  return;
>  }
>  
> +if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == 0) {
> +rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +return;
> +}
> +
> +ret = kvmppc_fwnmi_enable(cpu);
> +if (ret == 1) {
> +rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +return;
> +}
> +
> +if (ret < 0) {
> +rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> +return;
> +}
> +
>  spapr->guest_machine_check_addr = rtas_ld(args, 1);
>  rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>  }
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index c717ab2..bd75d4b 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -78,8 +78,10 @@ typedef enum {
>  #define SPA

Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 5/6] ppc: spapr: Enable FWNMI capability

2019-06-05 Thread David Gibson
On Tue, Jun 04, 2019 at 12:15:26PM +0530, Aravinda Prasad wrote:
> 
> 
> On Monday 03 June 2019 08:55 PM, Greg Kurz wrote:
> > On Wed, 29 May 2019 11:10:49 +0530
> > Aravinda Prasad  wrote:
> > 
> >> Enable the KVM capability KVM_CAP_PPC_FWNMI so that
> >> the KVM causes guest exit with NMI as exit reason
> >> when it encounters a machine check exception on the
> >> address belonging to a guest. Without this capability
> >> enabled, KVM redirects machine check exceptions to
> >> guest's 0x200 vector.
> >>
> >> This patch also deals with the case when a guest with
> >> the KVM_CAP_PPC_FWNMI capability enabled is attempted
> >> to migrate to a host that does not support this
> >> capability.
> >>
> >> Signed-off-by: Aravinda Prasad 
> >> ---
> > 
> > As suggested in another mail, it may be worth introducing the sPAPR cap
> > in its own patch, earlier in the series.
> 
> Sure, also as a workaround mentioned in the reply to that mail, I am
> thinking of returning RTAS_OUT_NOT_SUPPORTED to rtas nmi register call
> until the entire functionality is implemented. This will help solve
> spapr cap issue as well.

Not registering the RTAS call at all is the correct way to handle that
case.
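
i.e. roughly (sketch; token names assumed from this series):

    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
        spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
                            rtas_ibm_nmi_register);
        spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
                            rtas_ibm_nmi_interlock);
    }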

> 
> > 
> > Anyway, I have some comments below.
> > 
> >>  hw/ppc/spapr.c |1 +
> >>  hw/ppc/spapr_caps.c|   24 
> >>  hw/ppc/spapr_rtas.c|   18 ++
> >>  include/hw/ppc/spapr.h |4 +++-
> >>  target/ppc/kvm.c   |   19 +++
> >>  target/ppc/kvm_ppc.h   |   12 
> >>  6 files changed, 77 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index c97f6a6..e8a77636 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -4364,6 +4364,7 @@ static void spapr_machine_class_init(ObjectClass 
> >> *oc, void *data)
> >>  smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>  smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>  smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >> +smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> >>  spapr_caps_add_properties(smc, &error_abort);
> >>  smc->irq = &spapr_irq_dual;
> >>  smc->dr_phb_enabled = true;
> >> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >> index 31b4661..ef9e612 100644
> >> --- a/hw/ppc/spapr_caps.c
> >> +++ b/hw/ppc/spapr_caps.c
> >> @@ -479,6 +479,20 @@ static void cap_ccf_assist_apply(SpaprMachineState 
> >> *spapr, uint8_t val,
> >>  }
> >>  }
> >>  
> >> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> >> +Error **errp)
> >> +{
> >> +if (!val) {
> >> +return; /* Disabled by default */
> >> +}
> >> +
> >> +if (tcg_enabled()) {
> >> +error_setg(errp, "No fwnmi support in TCG, try 
> >> cap-fwnmi-mce=off");
> > 
> > Maybe expand "fwnmi" to "Firmware Assisted Non-Maskable Interrupts" ?
> 
> sure..
> 
> > 
> >> +} else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
> >> +error_setg(errp, "Requested fwnmi capability not support by 
> >> KVM");
> > 
> > Maybe reword and add a hint:
> > 
> > "KVM implementation does not support Firmware Assisted Non-Maskable 
> > Interrupts, try cap-fwnmi-mce=off"
> 
> sure..
> 
> > 
> > 
> >> +}
> >> +}
> >> +
> >>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >>  [SPAPR_CAP_HTM] = {
> >>  .name = "htm",
> >> @@ -578,6 +592,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = 
> >> {
> >>  .type = "bool",
> >>  .apply = cap_ccf_assist_apply,
> >>  },
> >> +[SPAPR_CAP_FWNMI_MCE] = {
> >> +.name = "fwnmi-mce",
> >> +.description = "Handle fwnmi machine check exceptions",
> >> +.index = SPAPR_CAP_FWNMI_MCE,
> >> +.get = spapr_cap_get_bool,
> >> +.set = spapr_cap_set_bool,
> >> +.type = "bool",
> >> +.apply = cap_fwnmi_mce_apply,
> >> +},
> >>  };
> >>  
> >>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> >> @@ -717,6 +740,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
> >> SPAPR_CAP_HPT_MAXPAGESIZE);
> >>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> >>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> >>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> >> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
> >>  
> >>  void spapr_caps_init(SpaprMachineState *spapr)
> >>  {
> >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >> index e0bdfc8..91a7ab9 100644
> >> --- a/hw/ppc/spapr_rtas.c
> >> +++ b/hw/ppc/spapr_rtas.c
> >> @@ -49,6 +49,7 @@
> >>  #include "hw/ppc/fdt.h"
> >>  #include "target/ppc/mmu-hash64.h"
> >>  #include "target/ppc/mmu-book3s-v3.h"
> >> +#include "kvm_ppc.h"
> >>  
> >>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState 
> >> *spapr,
> >>

Re: [Qemu-devel] [PATCH v9 6/6] migration: Include migration support for machine check handling

2019-06-05 Thread David Gibson
On Wed, May 29, 2019 at 11:10:57AM +0530, Aravinda Prasad wrote:
> This patch includes migration support for machine check
> handling. Especially this patch blocks VM migration
> requests until the machine check error handling is
> complete as (i) these errors are specific to the source
> hardware and are irrelevant on the target hardware,
> (ii) these errors cause data corruption and should
> be handled before migration.
> 
> Signed-off-by: Aravinda Prasad 
> ---
>  hw/ppc/spapr.c |   20 
>  hw/ppc/spapr_events.c  |   17 +
>  hw/ppc/spapr_rtas.c|4 
>  include/hw/ppc/spapr.h |2 ++
>  4 files changed, 43 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e8a77636..31c4850 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2104,6 +2104,25 @@ static const VMStateDescription vmstate_spapr_dtb = {
>  },
>  };
>  
> +static bool spapr_fwnmi_needed(void *opaque)
> +{
> +SpaprMachineState *spapr = (SpaprMachineState *)opaque;
> +
> +return (spapr->guest_machine_check_addr == -1) ? 0 : 1;

Since we're introducing a PAPR capability to enable this, it would
actually be better to check that here, rather than the runtime state.
That leads to less cases and easier to understand semantics for the
migration stream.
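
i.e. something like (sketch):

    static bool spapr_fwnmi_needed(void *opaque)
    {
        SpaprMachineState *spapr = opaque;

        return spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) != SPAPR_CAP_OFF;
    }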

> +}
> +
> +static const VMStateDescription vmstate_spapr_machine_check = {
> +.name = "spapr_machine_check",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.needed = spapr_fwnmi_needed,
> +.fields = (VMStateField[]) {
> +VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> +VMSTATE_INT32(mc_status, SpaprMachineState),
> +VMSTATE_END_OF_LIST()
> +},
> +};
> +
>  static const VMStateDescription vmstate_spapr = {
>  .name = "spapr",
>  .version_id = 3,
> @@ -2137,6 +2156,7 @@ static const VMStateDescription vmstate_spapr = {
>  &vmstate_spapr_dtb,
>  &vmstate_spapr_cap_large_decr,
>  &vmstate_spapr_cap_ccf_assist,
> +&vmstate_spapr_machine_check,
>  NULL
>  }
>  };
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 573c0b7..35e21e4 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -41,6 +41,7 @@
>  #include "qemu/bcd.h"
>  #include "hw/ppc/spapr_ovec.h"
>  #include 
> +#include "migration/blocker.h"
>  
>  #define RTAS_LOG_VERSION_MASK   0xff00
>  #define   RTAS_LOG_VERSION_60x0600
> @@ -855,6 +856,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, 
> bool recovered)
>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>  {
>  SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +int ret;
> +Error *local_err = NULL;
> +
> +error_setg(&spapr->fwnmi_migration_blocker,
> +"Live migration not supported during machine check handling");
> +ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
> +if (ret < 0) {
> +/*
> + * We don't want to abort and let the migration to continue. In a
> + * rare case, the machine check handler will run on the target
> + * hardware. Though this is not preferable, it is better than 
> aborting
> + * the migration or killing the VM.
> + */
> +error_free(spapr->fwnmi_migration_blocker);

You should set fwnmi_migration_blocker to NULL here as well.

As mentioned on an earlier iteration, the migration blocker is the
same every time.  Couldn't you just create it once and free at final
teardown, rather than recreating it for every NMI?
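
Roughly (sketch, keeping the names used in the patch):

    /* once, e.g. at machine init, rather than on every NMI: */
    error_setg(&spapr->fwnmi_migration_blocker,
               "Live migration not supported during machine check handling");

    /* then in spapr_mce_req_event(): */
    if (migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err) < 0) {
        /* rare case; proceed without the blocker rather than aborting */
        warn_report_err(local_err);
    }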

> +warn_report_err(local_err);
> +}
>  
>  while (spapr->mc_status != -1) {
>  /*
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 91a7ab9..c849223 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -50,6 +50,7 @@
>  #include "target/ppc/mmu-hash64.h"
>  #include "target/ppc/mmu-book3s-v3.h"
>  #include "kvm_ppc.h"
> +#include "migration/blocker.h"
>  
>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> uint32_t token, uint32_t nargs,
> @@ -404,6 +405,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>  spapr->mc_status = -1;
>  qemu_cond_signal(&spapr->mc_delivery_cond);
>  rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +migrate_del_blocker(spapr->fwnmi_migration_blocker);
> +error_free(spapr->fwnmi_migration_blocker);
> +spapr->fwnmi_migration_blocker = NULL;
>  }
>  }
>  
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index bd75d4b..6c0cfd8 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -214,6 +214,8 @@ struct SpaprMachineState {
>  SpaprCapabilities def, eff, mig;
>  
>  unsigned gpu_numa_id;
> +
> +Error *fwnmi_migration_blocker;
>  };
>  
>  #define H_SUCCESS 0
> 

-- 
David Gibson  

Re: [Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

2019-06-05 Thread David Gibson
On Tue, Jun 04, 2019 at 11:01:19AM +0200, Greg Kurz wrote:
> On Tue, 4 Jun 2019 11:59:13 +0530
> Aravinda Prasad  wrote:
> 
> > On Monday 03 June 2019 07:30 PM, Greg Kurz wrote:
> > > On Wed, 29 May 2019 11:10:40 +0530
> > > Aravinda Prasad  wrote:
> > >   
> > >> Upon a machine check exception (MCE) in a guest address space,
> > >> KVM causes a guest exit to enable QEMU to build and pass the
> > >> error to the guest in the PAPR defined rtas error log format.
> > >>
> > >> This patch builds the rtas error log, copies it to the rtas_addr
> > >> and then invokes the guest registered machine check handler. The
> > >> handler in the guest takes suitable action(s) depending on the type
> > >> and criticality of the error. For example, if an error is
> > >> unrecoverable memory corruption in an application inside the
> > >> guest, then the guest kernel sends a SIGBUS to the application.
> > >> For recoverable errors, the guest performs recovery actions and
> > >> logs the error.
> > >>
> > >> Signed-off-by: Aravinda Prasad 
> > >> ---
> > >>  hw/ppc/spapr.c |5 +
> > >>  hw/ppc/spapr_events.c  |  236 
> > >> 
> > >>  include/hw/ppc/spapr.h |4 +
> > >>  3 files changed, 245 insertions(+)
> > >>
> > >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > >> index 6b6c962..c97f6a6 100644
> > >> --- a/hw/ppc/spapr.c
> > >> +++ b/hw/ppc/spapr.c
> > >> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState 
> > >> *machine)
> > >>  error_report("Could not get size of LPAR rtas '%s'", filename);
> > >>  exit(1);
> > >>  }
> > >> +
> > >> +/* Resize blob to accommodate error log. */
> > >> +g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);  
> > > 
> > > I don't see the point of this assertion... especially with the assignment
> > > below.  
> > 
> > It is required because we want to ensure that the rtas image size is
> > less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas
> > image with rtas error when we hit machine check exception. But I think a
> > comment in the code will help. Will add it.
> 
> I'd rather exit QEMU properly instead of aborting then. Also this is only
> needed if the guest has a chance to use FWNMI, ie. the spapr cap is
> set.

I think assert() is appropriate in this case.  If it fails it means
something is wrong in the code, not with configuration.

> 
> > 
> > >   
> > >> +spapr->rtas_size = RTAS_ERROR_LOG_MAX;  
> > > 
> > > As requested by David, this should only be done when the spapr cap is set,
> > > so that 4.0 machine types and older continue to use the current size.  
> > 
> > Due to other issue of re-allocating the blob and as this is not that
> > much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always.
> > 
> > Link to the discussion on this:
> > http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html
> > 
> 
> Indeed, and in the next mail in that thread, David writes:
> 
> > No, that's not right.  It's impractical to change the allocation
> > depending on whether fwnmi is currently active.  But you *can* (and
> > should) base the allocation on whether fwnmi is *possible* - that is,
> > the value of the spapr cap.
> 
> ie, allocate RTAS_ERROR_LOG_MAX when the spapr cap is set, allocate
> the file size otherwise.
> 
> > >   
> > >> +
> > >>  spapr->rtas_blob = g_malloc(spapr->rtas_size);
> > >>  if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 
> > >> 0) {
> > >>  error_report("Could not load LPAR rtas '%s'", filename);
> > >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > >> index a18446b..573c0b7 100644
> > >> --- a/hw/ppc/spapr_events.c
> > >> +++ b/hw/ppc/spapr_events.c
> > >> @@ -212,6 +212,106 @@ struct hp_extended_log {
> > >>  struct rtas_event_log_v6_hp hp;
> > >>  } QEMU_PACKED;
> > >>  
> > >> +struct rtas_event_log_v6_mc {
> > >> +#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
> > >> +struct rtas_event_log_v6_section_header hdr;
> > >> +uint32_t fru_id;
> > >> +uint32_t proc_id;
> > >> +uint8_t error_type;
> > >> +#define RTAS_LOG_V6_MC_TYPE_UE   0
> > >> +#define RTAS_LOG_V6_MC_TYPE_SLB  1
> > >> +#define RTAS_LOG_V6_MC_TYPE_ERAT 2
> > >> +#define RTAS_LOG_V6_MC_TYPE_TLB  4
> > >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
> > >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
> > >> +uint8_t sub_err_type;
> > >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
> > >> +#define RTAS_LOG_V6_MC_UE_IFETCH 1
> > >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
> > >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
> > >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
> > >> +#define RTAS_LOG_V6_MC_SLB_PARITY0
> > >> +#defin

Re: [Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

2019-06-05 Thread David Gibson
On Tue, Jun 04, 2019 at 11:59:13AM +0530, Aravinda Prasad wrote:
> 
> 
> On Monday 03 June 2019 07:30 PM, Greg Kurz wrote:
> > On Wed, 29 May 2019 11:10:40 +0530
> > Aravinda Prasad  wrote:
> > 
> >> Upon a machine check exception (MCE) in a guest address space,
> >> KVM causes a guest exit to enable QEMU to build and pass the
> >> error to the guest in the PAPR defined rtas error log format.
> >>
> >> This patch builds the rtas error log, copies it to the rtas_addr
> >> and then invokes the guest registered machine check handler. The
> >> handler in the guest takes suitable action(s) depending on the type
> >> and criticality of the error. For example, if an error is
> >> unrecoverable memory corruption in an application inside the
> >> guest, then the guest kernel sends a SIGBUS to the application.
> >> For recoverable errors, the guest performs recovery actions and
> >> logs the error.
> >>
> >> Signed-off-by: Aravinda Prasad 
> >> ---
> >>  hw/ppc/spapr.c |5 +
> >>  hw/ppc/spapr_events.c  |  236 
> >> 
> >>  include/hw/ppc/spapr.h |4 +
> >>  3 files changed, 245 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 6b6c962..c97f6a6 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState 
> >> *machine)
> >>  error_report("Could not get size of LPAR rtas '%s'", filename);
> >>  exit(1);
> >>  }
> >> +
> >> +/* Resize blob to accommodate error log. */
> >> +g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
> > 
> > I don't see the point of this assertion... especially with the assignment
> > below.
> 
> It is required because we want to ensure that the rtas image size is
> less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas
> image with rtas error when we hit machine check exception. But I think a
> comment in the code will help. Will add it.
> 
> 
> > 
> >> +spapr->rtas_size = RTAS_ERROR_LOG_MAX;
> > 
> > As requested by David, this should only be done when the spapr cap is set,
> > so that 4.0 machine types and older continue to use the current size.
> 
> Due to other issue of re-allocating the blob and as this is not that
> much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always.
> 
> Link to the discussion on this:
> http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html

Sorry, I wasn't clear in that discussion.  It is definitely *not* ok
to advertise the increased size to the guest for old machine types.
It *is* ok to waste some space inside qemu internal allocations if it
reduces conditionals.
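
Concretely, that could look something like this (rough sketch; assumes the
effective cap value can be queried at this point in spapr_machine_init()):

    spapr->rtas_size = get_image_size(filename);
    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
        /* Leave room for the error log without clobbering the rtas image;
         * the larger size is only ever advertised when FWNMI is possible. */
        g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
        spapr->rtas_size = RTAS_ERROR_LOG_MAX;
    }
    spapr->rtas_blob = g_malloc(spapr->rtas_size);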


> 
> > 
> >> +
> >>  spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>  if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 
> >> 0) {
> >>  error_report("Could not load LPAR rtas '%s'", filename);
> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> >> index a18446b..573c0b7 100644
> >> --- a/hw/ppc/spapr_events.c
> >> +++ b/hw/ppc/spapr_events.c
> >> @@ -212,6 +212,106 @@ struct hp_extended_log {
> >>  struct rtas_event_log_v6_hp hp;
> >>  } QEMU_PACKED;
> >>  
> >> +struct rtas_event_log_v6_mc {
> >> +#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
> >> +struct rtas_event_log_v6_section_header hdr;
> >> +uint32_t fru_id;
> >> +uint32_t proc_id;
> >> +uint8_t error_type;
> >> +#define RTAS_LOG_V6_MC_TYPE_UE   0
> >> +#define RTAS_LOG_V6_MC_TYPE_SLB  1
> >> +#define RTAS_LOG_V6_MC_TYPE_ERAT 2
> >> +#define RTAS_LOG_V6_MC_TYPE_TLB  4
> >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
> >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
> >> +uint8_t sub_err_type;
> >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
> >> +#define RTAS_LOG_V6_MC_UE_IFETCH 1
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
> >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
> >> +#define RTAS_LOG_V6_MC_SLB_PARITY0
> >> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
> >> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
> >> +#define RTAS_LOG_V6_MC_ERAT_PARITY   1
> >> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
> >> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
> >> +#define RTAS_LOG_V6_MC_TLB_PARITY1
> >> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
> >> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
> >> +uint8_t reserved_1[6];
> >> +uint64_t effective_address;
> >> +uint64_t logical_address;
> >> +} QEMU_PACKED;
> >> +
> >> +struct mc_extended_log {
> >> +struct rtas_event_log_v6 v6hdr;
> 

Re: [Qemu-devel] [PATCH v3 04/10] hw/ppc: Replace global smp variables with machine smp properties

2019-06-05 Thread David Gibson
On Wed, Jun 05, 2019 at 11:54:56PM -0300, Eduardo Habkost wrote:
> On Wed, Jun 05, 2019 at 11:52:41PM -0300, Eduardo Habkost wrote:
> > On Sun, May 19, 2019 at 04:54:22AM +0800, Like Xu wrote:
> > > The global smp variables in ppc are replaced with smp machine properties.
> > > 
> > > A local variable of the same name would be introduced in the declaration
> > > phase if it's used widely in the context OR replace it on the spot if it's
> > > only used once. No semantic changes.
> > > 
> > > Signed-off-by: Like Xu 
> > 
> > Any objections from the ppc maintainers to queueing this through
> > the Machine Core tree?
> 
> Oops, CCing the ppc maintainers.

No objection here.

Acked-by: David Gibson 

> 
> > 
> > > ---
> > >  hw/ppc/e500.c |  3 +++
> > >  hw/ppc/mac_newworld.c |  3 ++-
> > >  hw/ppc/mac_oldworld.c |  3 ++-
> > >  hw/ppc/pnv.c  |  6 --
> > >  hw/ppc/prep.c |  4 ++--
> > >  hw/ppc/spapr.c| 34 ++
> > >  hw/ppc/spapr_rtas.c   |  4 +++-
> > >  7 files changed, 42 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
> > > index beb2efd694..5e42e5a059 100644
> > > --- a/hw/ppc/e500.c
> > > +++ b/hw/ppc/e500.c
> > > @@ -307,6 +307,7 @@ static int 
> > > ppce500_load_device_tree(PPCE500MachineState *pms,
> > >  bool dry_run)
> > >  {
> > >  MachineState *machine = MACHINE(pms);
> > > +unsigned int smp_cpus = machine->smp.cpus;
> > >  const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
> > >  CPUPPCState *env = first_cpu->env_ptr;
> > >  int ret = -1;
> > > @@ -734,6 +735,7 @@ static DeviceState 
> > > *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
> > >  SysBusDevice *s;
> > >  int i, j, k;
> > >  MachineState *machine = MACHINE(pms);
> > > +unsigned int smp_cpus = machine->smp.cpus;
> > >  const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
> > >  
> > >  dev = qdev_create(NULL, TYPE_OPENPIC);
> > > @@ -846,6 +848,7 @@ void ppce500_init(MachineState *machine)
> > >  struct boot_info *boot_info;
> > >  int dt_size;
> > >  int i;
> > > +unsigned int smp_cpus = machine->smp.cpus;
> > >  /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and
> > >   * 4 respectively */
> > >  unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4};
> > > diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
> > > index 02d8559621..257b26ee24 100644
> > > --- a/hw/ppc/mac_newworld.c
> > > +++ b/hw/ppc/mac_newworld.c
> > > @@ -135,6 +135,7 @@ static void ppc_core99_init(MachineState *machine)
> > >  DeviceState *dev, *pic_dev;
> > >  hwaddr nvram_addr = 0xFFF04000;
> > >  uint64_t tbfreq;
> > > +unsigned int smp_cpus = machine->smp.cpus;
> > >  
> > >  linux_boot = (kernel_filename != NULL);
> > >  
> > > @@ -464,7 +465,7 @@ static void ppc_core99_init(MachineState *machine)
> > >  sysbus_mmio_map(s, 1, CFG_ADDR + 2);
> > >  
> > >  fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
> > > -fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
> > > +fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, 
> > > (uint16_t)machine->smp.max_cpus);
> > >  fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
> > >  fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch);
> > >  fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base);
> > > diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
> > > index 460cbc7923..1968f05a6c 100644
> > > --- a/hw/ppc/mac_oldworld.c
> > > +++ b/hw/ppc/mac_oldworld.c
> > > @@ -99,6 +99,7 @@ static void ppc_heathrow_init(MachineState *machine)
> > >  DeviceState *dev, *pic_dev;
> > >  BusState *adb_bus;
> > >  int bios_size;
> > > +unsigned int smp_cpus = machine->smp.cpus;
> > >  uint16_t ppc_boot_device;
> > >  DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
> > >  void *fw_cfg;
> > > @@ -322,7 +323,7 @@ static void ppc_heathrow_init(MachineState *machine)
> > >  sysbus_mmio_map(s, 1, CFG_ADDR + 2);
> > >  
> > >  fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
> > > -fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
> > > +fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, 
> > > (uint16_t)machine->smp.max_cpus);
> > >  fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
> > >  fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW);
> > >  fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base);
> > > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> > > index 1e8c505936..3bb15338de 100644
> > > --- a/hw/ppc/pnv.c
> > > +++ b/hw/ppc/pnv.c
> > > @@ -678,7 +678,8 @@ static void pnv_init(MachineState *machine)
> > >  object_property_add_child(OBJECT(pnv), chip_name, chip, 
> > > &error_fatal);
> > >  object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
> > >  &error_fatal);
> >

Re: [Qemu-devel] [PATCH v2 3/5] vl.c: Add -smp, dies=* command line support and update -smp doc

2019-06-05 Thread Eduardo Habkost
On Tue, May 21, 2019 at 12:50:54AM +0800, Like Xu wrote:
> For PC target, users could configure the number of dies per one package
> via command line with this patch, such as "-smp dies=2,cores=4".
> 
> A new pc-specified pc_smp_parse() is introduced and to keep the interface
> consistent, refactoring legacy smp_parse() to __smp_parse() is necessary.
> 
> The parsing rules of new cpu-topology model obey the same restrictions/logic
> as the legacy socket/core/thread model especially on missing values computing.
> 
> Signed-off-by: Like Xu 
> ---
>  qemu-options.hx | 17 +-
>  vl.c| 89 -
>  2 files changed, 97 insertions(+), 9 deletions(-)
> 
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 5daa5a8fb0..7fad5b50ff 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -138,25 +138,26 @@ no incompatible TCG features have been enabled (e.g. 
> icount/replay).
>  ETEXI
>  
>  DEF("smp", HAS_ARG, QEMU_OPTION_smp,
> -"-smp 
> [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets]\n"
> +"-smp 
> [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,dies=dies][,sockets=sockets]\n"
>  "set the number of CPUs to 'n' [default=1]\n"
>  "maxcpus= maximum number of total cpus, including\n"
>  "offline CPUs for hotplug, etc\n"
> -"cores= number of CPU cores on one socket\n"
> +"cores= number of CPU cores on one socket (for PC, it's 
> on one die)\n"
>  "threads= number of threads on one CPU core\n"
> +"dies= number of CPU dies on one socket (for PC only)\n"
>  "sockets= number of discrete sockets in the system\n",
>  QEMU_ARCH_ALL)
>  STEXI
> -@item -smp 
> [cpus=]@var{n}[,cores=@var{cores}][,threads=@var{threads}][,sockets=@var{sockets}][,maxcpus=@var{maxcpus}]
> +@item -smp 
> [cpus=]@var{n}[,cores=@var{cores}][,threads=@var{threads}][,dies=dies][,sockets=@var{sockets}][,maxcpus=@var{maxcpus}]
>  @findex -smp
>  Simulate an SMP system with @var{n} CPUs. On the PC target, up to 255
>  CPUs are supported. On Sparc32 target, Linux limits the number of usable CPUs
>  to 4.
> -For the PC target, the number of @var{cores} per socket, the number
> -of @var{threads} per cores and the total number of @var{sockets} can be
> -specified. Missing values will be computed. If any on the three values is
> -given, the total number of CPUs @var{n} can be omitted. @var{maxcpus}
> -specifies the maximum number of hotpluggable CPUs.
> +For the PC target, the number of @var{cores} per die, the number of 
> @var{threads}
> +per cores, the number of @var{dies} per packages and the total number of
> +@var{sockets} can be specified. Missing values will be computed.
> +If any on the three values is given, the total number of CPUs @var{n} can be 
> omitted.
> +@var{maxcpus} specifies the maximum number of hotpluggable CPUs.
>  ETEXI
>  
>  DEF("numa", HAS_ARG, QEMU_OPTION_numa,
> diff --git a/vl.c b/vl.c
> index 8d92e2d209..66b577f447 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -63,6 +63,7 @@ int main(int argc, char **argv)
>  #include "sysemu/watchdog.h"
>  #include "hw/firmware/smbios.h"
>  #include "hw/acpi/acpi.h"
> +#include "hw/i386/pc.h"
>  #include "hw/xen/xen.h"
>  #include "hw/qdev.h"
>  #include "hw/loader.h"
> @@ -1248,6 +1249,9 @@ static QemuOptsList qemu_smp_opts = {
>  }, {
>  .name = "sockets",
>  .type = QEMU_OPT_NUMBER,
> +}, {
> +.name = "dies",
> +.type = QEMU_OPT_NUMBER,
>  }, {
>  .name = "cores",
>  .type = QEMU_OPT_NUMBER,
> @@ -1262,7 +1266,7 @@ static QemuOptsList qemu_smp_opts = {
>  },
>  };
>  
> -static void smp_parse(QemuOpts *opts)
> +static void __smp_parse(QemuOpts *opts)
>  {
>  if (opts) {
>  unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
> @@ -1334,6 +1338,89 @@ static void smp_parse(QemuOpts *opts)
>  }
>  }
>  
> +static void pc_smp_parse(QemuOpts *opts)
> +{
> +PCMachineState *pcms = (PCMachineState *)
> +object_dynamic_cast(OBJECT(current_machine), TYPE_PC_MACHINE);
> +
> +unsigned cpus= qemu_opt_get_number(opts, "cpus", 0);
> +unsigned sockets = qemu_opt_get_number(opts, "sockets", 0);
> +unsigned dies = qemu_opt_get_number(opts, "dies", 1);
> +unsigned cores   = qemu_opt_get_number(opts, "cores", 0);
> +unsigned threads = qemu_opt_get_number(opts, "threads", 0);
> +
> +/* compute missing values, prefer sockets over cores over threads */
> +if (cpus == 0 || sockets == 0) {
> +cores = cores > 0 ? cores : 1;
> +threads = threads > 0 ? threads : 1;
> +if (cpus == 0) {
> +sockets = sockets > 0 ? sockets : 1;
> +cpus = cores * threads * dies * sockets;
> +} else {
> +current_machine->smp.max_cpus =
> +  

Re: [Qemu-devel] [PATCH v2 5/5] block/nvme: add support for discard

2019-06-05 Thread Fam Zheng
On Wed, 04/17 22:53, Maxim Levitsky wrote:
> Signed-off-by: Maxim Levitsky 
> ---
>  block/nvme.c   | 80 ++
>  block/trace-events |  2 ++
>  2 files changed, 82 insertions(+)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index 35b925899f..b83912c627 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -110,6 +110,7 @@ typedef struct {
>  bool plugged;
>  
>  bool supports_write_zeros;
> +bool supports_discard;
>  
>  CoMutex dma_map_lock;
>  CoQueue dma_flush_queue;
> @@ -462,6 +463,7 @@ static void nvme_identify(BlockDriverState *bs, int 
> namespace, Error **errp)
>  
>  
>  s->supports_write_zeros = (idctrl->oncs & NVME_ONCS_WRITE_ZEROS) != 0;
> +s->supports_discard = (idctrl->oncs & NVME_ONCS_DSM) != 0;
>  
>  memset(resp, 0, 4096);
>  
> @@ -1144,6 +1146,83 @@ static coroutine_fn int 
> nvme_co_pwrite_zeroes(BlockDriverState *bs,
>  }
>  
>  
> +static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
> +int64_t offset, int bytes)

While you respin, you can align the parameters.
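i.e. roughly:

    static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
                                             int64_t offset,
                                             int bytes)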

> +{
> +BDRVNVMeState *s = bs->opaque;
> +NVMeQueuePair *ioq = s->queues[1];
> +NVMeRequest *req;
> +NvmeDsmRange *buf;
> +QEMUIOVector local_qiov;
> +int r;
> +
> +NvmeCmd cmd = {
> +.opcode = NVME_CMD_DSM,
> +.nsid = cpu_to_le32(s->nsid),
> +.cdw10 = 0, /*number of ranges - 0 based*/
> +.cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/
> +};
> +
> +NVMeCoData data = {
> +.ctx = bdrv_get_aio_context(bs),
> +.ret = -EINPROGRESS,
> +};
> +
> +if (!s->supports_discard) {
> +return -ENOTSUP;
> +}
> +
> +assert(s->nr_queues > 1);
> +
> +buf = qemu_try_blockalign0(bs, 4096);
> +if (!buf) {
> +return -ENOMEM;
> +}
> +
> +buf->nlb = bytes >> s->blkshift;
> +buf->slba = offset >> s->blkshift;

This buffer is for the device; do we need to do anything about the endianness?
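(For illustration only: if conversion is needed, the range setup would become
something like the sketch below, assuming NvmeDsmRange keeps 32-bit cattr/nlb
and a 64-bit slba as declared in include/block/nvme.h.)

    buf->cattr = cpu_to_le32(0);
    buf->nlb   = cpu_to_le32(bytes >> s->blkshift);
    buf->slba  = cpu_to_le64(offset >> s->blkshift);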

> +buf->cattr = 0;
> +
> +qemu_iovec_init(&local_qiov, 1);
> +qemu_iovec_add(&local_qiov, buf, 4096);
> +
> +req = nvme_get_free_req(ioq);
> +assert(req);
> +
> +qemu_co_mutex_lock(&s->dma_map_lock);
> +r = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov);
> +qemu_co_mutex_unlock(&s->dma_map_lock);
> +
> +if (r) {
> +req->busy = false;
> +return r;
> +}
> +
> +trace_nvme_dsm(s, offset, bytes);
> +
> +nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
> +
> +data.co = qemu_coroutine_self();
> +while (data.ret == -EINPROGRESS) {
> +qemu_coroutine_yield();
> +}
> +
> +qemu_co_mutex_lock(&s->dma_map_lock);
> +r = nvme_cmd_unmap_qiov(bs, &local_qiov);
> +qemu_co_mutex_unlock(&s->dma_map_lock);
> +if (r) {
> +return r;
> +}
> +
> +trace_nvme_dsm_done(s, offset, bytes, data.ret);
> +
> +qemu_iovec_destroy(&local_qiov);
> +qemu_vfree(buf);
> +return data.ret;
> +
> +}
> +
> +
>  static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
> BlockReopenQueue *queue, Error **errp)
>  {
> @@ -1250,6 +1329,7 @@ static BlockDriver bdrv_nvme = {
>  .bdrv_co_pwritev  = nvme_co_pwritev,
>  
>  .bdrv_co_pwrite_zeroes= nvme_co_pwrite_zeroes,
> +.bdrv_co_pdiscard = nvme_co_pdiscard,
>  
>  .bdrv_co_flush_to_disk= nvme_co_flush,
>  .bdrv_reopen_prepare  = nvme_reopen_prepare,
> diff --git a/block/trace-events b/block/trace-events
> index 943a58569f..e55ac5c40b 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -148,6 +148,8 @@ nvme_write_zeros(void *s, uint64_t offset, uint64_t 
> bytes, int flags) "s %p offs
>  nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int 
> align) "qiov %p n %d base %p size 0x%zx align 0x%x"
>  nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int 
> is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d"
>  nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int 
> ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d"
> +nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" 
> bytes %"PRId64""
> +nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p 
> offset %"PRId64" bytes %"PRId64" ret %d"
>  nvme_dma_map_flush(void *s) "s %p"
>  nvme_free_req_queue_wait(void *q) "q %p"
>  nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s 
> %p cmd %p req %p qiov %p entries %d"
> -- 
> 2.17.2
> 




Re: [Qemu-devel] [PATCH v2 1/5] target/i386: Add cpu die-level topology support for X86CPU

2019-06-05 Thread Eduardo Habkost
On Tue, May 21, 2019 at 12:50:52AM +0800, Like Xu wrote:
> The die-level as the first PC-specific cpu topology is added to the
> legacy cpu topology model which only covers sockets/cores/threads.
> 
> In the new model with die-level support, the total number of logical
> processors (including offline) on board will be calculated as:
> 
>  #cpus = #sockets * #dies * #cores * #threads
> 
> and considering compatibility, the default value for #dies is 1.
> 
> A new set of die-related variables are added in smp context and the
> CPUX86State.nr_dies is assigned in x86_cpu_initfn() from PCMachineState.
> 
> Signed-off-by: Like Xu 
> ---
>  hw/i386/pc.c   | 3 +++
>  include/hw/i386/pc.h   | 2 ++
>  include/hw/i386/topology.h | 2 ++
>  qapi/misc.json | 6 --
>  target/i386/cpu.c  | 9 +
>  target/i386/cpu.h  | 3 +++
>  6 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 896c22e32e..83ab53c814 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -2341,6 +2341,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
>  
>  topo.pkg_id = cpu->socket_id;
>  topo.core_id = cpu->core_id;
> +topo.die_id = cpu->die_id;
>  topo.smt_id = cpu->thread_id;
>  cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo);
>  }
> @@ -2692,6 +2693,8 @@ static const CPUArchIdList 
> *pc_possible_cpu_arch_ids(MachineState *ms)
>   ms->smp.cores, ms->smp.threads, &topo);
>  ms->possible_cpus->cpus[i].props.has_socket_id = true;
>  ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id;
> +ms->possible_cpus->cpus[i].props.has_die_id = true;
> +ms->possible_cpus->cpus[i].props.die_id = topo.die_id;
>  ms->possible_cpus->cpus[i].props.has_core_id = true;
>  ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
>  ms->possible_cpus->cpus[i].props.has_thread_id = true;
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index ce3c22951e..b5faf2ede9 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -24,6 +24,7 @@
>   * PCMachineState:
>   * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
>   * @boot_cpus: number of present VCPUs
> + * @smp_dies: number of dies per one package
>   */
>  struct PCMachineState {
>  /*< private >*/
> @@ -59,6 +60,7 @@ struct PCMachineState {
>  bool apic_xrupt_override;
>  unsigned apic_id_limit;
>  uint16_t boot_cpus;
> +unsigned smp_dies;
>  
>  /* NUMA information: */
>  uint64_t numa_nodes;
> diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
> index 1ebaee0f76..7f80498eb3 100644
> --- a/include/hw/i386/topology.h
> +++ b/include/hw/i386/topology.h
> @@ -47,6 +47,7 @@ typedef uint32_t apic_id_t;
>  
>  typedef struct X86CPUTopoInfo {
>  unsigned pkg_id;
> +unsigned die_id;
>  unsigned core_id;
>  unsigned smt_id;
>  } X86CPUTopoInfo;
> @@ -130,6 +131,7 @@ static inline void x86_topo_ids_from_apicid(apic_id_t 
> apicid,
>  topo->core_id = (apicid >> apicid_core_offset(nr_cores, nr_threads)) &
> ~(0xUL << apicid_core_width(nr_cores, 
> nr_threads));
>  topo->pkg_id = apicid >> apicid_pkg_offset(nr_cores, nr_threads);
> +topo->die_id = -1;
>  }
>  
>  /* Make APIC ID for the CPU 'cpu_index'
> diff --git a/qapi/misc.json b/qapi/misc.json
> index 8b3ca4fdd3..cd236c89b3 100644
> --- a/qapi/misc.json
> +++ b/qapi/misc.json
> @@ -2924,10 +2924,11 @@
>  #
>  # @node-id: NUMA node ID the CPU belongs to
>  # @socket-id: socket number within node/board the CPU belongs to
> -# @core-id: core number within socket the CPU belongs to
> +# @die-id: die number within node/board the CPU belongs to (Since 4.1)
> +# @core-id: core number within die the CPU belongs to
>  # @thread-id: thread number within core the CPU belongs to
>  #
> -# Note: currently there are 4 properties that could be present
> +# Note: currently there are 5 properties that could be present
>  # but management should be prepared to pass through other
>  # properties with device_add command to allow for future
>  # interface extension. This also requires the filed names to be kept in
> @@ -2938,6 +2939,7 @@
>  { 'struct': 'CpuInstanceProperties',
>'data': { '*node-id': 'int',
>  '*socket-id': 'int',
> +'*die-id': 'int',
>  '*core-id': 'int',
>  '*thread-id': 'int'
>}
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 9a93dd8be7..9bd35b4965 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -55,6 +55,7 @@
>  #include "hw/xen/xen.h"
>  #include "hw/i386/apic_internal.h"
>  #include "hw/boards.h"
> +#include "hw/i386/pc.h"

Now we have a circular dependency between target/i386/cpu.c and
hw/i386/pc.c.

>  #endif
>  
>  #include "disas/capstone.h"
> @@ -5595,7 +5596,13 @@ static 

Re: [Qemu-devel] [PATCH v2 1/5] target/i386: Add cpu die-level topology support for X86CPU

2019-06-05 Thread Eduardo Habkost
On Tue, May 21, 2019 at 12:50:52AM +0800, Like Xu wrote:
> The die-level as the first PC-specific cpu topology is added to the
> legacy cpu topology model which only covers sockets/cores/threads.
> 
> In the new model with die-level support, the total number of logical
> processors (including offline) on board will be calculated as:
> 
>  #cpus = #sockets * #dies * #cores * #threads
> 
> and considering compatibility, the default value for #dies is 1.
> 
> A new set of die-related variables are added in smp context and the
> CPUX86State.nr_dies is assigned in x86_cpu_initfn() from PCMachineState.
> 
> Signed-off-by: Like Xu 
> ---
>  hw/i386/pc.c   | 3 +++
>  include/hw/i386/pc.h   | 2 ++
>  include/hw/i386/topology.h | 2 ++
>  qapi/misc.json | 6 --
>  target/i386/cpu.c  | 9 +
>  target/i386/cpu.h  | 3 +++
>  6 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 896c22e32e..83ab53c814 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -2341,6 +2341,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
>  
>  topo.pkg_id = cpu->socket_id;
>  topo.core_id = cpu->core_id;
> +topo.die_id = cpu->die_id;
>  topo.smt_id = cpu->thread_id;
>  cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo);
>  }
> @@ -2692,6 +2693,8 @@ static const CPUArchIdList 
> *pc_possible_cpu_arch_ids(MachineState *ms)
>   ms->smp.cores, ms->smp.threads, &topo);
>  ms->possible_cpus->cpus[i].props.has_socket_id = true;
>  ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id;
> +ms->possible_cpus->cpus[i].props.has_die_id = true;
> +ms->possible_cpus->cpus[i].props.die_id = topo.die_id;
>  ms->possible_cpus->cpus[i].props.has_core_id = true;
>  ms->possible_cpus->cpus[i].props.core_id = topo.core_id;
>  ms->possible_cpus->cpus[i].props.has_thread_id = true;
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index ce3c22951e..b5faf2ede9 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -24,6 +24,7 @@
>   * PCMachineState:
>   * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling
>   * @boot_cpus: number of present VCPUs
> + * @smp_dies: number of dies per one package
>   */
>  struct PCMachineState {
>  /*< private >*/
> @@ -59,6 +60,7 @@ struct PCMachineState {
>  bool apic_xrupt_override;
>  unsigned apic_id_limit;
>  uint16_t boot_cpus;
> +unsigned smp_dies;
>  
>  /* NUMA information: */
>  uint64_t numa_nodes;
> diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
> index 1ebaee0f76..7f80498eb3 100644
> --- a/include/hw/i386/topology.h
> +++ b/include/hw/i386/topology.h
> @@ -47,6 +47,7 @@ typedef uint32_t apic_id_t;
>  
>  typedef struct X86CPUTopoInfo {
>  unsigned pkg_id;
> +unsigned die_id;

Isn't it better to add this field only on patch 4/5?

>  unsigned core_id;
>  unsigned smt_id;
>  } X86CPUTopoInfo;
> @@ -130,6 +131,7 @@ static inline void x86_topo_ids_from_apicid(apic_id_t 
> apicid,
>  topo->core_id = (apicid >> apicid_core_offset(nr_cores, nr_threads)) &
> ~(0xUL << apicid_core_width(nr_cores, 
> nr_threads));
>  topo->pkg_id = apicid >> apicid_pkg_offset(nr_cores, nr_threads);
> +topo->die_id = -1;

Why are you setting die_id = -1 here?

If die_id isn't valid yet, isn't it better to keep has_die_id =
false at pc_possible_cpu_arch_ids() above, and set has_die_id =
true only on patch 4/5?
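A purely illustrative sketch of that alternative:

    /* pc_possible_cpu_arch_ids(): don't advertise die_id until it is real */
    ms->possible_cpus->cpus[i].props.has_die_id = false;

    /* and in x86_topo_ids_from_apicid(), leave topo->die_id untouched
     * instead of forcing it to -1; both would flip over in patch 4/5 */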

>  }
>  
>  /* Make APIC ID for the CPU 'cpu_index'
> diff --git a/qapi/misc.json b/qapi/misc.json
> index 8b3ca4fdd3..cd236c89b3 100644
> --- a/qapi/misc.json
> +++ b/qapi/misc.json
> @@ -2924,10 +2924,11 @@
>  #
>  # @node-id: NUMA node ID the CPU belongs to
>  # @socket-id: socket number within node/board the CPU belongs to
> -# @core-id: core number within socket the CPU belongs to
> +# @die-id: die number within node/board the CPU belongs to (Since 4.1)
> +# @core-id: core number within die the CPU belongs to
>  # @thread-id: thread number within core the CPU belongs to
>  #
> -# Note: currently there are 4 properties that could be present
> +# Note: currently there are 5 properties that could be present
>  # but management should be prepared to pass through other
>  # properties with device_add command to allow for future
>  # interface extension. This also requires the filed names to be kept in
> @@ -2938,6 +2939,7 @@
>  { 'struct': 'CpuInstanceProperties',
>'data': { '*node-id': 'int',
>  '*socket-id': 'int',
> +'*die-id': 'int',
>  '*core-id': 'int',
>  '*thread-id': 'int'
>}
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 9a93dd8be7..9bd35b4965 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -55,6 +55,7 @@
>  #include "hw/xen/xen.h"

[Qemu-devel] spapr_pci: Advertise BAR reallocation capability

2019-06-05 Thread Alexey Kardashevskiy
The pseries guests do not normally allocate PCI resources and rely on
the system firmware doing so. Furthermore, at least at some point in
the past, the pseries guests were not even allowed to change BARs, and
that is probably still the case for phyp. So since the initial commit
we have had [1], which prevents resource reallocation.

This is not a problem until we want specific BAR alignments, for example,
PAGE_SIZE==64k to make sure we can still map MMIO BARs directly. For
the boot time devices we handle this in SLOF [2] but since QEMU's RTAS
does not allocate BARs, the guest does this instead and does not align
BARs even if Linux is given pci=resource_alignment=16@pci:0:0 as
PCI_PROBE_ONLY makes Linux ignore alignment requests.

ARM folks added a dial to control PCI_PROBE_ONLY via the device tree [3].
This makes use of the dial to advertise to the guest that we can handle
BAR reassignments.

We do not remove the flag from [1] as pseries guests are still supported
under phyp so having that removed may cause problems.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/platforms/pseries/setup.c?h=v5.1#n773
[2] 
https://git.qemu.org/?p=SLOF.git;a=blob;f=board-qemu/slof/pci-phb.fs;h=06729bcf77a0d4e900c527adcd9befe2a269f65d;hb=HEAD#l338
[3] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f81c11af
Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6dd8aaac3340..84d16f9edaca 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1209,6 +1209,9 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, 
void *fdt)
 _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
 }
 
+/* We can deal with BAR reallocation just fine, advertise it to the guest 
*/
+_FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
+
 spapr_dt_ov5_platform_support(spapr, fdt, chosen);
 
 g_free(stdout_path);
-- 
2.17.1




Re: [Qemu-devel] spapr_pci: Advertise BAR reallocation capability

2019-06-05 Thread Alexey Kardashevskiy
I changed my handy scripts for posting patches and the subject line
broke, do I need to repost? It made it to the patchworks though.



On 06/06/2019 14:09, Alexey Kardashevskiy wrote:
> The pseries guests do not normally allocate PCI resources and rely on
> the system firmware doing so. Furthermore, at least at some point in
> the past, the pseries guests were not even allowed to change BARs, and
> that is probably still the case for phyp. So since the initial commit
> we have had [1], which prevents resource reallocation.
> 
> This is not a problem until we want specific BAR alignments, for example,
> PAGE_SIZE==64k to make sure we can still map MMIO BARs directly. For
> the boot time devices we handle this in SLOF [2] but since QEMU's RTAS
> does not allocate BARs, the guest does this instead and does not align
> BARs even if Linux is given pci=resource_alignment=16@pci:0:0 as
> PCI_PROBE_ONLY makes Linux ignore alignment requests.
> 
> ARM folks added a dial to control PCI_PROBE_ONLY via the device tree [3].
> This makes use of the dial to advertise to the guest that we can handle
> BAR reassignments.
> 
> We do not remove the flag from [1] as pseries guests are still supported
> under phyp so having that removed may cause problems.
> 
> [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/platforms/pseries/setup.c?h=v5.1#n773
> [2] 
> https://git.qemu.org/?p=SLOF.git;a=blob;f=board-qemu/slof/pci-phb.fs;h=06729bcf77a0d4e900c527adcd9befe2a269f65d;hb=HEAD#l338
> [3] 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f81c11af
> Signed-off-by: Alexey Kardashevskiy 
> ---
>  hw/ppc/spapr.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6dd8aaac3340..84d16f9edaca 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1209,6 +1209,9 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, 
> void *fdt)
>  _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
>  }
>  
> +/* We can deal with BAR reallocation just fine, advertise it to the 
> guest */
> +_FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
> +
>  spapr_dt_ov5_platform_support(spapr, fdt, chosen);
>  
>  g_free(stdout_path);
> 

-- 
Alexey



Re: [Qemu-devel] spapr_pci: Advertise BAR reallocation capability

2019-06-05 Thread David Gibson
On Thu, Jun 06, 2019 at 02:13:20PM +1000, Alexey Kardashevskiy wrote:
> I changed my handy scripts for posting patches and the subject line
> broke, do I need to repost? It made it to the patchworks though.

No, that's fine I've seen it and will look at it when I have the chance.

> 
> 
> 
> On 06/06/2019 14:09, Alexey Kardashevskiy wrote:
> > The pseries guests do not normally allocate PCI resources and rely on
> > the system firmware doing so. Furthermore, at least at some point in
> > the past, the pseries guests were not even allowed to change BARs, and
> > that is probably still the case for phyp. So since the initial commit
> > we have had [1], which prevents resource reallocation.
> > 
> > This is not a problem until we want specific BAR alignments, for example,
> > PAGE_SIZE==64k to make sure we can still map MMIO BARs directly. For
> > the boot time devices we handle this in SLOF [2] but since QEMU's RTAS
> > does not allocate BARs, the guest does this instead and does not align
> > BARs even if Linux is given pci=resource_alignment=16@pci:0:0 as
> > PCI_PROBE_ONLY makes Linux ignore alignment requests.
> > 
> > ARM folks added a dial to control PCI_PROBE_ONLY via the device tree [3].
> > This makes use of the dial to advertise to the guest that we can handle
> > BAR reassignments.
> > 
> > We do not remove the flag from [1] as pseries guests are still supported
> > under phyp so having that removed may cause problems.
> > 
> > [1] 
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/powerpc/platforms/pseries/setup.c?h=v5.1#n773
> > [2] 
> > https://git.qemu.org/?p=SLOF.git;a=blob;f=board-qemu/slof/pci-phb.fs;h=06729bcf77a0d4e900c527adcd9befe2a269f65d;hb=HEAD#l338
> > [3] 
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f81c11af
> > Signed-off-by: Alexey Kardashevskiy 
> > ---
> >  hw/ppc/spapr.c | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 6dd8aaac3340..84d16f9edaca 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1209,6 +1209,9 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, 
> > void *fdt)
> >  _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
> >  }
> >  
> > +/* We can deal with BAR reallocation just fine, advertise it to the 
> > guest */
> > +_FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
> > +
> >  spapr_dt_ov5_platform_support(spapr, fdt, chosen);
> >  
> >  g_free(stdout_path);
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 1/6] ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls

2019-06-05 Thread Aravinda Prasad



On Thursday 06 June 2019 07:05 AM, David Gibson wrote:
> On Mon, Jun 03, 2019 at 01:17:23PM +0200, Greg Kurz wrote:
>> On Mon, 3 Jun 2019 12:12:43 +0200
>> Greg Kurz  wrote:
>>
>>> On Wed, 29 May 2019 11:10:14 +0530
>>> Aravinda Prasad  wrote:
>>>
 This patch adds support in QEMU to handle "ibm,nmi-register"
 and "ibm,nmi-interlock" RTAS calls.

 The machine check notification address is saved when the
 OS issues "ibm,nmi-register" RTAS call.

 This patch also handles the case when multiple processors
 experience machine check at or about the same time by
 handling "ibm,nmi-interlock" call. In such cases, as per
 PAPR, subsequent processors serialize waiting for the first
 processor to issue the "ibm,nmi-interlock" call. The second
 processor that also received a machine check error waits
 till the first processor is done reading the error log.
 The first processor issues "ibm,nmi-interlock" call
 when the error log is consumed. This patch implements the
 releasing part of the error-log while subsequent patch
 (which builds error log) handles the locking part.

 Signed-off-by: Aravinda Prasad 
 Reviewed-by: David Gibson 
 ---  
>>>
>>> The code looks okay but it still seems wrong to advertise the RTAS
>>> calls to the guest that early in the series. The linux kernel in
>>> the guest will assume FWNMI is functional, which isn't true until
>>> patch 6 (yes, migration is part of the feature, it should be
>>> supported upfront, not fixed afterwards).
>>>
>>> It doesn't help much to introduce the RTAS calls early and to
>>> modify them in the other patches. I'd rather see the rest of
>>> the code first and a final patch that introduces the fully
>>> functional RTAS calls and calls spapr_rtas_register().
>>>
>>
>> Thinking again, you should introduce the "fwnmi-mce" spapr capability in
>> its own patch first, default to "off" and and have the last patch in the
>> series to switch the default to "on" for newer machine types only.
>>
>> This patch should then only register the RTAS calls if "fwnmi-mce" is set
>> to "on".
> 
> Yes, I think this is a good approach.

ok
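Roughly, as an illustrative sketch using the cap constant from this series:

    /* patch introducing the capability: disabled for every machine type */
    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;

    /* final patch: the newest machine type defaults to on, while older
     * machine classes keep overriding it back to SPAPR_CAP_OFF */
    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;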

> 
>> This should address the fact that we don't want to expose a partially
>> implemented FWNMI feature to the guest, and we don't want to support
>> FWNMI at all with older machine types for the sake of compatibility.
>>
  hw/ppc/spapr.c |7 +
  hw/ppc/spapr_rtas.c|   65 
 
  include/hw/ppc/spapr.h |9 ++-
  3 files changed, 80 insertions(+), 1 deletion(-)

 diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
 index e2b33e5..fae28a9 100644
 --- a/hw/ppc/spapr.c
 +++ b/hw/ppc/spapr.c
 @@ -1808,6 +1808,11 @@ static void spapr_machine_reset(void)
  first_ppc_cpu->env.gpr[5] = 0;
  
  spapr->cas_reboot = false;
 +
 +spapr->guest_machine_check_addr = -1;
 +
 +/* Signal all vCPUs waiting on this condition */
 +qemu_cond_broadcast(&spapr->mc_delivery_cond);
  }
  
  static void spapr_create_nvram(SpaprMachineState *spapr)
 @@ -3072,6 +3077,8 @@ static void spapr_machine_init(MachineState *machine)
  
  kvmppc_spapr_enable_inkernel_multitce();
  }
 +
 +qemu_cond_init(&spapr->mc_delivery_cond);
  }
  
  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
 diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
 index 5bc1a93..e7509cf 100644
 --- a/hw/ppc/spapr_rtas.c
 +++ b/hw/ppc/spapr_rtas.c
 @@ -352,6 +352,38 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
 SpaprMachineState *spapr,
  rtas_st(rets, 1, 100);
  }
  
 +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
 +  SpaprMachineState *spapr,
 +  uint32_t token, uint32_t nargs,
 +  target_ulong args,
 +  uint32_t nret, target_ulong rets)
 +{
 +hwaddr rtas_addr = spapr_get_rtas_addr();
 +
 +if (!rtas_addr) {
 +rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
 +return;
 +}
 +
 +spapr->guest_machine_check_addr = rtas_ld(args, 1);
 +rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 +}
 +
 +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
 +   SpaprMachineState *spapr,
 +   uint32_t token, uint32_t nargs,
 +   target_ulong args,
 +   uint32_t nret, target_ulong rets)
 +{
 +if (spapr->guest_machine_check_addr == -1) {
 +/* NMI register not called */
 +rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 +} else {
 +qemu_cond_signal(&spapr

Re: [Qemu-devel] [PATCH v9 3/6] target/ppc: Handle NMI guest exit

2019-06-05 Thread Aravinda Prasad



On Thursday 06 June 2019 07:13 AM, David Gibson wrote:
> On Wed, May 29, 2019 at 11:10:32AM +0530, Aravinda Prasad wrote:
>> Memory errors such as bit flips that cannot be corrected
>> by hardware are passed on to the kernel for handling.
>> If the memory address in error belongs to the guest, then
>> the guest kernel is responsible for taking suitable action.
>> Patch [1] enhances KVM to exit guest with exit reason
>> set to KVM_EXIT_NMI in such cases. This patch handles
>> KVM_EXIT_NMI exit.
>>
>> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
>> (e20bbd3d and related commits)
>>
>> Signed-off-by: Aravinda Prasad 
>> ---
>>  hw/ppc/spapr.c  |1 +
>>  hw/ppc/spapr_events.c   |   23 +++
>>  hw/ppc/spapr_rtas.c |5 +
>>  include/hw/ppc/spapr.h  |6 ++
>>  target/ppc/kvm.c|   16 
>>  target/ppc/kvm_ppc.h|2 ++
>>  target/ppc/trace-events |1 +
>>  7 files changed, 54 insertions(+)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index fae28a9..6b6c962 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1809,6 +1809,7 @@ static void spapr_machine_reset(void)
>>  
>>  spapr->cas_reboot = false;
>>  
>> +spapr->mc_status = -1;
>>  spapr->guest_machine_check_addr = -1;
>>  
>>  /* Signal all vCPUs waiting on this condition */
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index ae0f093..a18446b 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -620,6 +620,29 @@ void 
>> spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>>  RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, 
>> &drc_id);
>>  }
>>  
>> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>> +{
>> +SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> 
> You ignore the 'recovered' parameter, is that right?

I use the "recovered" parameter, but in the next patch. This was left
out when the patch was split in one of the earlier versions. Will modify it.

> 
>> +while (spapr->mc_status != -1) {
>> +/*
>> + * Check whether the same CPU got machine check error
>> + * while still handling the mc error (i.e., before
>> + * that CPU called "ibm,nmi-interlock")
>> + */
>> +if (spapr->mc_status == cpu->vcpu_id) {
>> +qemu_system_guest_panicked(NULL);
>> +return;
>> +}
>> +qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
>> +/* Meanwhile if the system is reset, then just return */
>> +if (spapr->guest_machine_check_addr == -1) {
>> +return;
>> +}
>> +}
>> +spapr->mc_status = cpu->vcpu_id;
>> +}
>> +
>>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>  uint32_t token, uint32_t nargs,
>>  target_ulong args,
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index e7509cf..e0bdfc8 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -379,6 +379,11 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>  /* NMI register not called */
>>  rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>>  } else {
>> +/*
>> + * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>> + * hence unset mc_status.
>> + */
>> +spapr->mc_status = -1;
>>  qemu_cond_signal(&spapr->mc_delivery_cond);
>>  rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>  }
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 9dc5e30..fc3a776 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -190,6 +190,11 @@ struct SpaprMachineState {
>>  
>>  /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
>>  target_ulong guest_machine_check_addr;
>> +/*
>> + * mc_status is set to -1 if mc is not in progress, else is set to the 
>> CPU
>> + * handling the mc.
>> + */
>> +int mc_status;
>>  QemuCond mc_delivery_cond;
>>  
>>  /*< public >*/
>> @@ -793,6 +798,7 @@ void spapr_clear_pending_events(SpaprMachineState 
>> *spapr);
>>  int spapr_max_server_number(SpaprMachineState *spapr);
>>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
>>uint64_t pte0, uint64_t pte1);
>> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
>>  
>>  /* DRC callbacks. */
>>  void spapr_core_release(DeviceState *dev);
>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index 3bf0a46..39f1a73 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -1761,6 +1761,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
>> *run)
>>  ret = 0;
>>  break;
>>  
>> +case KVM_EXIT_NMI:
>> +trace_kvm_handle_nmi_exception();
>> +ret = kvm_handle_nmi(cpu, run);
>> +break;
>> +
>>  default:
>>  fprintf(stderr, "KVM: unkn

Re: [Qemu-devel] [PATCH v7 0/4] rng-builtin: add an RNG backend that uses qemu_guest_getrandom()

2019-06-05 Thread Markus Armbruster
Laurent Vivier  writes:

> On 05/06/2019 19:56, Markus Armbruster wrote:
>> Laurent Vivier  writes:
>> 
>>> On 05/06/2019 15:05, Markus Armbruster wrote:
 Laurent Vivier  writes:

> Add a new RNG backend using QEMU builtin getrandom function.
>
> v7: rebase on master
> Make rng-builtin asynchronous with QEMUBH (removed existing R-b)

 Pardon the ignorant question: why is that necessary?

>>>
>>> Because the request_entropy() function is called while the request is not
>>> yet in the requests queue, the loop on !QSIMPLEQ_EMPTY(&s->parent.requests)
>>> doesn't process it. The request is added just after the call.
>> 
>> In rng_backend_request_entropy().  I see.  Any particular reason for
>> this order?  "I don't know" is an acceptable answer :)
>> 
>
> Yes...
>
> and there is a reason:
>
> in rng_random_request_entropy(), QSIMPLEQ_EMPTY() is used to know if we
> have to register an fd handler with qemu_set_fd_handler().
>
> For me, it seemed easier to use a QEMUBH rather than to change the
> existing algorithm, as the backend was designed to be asynchronous.

In your shoes, I'd be tempted to explore whether changing the order
simplifies things overall.  I'm not asking you to do that; your patch is
okay as is.
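(For reference, the reordering being discussed would amount to queueing the
request before invoking the backend hook, roughly as below; the exact field
names in backends/rng.c are assumptions here.)

    /* in rng_backend_request_entropy(), hypothetically: */
    QSIMPLEQ_INSERT_TAIL(&s->requests, req, next);  /* queue first... */
    k->request_entropy(s, req);                     /* ...then notify the backend */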

Thanks!



Re: [Qemu-devel] [PATCH v7 2/4] rng-builtin: add an RNG backend that uses qemu_guest_getrandom()

2019-06-05 Thread Markus Armbruster
Laurent Vivier  writes:

> Add a new RNG backend using QEMU builtin getrandom function.
>
> It can be created and used with something like:
>
> ... -object rng-builtin,id=rng0 -device virtio-rng,rng=rng0 ...
>
> Signed-off-by: Laurent Vivier 
> ---
>  backends/Makefile.objs |  2 +-
>  backends/rng-builtin.c | 78 ++
>  qemu-options.hx|  7 
>  3 files changed, 86 insertions(+), 1 deletion(-)
>  create mode 100644 backends/rng-builtin.c
>
> diff --git a/backends/Makefile.objs b/backends/Makefile.objs
> index 981e8e122f2c..f0691116e86e 100644
> --- a/backends/Makefile.objs
> +++ b/backends/Makefile.objs
> @@ -1,4 +1,4 @@
> -common-obj-y += rng.o rng-egd.o
> +common-obj-y += rng.o rng-egd.o rng-builtin.o
>  common-obj-$(CONFIG_POSIX) += rng-random.o
>  
>  common-obj-$(CONFIG_TPM) += tpm.o
> diff --git a/backends/rng-builtin.c b/backends/rng-builtin.c
> new file mode 100644
> index ..3381d47174df
> --- /dev/null
> +++ b/backends/rng-builtin.c
> @@ -0,0 +1,78 @@
> +/*
> + * QEMU Builtin Random Number Generator Backend
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "sysemu/rng.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/guest-random.h"
> +
> +#define TYPE_RNG_BUILTIN "rng-builtin"
> +#define RNG_BUILTIN(obj) OBJECT_CHECK(RngBuiltin, (obj), TYPE_RNG_BUILTIN)
> +
> +typedef struct RngBuiltin {
> +RngBackend parent;
> +QEMUBH *bh;
> +} RngBuiltin;
> +
> +static void rng_builtin_receive_entropy_bh(void *opaque)
> +{
> +RngBuiltin *s = opaque;
> +
> +while (!QSIMPLEQ_EMPTY(&s->parent.requests)) {
> +RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
> +
> +qemu_guest_getrandom_nofail(req->data, req->size);
> +
> +req->receive_entropy(req->opaque, req->data, req->size);
> +
> +rng_backend_finalize_request(&s->parent, req);
> +}
> +}
> +
> +static void rng_builtin_request_entropy(RngBackend *b, RngRequest *req)
> +{
> +RngBuiltin *s = RNG_BUILTIN(b);
> +
> +qemu_bh_schedule(s->bh);
> +}

A comment explaining the need for a BH would be nice.
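Maybe something along these lines (wording is only a suggestion):

    static void rng_builtin_request_entropy(RngBackend *b, RngRequest *req)
    {
        RngBuiltin *s = RNG_BUILTIN(b);

        /*
         * The caller only appends @req to s->parent.requests after this
         * callback returns, so serve it from a bottom half instead of
         * walking the queue right here.
         */
        qemu_bh_schedule(s->bh);
    }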

Regardless:
Reviewed-by: Markus Armbruster 

[...]



Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 5/6] ppc: spapr: Enable FWNMI capability

2019-06-05 Thread Aravinda Prasad



On Thursday 06 June 2019 08:32 AM, David Gibson wrote:
> On Tue, Jun 04, 2019 at 12:15:26PM +0530, Aravinda Prasad wrote:
>>
>>
>> On Monday 03 June 2019 08:55 PM, Greg Kurz wrote:
>>> On Wed, 29 May 2019 11:10:49 +0530
>>> Aravinda Prasad  wrote:
>>>
 Enable the KVM capability KVM_CAP_PPC_FWNMI so that
 the KVM causes guest exit with NMI as exit reason
 when it encounters a machine check exception on the
 address belonging to a guest. Without this capability
 enabled, KVM redirects machine check exceptions to
 guest's 0x200 vector.

 This patch also deals with the case when a guest with
 the KVM_CAP_PPC_FWNMI capability enabled is attempted
 to migrate to a host that does not support this
 capability.

 Signed-off-by: Aravinda Prasad 
 ---
>>>
>>> As suggested in another mail, it may be worth introducing the sPAPR cap
>>> in its own patch, earlier in the series.
>>
>> Sure, also as a workaround mentioned in the reply to that mail, I am
>> thinking of returning RTAS_OUT_NOT_SUPPORTED to rtas nmi register call
>> until the entire functionality is implemented. This will help solve
>> spapr cap issue as well.
> 
> Not registering the RTAS call at all is the correct way to handle that
> case.

ok.

> 
>>
>>>
>>> Anyway, I have some comments below.
>>>
  hw/ppc/spapr.c |1 +
  hw/ppc/spapr_caps.c|   24 
  hw/ppc/spapr_rtas.c|   18 ++
  include/hw/ppc/spapr.h |4 +++-
  target/ppc/kvm.c   |   19 +++
  target/ppc/kvm_ppc.h   |   12 
  6 files changed, 77 insertions(+), 1 deletion(-)

 diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
 index c97f6a6..e8a77636 100644
 --- a/hw/ppc/spapr.c
 +++ b/hw/ppc/spapr.c
 @@ -4364,6 +4364,7 @@ static void spapr_machine_class_init(ObjectClass 
 *oc, void *data)
  smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
  smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
  smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
 +smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
  spapr_caps_add_properties(smc, &error_abort);
  smc->irq = &spapr_irq_dual;
  smc->dr_phb_enabled = true;
 diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
 index 31b4661..ef9e612 100644
 --- a/hw/ppc/spapr_caps.c
 +++ b/hw/ppc/spapr_caps.c
 @@ -479,6 +479,20 @@ static void cap_ccf_assist_apply(SpaprMachineState 
 *spapr, uint8_t val,
  }
  }
  
 +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
 +Error **errp)
 +{
 +if (!val) {
 +return; /* Disabled by default */
 +}
 +
 +if (tcg_enabled()) {
 +error_setg(errp, "No fwnmi support in TCG, try 
 cap-fwnmi-mce=off");
>>>
>>> Maybe expand "fwnmi" to "Firmware Assisted Non-Maskable Interrupts" ?
>>
>> sure..
>>
>>>
 +} else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
 +error_setg(errp, "Requested fwnmi capability not support by 
 KVM");
>>>
>>> Maybe reword and add a hint:
>>>
>>> "KVM implementation does not support Firmware Assisted Non-Maskable 
>>> Interrupts, try cap-fwnmi-mce=off"
>>
>> sure..
>>
>>>
>>>
 +}
 +}
 +
  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
  [SPAPR_CAP_HTM] = {
  .name = "htm",
 @@ -578,6 +592,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = 
 {
  .type = "bool",
  .apply = cap_ccf_assist_apply,
  },
 +[SPAPR_CAP_FWNMI_MCE] = {
 +.name = "fwnmi-mce",
 +.description = "Handle fwnmi machine check exceptions",
 +.index = SPAPR_CAP_FWNMI_MCE,
 +.get = spapr_cap_get_bool,
 +.set = spapr_cap_set_bool,
 +.type = "bool",
 +.apply = cap_fwnmi_mce_apply,
 +},
  };
  
  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
 @@ -717,6 +740,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
 SPAPR_CAP_HPT_MAXPAGESIZE);
  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
 +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
  
  void spapr_caps_init(SpaprMachineState *spapr)
  {
 diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
 index e0bdfc8..91a7ab9 100644
 --- a/hw/ppc/spapr_rtas.c
 +++ b/hw/ppc/spapr_rtas.c
 @@ -49,6 +49,7 @@
  #include "hw/ppc/fdt.h"
  #include "target/ppc/mmu-hash64.h"
  #include "target/ppc/mmu-book3s-v3.h"
 +#include "kvm_ppc.h"
  
  static void rtas_display_character(PowerPCCPU *cpu, S

Re: [Qemu-devel] [PATCH v4 04/11] acpi: introduce AcpiDeviceIfClass.build_mem_ranges hook

2019-06-05 Thread Tao Xu

On 5/24/2019 8:35 PM, Igor Mammedov wrote:

On Wed,  8 May 2019 14:17:19 +0800
Tao Xu  wrote:


Add build_mem_ranges callback to AcpiDeviceIfClass and use
it for generating SRAT and HMAT numa memory ranges.

Suggested-by: Igor Mammedov 
Co-developed-by: Liu Jingqi 
Signed-off-by: Liu Jingqi 
Signed-off-by: Tao Xu 
---

...

diff --git a/stubs/pc_build_mem_ranges.c b/stubs/pc_build_mem_ranges.c
new file mode 100644
index 00..0f104ba79d
--- /dev/null
+++ b/stubs/pc_build_mem_ranges.c
@@ -0,0 +1,6 @@
+#include "qemu/osdep.h"
+#include "hw/i386/pc.h"
+
+void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *machine)
+{
+}


why do you need stub?


Hi Igor,

I have a question here. I use a stub because we add the hook pointer in
piix4.c, but other architectures such as MIPS also use piix4. Without the
stub, it would fail to compile, just like pc_madt_cpu_entry.

Or is there another way to make this hook PC-only?

Thank you!




Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 1/6] ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls

2019-06-05 Thread Aravinda Prasad



On Tuesday 04 June 2019 08:20 PM, Greg Kurz wrote:
> On Tue, 4 Jun 2019 11:38:31 +0530
> Aravinda Prasad  wrote:
> 
>> On Monday 03 June 2019 04:47 PM, Greg Kurz wrote:
>>> On Mon, 3 Jun 2019 12:12:43 +0200
>>> Greg Kurz  wrote:
>>>   
 On Wed, 29 May 2019 11:10:14 +0530
 Aravinda Prasad  wrote:
  
> This patch adds support in QEMU to handle "ibm,nmi-register"
> and "ibm,nmi-interlock" RTAS calls.
>
> The machine check notification address is saved when the
> OS issues "ibm,nmi-register" RTAS call.
>
> This patch also handles the case when multiple processors
> experience machine check at or about the same time by
> handling "ibm,nmi-interlock" call. In such cases, as per
> PAPR, subsequent processors serialize waiting for the first
> processor to issue the "ibm,nmi-interlock" call. The second
> processor that also received a machine check error waits
> till the first processor is done reading the error log.
> The first processor issues "ibm,nmi-interlock" call
> when the error log is consumed. This patch implements the
> releasing part of the error-log while subsequent patch
> (which builds error log) handles the locking part.
>
> Signed-off-by: Aravinda Prasad 
> Reviewed-by: David Gibson 
> ---

 The code looks okay but it still seems wrong to advertise the RTAS
 calls to the guest that early in the series. The linux kernel in
 the guest will assume FWNMI is functional, which isn't true until
 patch 6 (yes, migration is part of the feature, it should be
 supported upfront, not fixed afterwards).

 It doesn't help much to introduce the RTAS calls early and to
 modify them in the other patches. I'd rather see the rest of
 the code first and a final patch that introduces the fully
 functional RTAS calls and calls spapr_rtas_register().
  
>>>
>>> Thinking again, you should introduce the "fwnmi-mce" spapr capability in
>>> its own patch first, default to "off" and and have the last patch in the
>>> series to switch the default to "on" for newer machine types only.
>>>
>>> This patch should then only register the RTAS calls if "fwnmi-mce" is set
>>> to "on".
>>>
>>> This should address the fact that we don't want to expose a partially
>>> implemented FWNMI feature to the guest, and we don't want to support
>>> FWNMI at all with older machine types for the sake of compatibility.  
>>
>> When you say "expose a partially implemented FWNMI feature to the
>> guest", do you mean while debugging/bisect we may end up with exposing
>> the partially implemented FWNMI feature? Otherwise it is expected that
> 
> Yes, we don't want to break someone else's bisect.

ok.

> 
>> QEMU runs with all the 6 patches.
>>
>> If that is the case, I will have the rtas nmi register functionality as
>> the last patch in the series. This way we don't have to have spapr cap
>> turned off first and later turned on. However, as mentioned earlier
>> (when David raised the same concern), use of guest_machine_check_addr
>> may look odd at other patches as it is set only during rtas nmi register.
>>
> 
> Why odd ?

see below

> 
>> Or else, as a workaround, I can return RTAS_OUT_NOT_SUPPORTED for rtas
>> nmi register till the entire functionality is implemented and only in
>> the last patch in the series I will return RTAS_OUT_SUCCESS. This will
>> ensure that we have a logical connection between the patches and the
>> partially implemented fwnmi is not exposed to the guest kernel.
>>
> 
> Not exactly true. FWNMI would be exposed to the guest in the device tree
> and the guest kernel would _just_ fail to set the fwnmi_active global:
> 
>   if (0 == rtas_call(ibm_nmi_register, 2, 1, NULL, system_reset_addr,
>   machine_check_addr))
>   fwnmi_active = 1;

Sorry for the confusion. I thought the suggestion was to introduce the
rtas_ibm_nmi_register() call later in the series, but now I see that I
can still have rtas_ibm_nmi_register(), but have a final patch that
calls spapr_rtas_register().
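A rough sketch of what that final patch could do, assuming the cap accessor
and token names used elsewhere in this series:

    /* e.g. from spapr_machine_init(), once capabilities are settled */
    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
        spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
                            rtas_ibm_nmi_register);
        spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
                            rtas_ibm_nmi_interlock);
    }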


> 
>> Regards,
>> Aravinda
>>
>>
>>
>>
>>>   
>  hw/ppc/spapr.c |7 +
>  hw/ppc/spapr_rtas.c|   65 
> 
>  include/hw/ppc/spapr.h |9 ++-
>  3 files changed, 80 insertions(+), 1 deletion(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e2b33e5..fae28a9 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1808,6 +1808,11 @@ static void spapr_machine_reset(void)
>  first_ppc_cpu->env.gpr[5] = 0;
>  
>  spapr->cas_reboot = false;
> +
> +spapr->guest_machine_check_addr = -1;
> +
> +/* Signal all vCPUs waiting on this condition */
> +qemu_cond_broadcast(&spapr->mc_delivery_cond);
>  }
>  
>  static void spapr_create_nvram(SpaprMachineState *spapr)
> @@ -3072,6 +3077,8 @@ static

Re: [Qemu-devel] Deprecation policy and build dependencies

2019-06-05 Thread Markus Armbruster
Eric Blake  writes:

> On 6/5/19 3:13 PM, Eduardo Habkost wrote:
>
>>> IOW, I don't think RHEL-7 support as a build platform blocks us from
>>> dropping py2. We merely need to tweak our build platforms doc to clarify
>>> our intent wrt add-on yum repos.
>> 
>> If we clarify the docs in QEMU 4.1, is there anything that
>> prevents us from removing Python 2 support in QEMU 4.1 too?
>
> My take (but not definitive): if we have any CI setups that are testing
> RHEL 7 without software collections and/or EPEL, then save Python 2
> removal for 4.2 to give us time to update CI setups. But if all of our
> CI setups are already fine, and we clarify the docs, then I'm all for
> getting rid of Python 2 support in 4.1.

There's still time to update CI setups without undue haste.  But I agree
we don't want to lose CI even temporarily just to expedite getting rid
of Python 2.

> Similarly, if we are going to outlaw in-tree builds, let's get that done
> in 4.1 instead of waiting yet another release.

For that we need patches.



Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 1/6] ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls

2019-06-05 Thread Aravinda Prasad



On Thursday 06 June 2019 07:04 AM, David Gibson wrote:
> On Wed, May 29, 2019 at 11:10:14AM +0530, Aravinda Prasad wrote:
>> This patch adds support in QEMU to handle "ibm,nmi-register"
>> and "ibm,nmi-interlock" RTAS calls.
>>
>> The machine check notification address is saved when the
>> OS issues "ibm,nmi-register" RTAS call.
>>
>> This patch also handles the case when multiple processors
>> experience machine check at or about the same time by
>> handling "ibm,nmi-interlock" call. In such cases, as per
>> PAPR, subsequent processors serialize waiting for the first
>> processor to issue the "ibm,nmi-interlock" call. The second
>> processor that also received a machine check error waits
>> till the first processor is done reading the error log.
>> The first processor issues "ibm,nmi-interlock" call
>> when the error log is consumed. This patch implements the
>> releasing part of the error-log while subsequent patch
>> (which builds error log) handles the locking part.
>>
>> Signed-off-by: Aravinda Prasad 
>> Reviewed-by: David Gibson 
>> ---
>>  hw/ppc/spapr.c |7 +
>>  hw/ppc/spapr_rtas.c|   65 
>> 
>>  include/hw/ppc/spapr.h |9 ++-
>>  3 files changed, 80 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index e2b33e5..fae28a9 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1808,6 +1808,11 @@ static void spapr_machine_reset(void)
>>  first_ppc_cpu->env.gpr[5] = 0;
>>  
>>  spapr->cas_reboot = false;
>> +
>> +spapr->guest_machine_check_addr = -1;
>> +
>> +/* Signal all vCPUs waiting on this condition */
>> +qemu_cond_broadcast(&spapr->mc_delivery_cond);
>>  }
>>  
>>  static void spapr_create_nvram(SpaprMachineState *spapr)
>> @@ -3072,6 +3077,8 @@ static void spapr_machine_init(MachineState *machine)
>>  
>>  kvmppc_spapr_enable_inkernel_multitce();
>>  }
>> +
>> +qemu_cond_init(&spapr->mc_delivery_cond);
>>  }
>>  
>>  static int spapr_kvm_type(MachineState *machine, const char *vm_type)
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index 5bc1a93..e7509cf 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -352,6 +352,38 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
>> SpaprMachineState *spapr,
>>  rtas_st(rets, 1, 100);
>>  }
>>  
>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>> +  SpaprMachineState *spapr,
>> +  uint32_t token, uint32_t nargs,
>> +  target_ulong args,
>> +  uint32_t nret, target_ulong rets)
>> +{
>> +hwaddr rtas_addr = spapr_get_rtas_addr();
>> +
>> +if (!rtas_addr) {
>> +rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +return;
>> +}
>> +
>> +spapr->guest_machine_check_addr = rtas_ld(args, 1);
>> +rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +
>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> +   SpaprMachineState *spapr,
>> +   uint32_t token, uint32_t nargs,
>> +   target_ulong args,
>> +   uint32_t nret, target_ulong rets)
>> +{
>> +if (spapr->guest_machine_check_addr == -1) {
>> +/* NMI register not called */
>> +rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>> +} else {
>> +qemu_cond_signal(&spapr->mc_delivery_cond);
>> +rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +}
>> +
>>  static struct rtas_call {
>>  const char *name;
>>  spapr_rtas_fn fn;
>> @@ -470,6 +502,35 @@ void spapr_load_rtas(SpaprMachineState *spapr, void 
>> *fdt, hwaddr addr)
>>  }
>>  }
>>  
>> +hwaddr spapr_get_rtas_addr(void)
>> +{
>> +SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> +int rtas_node;
>> +const struct fdt_property *rtas_addr_prop;
>> +void *fdt = spapr->fdt_blob;
>> +uint32_t rtas_addr;
>> +
>> +/* fetch rtas addr from fdt */
>> +rtas_node = fdt_path_offset(fdt, "/rtas");
>> +if (rtas_node == 0) {
>> +return 0;
> 
> This is incorrect: a return code < 0 indicates an error which you
> should check for.  A return code of 0 indicates the root node, which
> could only happen if libfdt was badly buggy.

ok

> 
>> +}
>> +
>> +rtas_addr_prop = fdt_get_property(fdt, rtas_node, "linux,rtas-base", 
>> NULL);
> 
> fdt_get_property is generally only needed for certain edge cases.
> fdt_getprop() is a better option.

ok
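Putting both comments together, the helper could be shaped roughly like this
(sketch only, not a drop-in replacement):

    hwaddr spapr_get_rtas_addr(void)
    {
        SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
        void *fdt = spapr->fdt_blob;
        const uint32_t *rtas_addr_prop;
        int rtas_node;

        /* fetch rtas addr from fdt */
        rtas_node = fdt_path_offset(fdt, "/rtas");
        if (rtas_node < 0) {
            return 0;
        }

        rtas_addr_prop = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL);
        if (!rtas_addr_prop) {
            return 0;
        }

        /* SLOF only provides the 32-bit instantiate-rtas variant */
        return fdt32_to_cpu(*rtas_addr_prop);
    }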

> 
>> +if (!rtas_addr_prop) {
>> +return 0;
>> +}
>> +
>> +/*
>> + * We assume that the OS called RTAS instantiate-rtas, but some other
>> + * OS might call RTAS instantiate-rtas-64 instead. This fine as of now
>> + * as SLOF only supports 32-bit variant.
>> + */
>> +rtas_addr = fdt32_to_cpu(*(uint32_t *)rtas_addr_prop->data);
>> +

Re: [Qemu-devel] [Qemu-devel PATCH v2 2/2] util/main-loop: Fix incorrect assertion

2019-06-05 Thread Markus Armbruster
You neglected to cc: the file's maintainer.  I'm doing that for you now.
In the future, use scripts/get_maintainer.pl to find maintainers you
might want to cc:.

Lidong Chen  writes:

> The check for poll_fds in g_assert() was incorrect. The correct assertion
> should check "n_poll_fds + w->num <= ARRAY_SIZE(poll_fds)" because the
> subsequent for-loop is doing access to poll_fds[n_poll_fds + i] where i
> is in [0, w->num).
>
> Signed-off-by: Lidong Chen 
> Suggested-by: Peter Maydell 
> Suggested-by: Liam Merwick 
> Reviewed-by: Liran Alon 
> Reviewed-by: Darren Kenny 
> Reviewed-by: Li Qiang 
> ---
>  util/main-loop.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/util/main-loop.c b/util/main-loop.c
> index e1e349c..a9f4e8d 100644
> --- a/util/main-loop.c
> +++ b/util/main-loop.c
> @@ -422,7 +422,7 @@ static int os_host_main_loop_wait(int64_t timeout)
>  g_main_context_prepare(context, &max_priority);
>  n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
>poll_fds, ARRAY_SIZE(poll_fds));
> -g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));
> +g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));
>  
>  for (i = 0; i < w->num; i++) {
>  poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];



Re: [Qemu-devel] Qemu baseline requirements/portability?

2019-06-05 Thread Markus Armbruster
"H. Peter Anvin"  writes:

> On 6/5/19 12:55 PM, H. Peter Anvin wrote:
>> Hi,
>> 
>> I am writing some code I'm hoping will be able to make it into Qemu, but I
>> can't seem to find what the baseline portability requirements are.  I'm
>> specifically wondering about newer POSIX features like openat(), which seems
>> to be used in the 9p filesystem and nowhere else, and what version of glib 
>> one
>> can rely on?
>> 
>
> Specifically, I'm trying to satisfy a 10-year-old request by me and others to
> support composite initrd during Linux boot.

Please peruse qemu-doc.texi @appendix Supported build platforms.  If it
doesn't fully answer your question, ask for clarification here.



Re: [Qemu-devel] [PATCH v6 4/6] include/elf: Add defines related to notes for GNU systems

2019-06-05 Thread Aleksandar Markovic
On Jun 5, 2019 11:03 PM, "Richard Henderson" 
wrote:
>
> This is a collection of related

Related to what?

> defines for notes, copied
> from glibc's <elf.h>.  We're not going to use all of these
> right away, but it seemed foolish

I don't think this is an appropriate word for a commit message.

> to cherry-pick only the
> ones we need now.
>
> Signed-off-by: Richard Henderson 
> ---
>  include/elf.h | 48 
>  1 file changed, 48 insertions(+)
>
> diff --git a/include/elf.h b/include/elf.h
> index ea7708a4ea..6f3eada36f 100644
> --- a/include/elf.h
> +++ b/include/elf.h
> @@ -1650,6 +1650,54 @@ typedef struct elf64_shdr {
>  #define NT_ARM_HW_WATCH 0x403   /* ARM hardware watchpoint
registers */
>  #define NT_ARM_SYSTEM_CALL  0x404   /* ARM system call number */
>
> +/* Defined note types for GNU systems.  */
> +
> +#define NT_GNU_ABI_TAG  1   /* ABI information */
> +#define NT_GNU_HWCAP    2   /* Synthetic hwcap information */
> +#define NT_GNU_BUILD_ID 3   /* Build ID */
> +#define NT_GNU_GOLD_VERSION 4   /* Version of ld.gold */
> +#define NT_GNU_PROPERTY_TYPE_0  5   /* Program property */
> +
> +/* Values used in GNU .note.gnu.property notes
(NT_GNU_PROPERTY_TYPE_0).  */
> +
> +#define GNU_PROPERTY_STACK_SIZE 1
> +#define GNU_PROPERTY_NO_COPY_ON_PROTECTED   2
> +
> +#define GNU_PROPERTY_LOPROC 0xc0000000
> +#define GNU_PROPERTY_HIPROC 0xdfffffff
> +#define GNU_PROPERTY_LOUSER 0xe0000000
> +#define GNU_PROPERTY_HIUSER 0xffffffff
> +
> +#define GNU_PROPERTY_X86_ISA_1_USED 0xc0000000
> +#define GNU_PROPERTY_X86_ISA_1_NEEDED   0xc0000001
> +
> +#define GNU_PROPERTY_X86_ISA_1_486  (1U << 0)
> +#define GNU_PROPERTY_X86_ISA_1_586  (1U << 1)
> +#define GNU_PROPERTY_X86_ISA_1_686  (1U << 2)
> +#define GNU_PROPERTY_X86_ISA_1_SSE  (1U << 3)
> +#define GNU_PROPERTY_X86_ISA_1_SSE2 (1U << 4)
> +#define GNU_PROPERTY_X86_ISA_1_SSE3 (1U << 5)
> +#define GNU_PROPERTY_X86_ISA_1_SSSE3(1U << 6)
> +#define GNU_PROPERTY_X86_ISA_1_SSE4_1   (1U << 7)
> +#define GNU_PROPERTY_X86_ISA_1_SSE4_2   (1U << 8)
> +#define GNU_PROPERTY_X86_ISA_1_AVX  (1U << 9)
> +#define GNU_PROPERTY_X86_ISA_1_AVX2 (1U << 10)
> +#define GNU_PROPERTY_X86_ISA_1_AVX512F  (1U << 11)
> +#define GNU_PROPERTY_X86_ISA_1_AVX512CD (1U << 12)
> +#define GNU_PROPERTY_X86_ISA_1_AVX512ER (1U << 13)
> +#define GNU_PROPERTY_X86_ISA_1_AVX512PF (1U << 14)
> +#define GNU_PROPERTY_X86_ISA_1_AVX512VL (1U << 15)
> +#define GNU_PROPERTY_X86_ISA_1_AVX512DQ (1U << 16)
> +#define GNU_PROPERTY_X86_ISA_1_AVX512BW (1U << 17)
> +
> +#define GNU_PROPERTY_X86_FEATURE_1_AND  0xc0000002
> +#define GNU_PROPERTY_X86_FEATURE_1_IBT  (1U << 0)
> +#define GNU_PROPERTY_X86_FEATURE_1_SHSTK(1U << 1)
> +
> +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND  0xc0000000
> +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI  (1u << 0)
> +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC  (1u << 1)
> +
>  /*
>   * Physical entry point into the kernel.
>   *
> --
> 2.17.1
>
>


Re: [Qemu-devel] [PATCH v9 6/6] migration: Include migration support for machine check handling

2019-06-05 Thread Greg Kurz
On Thu, 6 Jun 2019 13:06:14 +1000
David Gibson  wrote:

> On Wed, May 29, 2019 at 11:10:57AM +0530, Aravinda Prasad wrote:
> > This patch includes migration support for machine check
> > handling. In particular, this patch blocks VM migration
> > requests until the machine check error handling is
> > complete, as (i) these errors are specific to the source
> > hardware and are irrelevant on the target hardware, and
> > (ii) these errors cause data corruption and should
> > be handled before migration.
> > 
> > Signed-off-by: Aravinda Prasad 
> > ---
> >  hw/ppc/spapr.c |   20 
> >  hw/ppc/spapr_events.c  |   17 +
> >  hw/ppc/spapr_rtas.c|4 
> >  include/hw/ppc/spapr.h |2 ++
> >  4 files changed, 43 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index e8a77636..31c4850 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2104,6 +2104,25 @@ static const VMStateDescription vmstate_spapr_dtb = {
> >  },
> >  };
> >  
> > +static bool spapr_fwnmi_needed(void *opaque)
> > +{
> > +SpaprMachineState *spapr = (SpaprMachineState *)opaque;
> > +
> > +return (spapr->guest_machine_check_addr == -1) ? 0 : 1;  
> 
> Since we're introducing a PAPR capability to enable this, it would
> actually be better to check that here, rather than the runtime state.
> That leads to less cases and easier to understand semantics for the
> migration stream.
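The suggestion would presumably look something like this sketch (not what the
series currently does):

    static bool spapr_fwnmi_needed(void *opaque)
    {
        SpaprMachineState *spapr = opaque;

        /* keyed off the capability rather than whether an MCE is in flight */
        return spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) != SPAPR_CAP_OFF;
    }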
> 

Hmmm... the purpose of needed() VMState callbacks is precisely about
runtime state: the subsection should only be migrated if an MCE is
pending, ie. spapr->guest_machine_check_addr != -1.

> > +}
> > +
> > +static const VMStateDescription vmstate_spapr_machine_check = {
> > +.name = "spapr_machine_check",
> > +.version_id = 1,
> > +.minimum_version_id = 1,
> > +.needed = spapr_fwnmi_needed,
> > +.fields = (VMStateField[]) {
> > +VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> > +VMSTATE_INT32(mc_status, SpaprMachineState),
> > +VMSTATE_END_OF_LIST()
> > +},
> > +};
> > +
> >  static const VMStateDescription vmstate_spapr = {
> >  .name = "spapr",
> >  .version_id = 3,
> > @@ -2137,6 +2156,7 @@ static const VMStateDescription vmstate_spapr = {
> >  &vmstate_spapr_dtb,
> >  &vmstate_spapr_cap_large_decr,
> >  &vmstate_spapr_cap_ccf_assist,
> > +&vmstate_spapr_machine_check,
> >  NULL
> >  }
> >  };
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index 573c0b7..35e21e4 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -41,6 +41,7 @@
> >  #include "qemu/bcd.h"
> >  #include "hw/ppc/spapr_ovec.h"
> >  #include 
> > +#include "migration/blocker.h"
> >  
> >  #define RTAS_LOG_VERSION_MASK   0xff00
> >  #define   RTAS_LOG_VERSION_60x0600
> > @@ -855,6 +856,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, 
> > bool recovered)
> >  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >  {
> >  SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > +int ret;
> > +Error *local_err = NULL;
> > +
> > +error_setg(&spapr->fwnmi_migration_blocker,
> > +"Live migration not supported during machine check handling");
> > +ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
> > +if (ret < 0) {
> > +/*
> > + * We don't want to abort and let the migration to continue. In a
> > + * rare case, the machine check handler will run on the target
> > + * hardware. Though this is not preferable, it is better than 
> > aborting
> > + * the migration or killing the VM.
> > + */
> > +error_free(spapr->fwnmi_migration_blocker);  
> 
> You should set fwnmi_migration_blocker to NULL here as well.
> 
> As mentioned on an earlier iteration, the migration blocker is the
> same every time.  Couldn't you just create it once and free at final
> teardown, rather than recreating it for every NMI?
> 
> > +warn_report_err(local_err);
> > +}
> >  
> >  while (spapr->mc_status != -1) {
> >  /*
> > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> > index 91a7ab9..c849223 100644
> > --- a/hw/ppc/spapr_rtas.c
> > +++ b/hw/ppc/spapr_rtas.c
> > @@ -50,6 +50,7 @@
> >  #include "target/ppc/mmu-hash64.h"
> >  #include "target/ppc/mmu-book3s-v3.h"
> >  #include "kvm_ppc.h"
> > +#include "migration/blocker.h"
> >  
> >  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState 
> > *spapr,
> > uint32_t token, uint32_t nargs,
> > @@ -404,6 +405,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >  spapr->mc_status = -1;
> >  qemu_cond_signal(&spapr->mc_delivery_cond);
> >  rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > +migrate_del_blocker(spapr->fwnmi_migration_blocker);
> > +error_fr

Re: [Qemu-devel] [PATCH] hw: misc: Add Aspeed XDMA device

2019-06-05 Thread Cédric Le Goater
Hello Eddie,

On 04/06/2019 00:09, Eddie James wrote:
> The XDMA engine embedded in the Aspeed SOCs performs PCI DMA operations
> between the SOC (acting as a BMC) and a host processor in a server.
> 
> The XDMA engine exists on the AST2400, AST2500, and AST2600 SOCs, so
> enable it for all of those.
> 
> Signed-off-by: Eddie James 

This looks correct to me. It's sufficient to exercise the BMC driver.

However, we will need to rebase on an Aspeed patchset I sent earlier :

   http://patchwork.ozlabs.org/cover/1105343/

I can do that and include the patch in my tree for the moment. 


For my understanding, how can we interact with the model and pretend
there is a host side?

Thanks,

C. 

> ---
>  hw/arm/aspeed_soc.c   |  14 
>  hw/misc/Makefile.objs |   2 +-
>  hw/misc/aspeed_xdma.c | 156 
> ++
>  include/hw/arm/aspeed_soc.h   |   2 +
>  include/hw/misc/aspeed_xdma.h |  31 +
>  5 files changed, 204 insertions(+), 1 deletion(-)
>  create mode 100644 hw/misc/aspeed_xdma.c
>  create mode 100644 include/hw/misc/aspeed_xdma.h
> 
> diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
> index faff42b..b25bb18 100644
> --- a/hw/arm/aspeed_soc.c
> +++ b/hw/arm/aspeed_soc.c
> @@ -31,6 +31,7 @@
>  #define ASPEED_SOC_VIC_BASE 0x1E6C
>  #define ASPEED_SOC_SDMC_BASE0x1E6E
>  #define ASPEED_SOC_SCU_BASE 0x1E6E2000
> +#define ASPEED_SOC_XDMA_BASE0x1E6E7000
>  #define ASPEED_SOC_SRAM_BASE0x1E72
>  #define ASPEED_SOC_TIMER_BASE   0x1E782000
>  #define ASPEED_SOC_WDT_BASE 0x1E785000
> @@ -159,6 +160,9 @@ static void aspeed_soc_init(Object *obj)
>  
>  sysbus_init_child_obj(obj, "ftgmac100", OBJECT(&s->ftgmac100),
>sizeof(s->ftgmac100), TYPE_FTGMAC100);
> +
> +sysbus_init_child_obj(obj, "xdma", OBJECT(&s->xdma), sizeof(s->xdma),
> +  TYPE_ASPEED_XDMA);
>  }
>  
>  static void aspeed_soc_realize(DeviceState *dev, Error **errp)
> @@ -298,6 +302,16 @@ static void aspeed_soc_realize(DeviceState *dev, Error 
> **errp)
>  sysbus_mmio_map(SYS_BUS_DEVICE(&s->ftgmac100), 0, ASPEED_SOC_ETH1_BASE);
>  sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100), 0,
> qdev_get_gpio_in(DEVICE(&s->vic), 2));
> +
> +/* XDMA */
> +object_property_set_bool(OBJECT(&s->xdma), true, "realized", &err);
> +if (err) {
> +error_propagate(errp, err);
> +return;
> +}
> +sysbus_mmio_map(SYS_BUS_DEVICE(&s->xdma), 0, ASPEED_SOC_XDMA_BASE);
> +sysbus_connect_irq(SYS_BUS_DEVICE(&s->xdma), 0,
> +   qdev_get_gpio_in(DEVICE(&s->vic), 6));
>  }
>  
>  static void aspeed_soc_class_init(ObjectClass *oc, void *data)
> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
> index 77b9df9..a4483af 100644
> --- a/hw/misc/Makefile.objs
> +++ b/hw/misc/Makefile.objs
> @@ -74,7 +74,7 @@ obj-$(CONFIG_ARMSSE_MHU) += armsse-mhu.o
>  
>  obj-$(CONFIG_PVPANIC) += pvpanic.o
>  obj-$(CONFIG_AUX) += auxbus.o
> -obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o
> +obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o aspeed_xdma.o
>  obj-$(CONFIG_MSF2) += msf2-sysreg.o
>  obj-$(CONFIG_NRF51_SOC) += nrf51_rng.o
>  
> diff --git a/hw/misc/aspeed_xdma.c b/hw/misc/aspeed_xdma.c
> new file mode 100644
> index 000..fe3a32e
> --- /dev/null
> +++ b/hw/misc/aspeed_xdma.c
> @@ -0,0 +1,156 @@
> +/*
> + * ASPEED XDMA Controller
> + * Eddie James 
> + *
> + * Copyright (C) 2019 IBM Corp
> + * SPDX-License-Identifer: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "qemu/error-report.h"
> +#include "hw/misc/aspeed_xdma.h"
> +#include "qapi/error.h"
> +
> +#define XDMA_BMC_CMDQ_ADDR 0x10
> +#define XDMA_BMC_CMDQ_ENDP 0x14
> +#define XDMA_BMC_CMDQ_WRP  0x18
> +#define  XDMA_BMC_CMDQ_W_MASK  0x0003
> +#define XDMA_BMC_CMDQ_RDP  0x1C
> +#define  XDMA_BMC_CMDQ_RDP_MAGIC   0xEE882266
> +#define XDMA_IRQ_ENG_CTRL  0x20
> +#define  XDMA_IRQ_ENG_CTRL_US_COMP BIT(4)
> +#define  XDMA_IRQ_ENG_CTRL_DS_COMP BIT(5)
> +#define  XDMA_IRQ_ENG_CTRL_W_MASK  0xBFEFF07F
> +#define XDMA_IRQ_ENG_STAT  0x24
> +#define  XDMA_IRQ_ENG_STAT_US_COMP BIT(4)
> +#define  XDMA_IRQ_ENG_STAT_DS_COMP BIT(5)
> +#define  XDMA_IRQ_ENG_STAT_RESET   0xF800
> +
> +#define TO_REG(addr) ((addr) / sizeof(uint32_t))
> +
> +static uint64_t aspeed_xdma_read(void *opaque, hwaddr addr, unsigned int 
> size)
> +{
> +uint32_t val = 0;
> +AspeedXDMAState *xdma = opaque;
> +
> +if (addr < ASPEED_XDMA_REG_SIZE) {
> +val = xdma->regs[TO_REG(addr)];
> +}
> +
> +return (uint64_t)val;
> +}
> +
> +static void aspeed_xdma_write(void *opaque, hwaddr addr, uint64_t val,
> +  unsigned int size)
> +{
> +unsigned int idx;
> +uint32_t val32 = (uint32_t)val;
> +AspeedXDMAState *xdma = opaque;
> +

Re: [Qemu-devel] [PATCH v2 2/5] virtio: Set "start_on_kick" for legacy devices

2019-06-05 Thread Greg Kurz
On Wed, 5 Jun 2019 14:49:34 +0800
Yongji Xie  wrote:

> On Wed, 5 Jun 2019 at 14:42, Greg Kurz  wrote:
> >
> > On Tue,  4 Jun 2019 15:34:56 +0800
> > elohi...@gmail.com wrote:
> >  
> > > From: Xie Yongji 
> > >
> > > Besides virtio 1.0 transitional devices, we should also
> > > set "start_on_kick" flag for legacy devices (virtio 0.9).
> > >
> > > Signed-off-by: Xie Yongji 
> > > ---  
> >
> > Patch looks good but it would be even better if applied
> > earlier so that it doesn't revert lines added by the
> > previous patch...
> >  
> 
> Fine with me. Will do it in v3.
> 

Hold on before posting, I've just learned about hw_compat_4_0_1 while
reviewing patch 5... need some more time to understand the impact.

Cheers,

--
Greg

> Thanks,
> Yongji




Re: [Qemu-devel] [PATCH 04/13] target/arm/kvm: Move the get/put of fpsimd registers out

2019-06-05 Thread Auger Eric
Hi Drew,

On 5/12/19 10:36 AM, Andrew Jones wrote:
> Move the getting/putting of the fpsimd registers out of
> kvm_arch_get/put_registers() into their own helper functions
> to prepare for alternatively getting/putting SVE registers.
> 
> No functional change.
> 
> Signed-off-by: Andrew Jones 
> ---
>  target/arm/kvm64.c | 148 +++--
>  1 file changed, 88 insertions(+), 60 deletions(-)
> 
> diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
> index ba232b27a6d3..61947f3716e1 100644
> --- a/target/arm/kvm64.c
> +++ b/target/arm/kvm64.c
> @@ -706,13 +706,53 @@ int kvm_arm_cpreg_level(uint64_t regidx)
>  #define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
>   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
>  
> +static int kvm_arch_put_fpsimd(CPUState *cs)
> +{
> +ARMCPU *cpu = ARM_CPU(cs);
> +CPUARMState *env = &cpu->env;
> +struct kvm_one_reg reg;
> +uint32_t fpr;
> +int i, ret;
> +
> +for (i = 0; i < 32; i++) {
> +uint64_t *q = aa64_vfp_qreg(env, i);
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t fp_val[2] = { q[1], q[0] };
> +reg.addr = (uintptr_t)fp_val;
> +#else
> +reg.addr = (uintptr_t)q;
> +#endif
> +reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
> +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> +if (ret) {
> +return ret;
> +}
> +}
> +
> +reg.addr = (uintptr_t)(&fpr);
> +fpr = vfp_get_fpsr(env);
> +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
> +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> +if (ret) {
> +return ret;
> +}
> +
> +reg.addr = (uintptr_t)(&fpr);
I don't think you need this assignment
> +fpr = vfp_get_fpcr(env);
> +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
> +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> +if (ret) {
> +return ret;
> +}
> +
> +return 0;
> +}
> +
>  int kvm_arch_put_registers(CPUState *cs, int level)
>  {
>  struct kvm_one_reg reg;
> -uint32_t fpr;
>  uint64_t val;
> -int i;
> -int ret;
> +int i, ret;
>  unsigned int el;
>  
>  ARMCPU *cpu = ARM_CPU(cs);
> @@ -802,33 +842,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
>  }
>  }
>  
> -/* Advanced SIMD and FP registers. */
> -for (i = 0; i < 32; i++) {
> -uint64_t *q = aa64_vfp_qreg(env, i);
> -#ifdef HOST_WORDS_BIGENDIAN
> -uint64_t fp_val[2] = { q[1], q[0] };
> -reg.addr = (uintptr_t)fp_val;
> -#else
> -reg.addr = (uintptr_t)q;
> -#endif
> -reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
> -ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> -if (ret) {
> -return ret;
> -}
> -}
> -
> -reg.addr = (uintptr_t)(&fpr);
> -fpr = vfp_get_fpsr(env);
> -reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
> -ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> -if (ret) {
> -return ret;
> -}
> -
> -fpr = vfp_get_fpcr(env);
> -reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
> -ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> +ret = kvm_arch_put_fpsimd(cs);
>  if (ret) {
>  return ret;
>  }
> @@ -849,14 +863,54 @@ int kvm_arch_put_registers(CPUState *cs, int level)
>  return ret;
>  }
>  
> +static int kvm_arch_get_fpsimd(CPUState *cs)
> +{
> +ARMCPU *cpu = ARM_CPU(cs);
> +CPUARMState *env = &cpu->env;
> +struct kvm_one_reg reg;
> +uint32_t fpr;
> +int i, ret;
> +
> +for (i = 0; i < 32; i++) {
> +uint64_t *q = aa64_vfp_qreg(env, i);
> +reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
> +reg.addr = (uintptr_t)q;
> +ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
> +if (ret) {
> +return ret;
> +} else {
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t t;
> +t = q[0], q[0] = q[1], q[1] = t;
> +#endif
> +}
> +}
> +
> +reg.addr = (uintptr_t)(&fpr);
> +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
> +ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
> +if (ret) {
> +return ret;
> +}
> +vfp_set_fpsr(env, fpr);
> +
> +reg.addr = (uintptr_t)(&fpr);
same here
> +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
> +ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®);
> +if (ret) {
> +return ret;
> +}
> +vfp_set_fpcr(env, fpr);
> +
> +return 0;
> +}
> +
>  int kvm_arch_get_registers(CPUState *cs)
>  {
>  struct kvm_one_reg reg;
>  uint64_t val;
> -uint32_t fpr;
>  unsigned int el;
> -int i;
> -int ret;
> +int i, ret;
>  
>  ARMCPU *cpu = ARM_CPU(cs);
>  CPUARMState *env = &cpu->env;
> @@ -945,36 +999,10 @@ int kvm_arch_get_registers(CPUState *cs)
>  env->spsr = env->banked_spsr[i];
>  }
>  
> -/* Advanced SIMD and FP registers */
> -for (i = 0; i < 32; i++) {
> -uint64_t *q = aa64

Re: [Qemu-devel] [PATCH v2 2/5] virtio: Set "start_on_kick" for legacy devices

2019-06-05 Thread Yongji Xie
On Wed, 5 Jun 2019 at 15:14, Greg Kurz  wrote:
>
> On Wed, 5 Jun 2019 14:49:34 +0800
> Yongji Xie  wrote:
>
> > On Wed, 5 Jun 2019 at 14:42, Greg Kurz  wrote:
> > >
> > > On Tue,  4 Jun 2019 15:34:56 +0800
> > > elohi...@gmail.com wrote:
> > >
> > > > From: Xie Yongji 
> > > >
> > > > Besides virtio 1.0 transitional devices, we should also
> > > > set "start_on_kick" flag for legacy devices (virtio 0.9).
> > > >
> > > > Signed-off-by: Xie Yongji 
> > > > ---
> > >
> > > Patch looks good but it would be even better if applied
> > > earlier so that it doesn't revert lines added by the
> > > previous patch...
> > >
> >
> > Fine with me. Will do it in v3.
> >
>
> Hold on before posting, I've just learned about hw_compat_4_0_1 while
> reviewing patch 5... need some more time to understand the impact.
>

Sure.

Thanks,
Yongji



Re: [Qemu-devel] [PATCH 01/13] target/arm/kvm64: fix error returns

2019-06-05 Thread Auger Eric
Hi,

On 5/12/19 10:36 AM, Andrew Jones wrote:
> A couple return -EINVAL's forget their '-'s.
> 
> Signed-off-by: Andrew Jones 
> ---
>  target/arm/kvm64.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
> index e3ba1492482f..ba232b27a6d3 100644
> --- a/target/arm/kvm64.c
> +++ b/target/arm/kvm64.c
> @@ -841,7 +841,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
>  write_cpustate_to_list(cpu, true);
>  
>  if (!write_list_to_kvmstate(cpu, level)) {
> -return EINVAL;
> +return -EINVAL;
>  }
>  
>  kvm_arm_sync_mpstate_to_kvm(cpu);
> @@ -982,7 +982,7 @@ int kvm_arch_get_registers(CPUState *cs)
>  }
>  
>  if (!write_kvmstate_to_list(cpu)) {
> -return EINVAL;
> +return -EINVAL;
>  }
>  /* Note that it's OK to have registers which aren't in CPUState,
>   * so we can ignore a failure return here.
> 
Note: if I am not wrong, the returned value is actually never tested at the
existing call sites.

Reviewed-by: Eric Auger 

Thanks

Eric



Re: [Qemu-devel] [PULL 29/44] spapr/xive: introduce a VM state change handler

2019-06-05 Thread Alexey Kardashevskiy



On 04/06/2019 18:10, Cédric Le Goater wrote:
> On 04/06/2019 09:49, Alexey Kardashevskiy wrote:
>>
>>
>> On 29/05/2019 16:50, David Gibson wrote:
>>> From: Cédric Le Goater 
>>>
>>> This handler is in charge of stabilizing the flow of event notifications
>>> in the XIVE controller before migrating a guest. This is a requirement
>>> before transferring the guest EQ pages to a destination.
>>>
>>> When the VM is stopped, the handler sets the source PQs to PENDING to
>>> stop the flow of events and to possibly catch a triggered interrupt
>>> occuring while the VM is stopped. Their previous state is saved. The
>>> XIVE controller is then synced through KVM to flush any in-flight
>>> event notification and to stabilize the EQs. At this stage, the EQ
>>> pages are marked dirty to make sure the EQ pages are transferred if a
>>> migration sequence is in progress.
>>>
>>> The previous configuration of the sources is restored when the VM
>>> resumes, after a migration or a stop. If an interrupt was queued while
>>> the VM was stopped, the handler simply generates the missing trigger.
>>>
>>> Signed-off-by: Cédric Le Goater 
>>> Reviewed-by: David Gibson 
>>> Message-Id: <20190513084245.25755-6-...@kaod.org>
>>> Signed-off-by: David Gibson 
>>
>> This one breaks my nvlink2 passthru setup. The host is v5.2-rc2.
>> v5.2-rc3 fixes it though, so it is a backward compatibility issue; to what
>> degree do we care about that here?
> 
> v5.2-rc2 had an ugly bug impacting passthru under some VM configuration,
> XIVE + single CPU. See :
> 
> bcaa3110d584 ("KVM: PPC: Book3S HV: XIVE: Fix page offset when clearing 
> ESB pages")
> 
> passthru also had a serious issue impacting the XICS-over-XIVE and the 
> XIVE KVM devices :   
> 
> ef9740204051 ("KVM: PPC: Book3S HV: XIVE: Do not clear IRQ data of 
> passthrough interrupts")
> 
> You need an v5.2-rc3 ! 


Yeah, that works. And released v5.1 works too so we do not need to worry
:) Thanks for explanation.


> 
>> I am forcing ic-mode=xive which is not the default so I am not so sure.
> 
> It should be OK.
> 
> C. 
> 
>>
>>
>>
>> aik@u1804kvm:~$ cat /proc/interrupts
>>CPU0
>>  16:  0  XIVE-IPI   0 Edge  IPI
>>  21:  0  XIVE-IRQ 4096 Edge  RAS_EPOW
>>  22:  0  XIVE-IRQ 4097 Edge  RAS_HOTPLUG
>> 257:  12372  XIVE-IRQ 4353 Edge  ibmvscsi
>> 258:  0  XIVE-IRQ 4864 Edge  virtio0-config
>> 259:   2157  XIVE-IRQ 4865 Edge  virtio0-input.0
>> 260:  1  XIVE-IRQ 4866 Edge  virtio0-output.0
>> 261:  0  XIVE-IRQ 4868 Edge  xhci_hcd
>> 262:  0  XIVE-IRQ 4869 Edge  xhci_hcd
>> 272:  1  XIVE-IRQ 4368 Edge  hvc_console
>> LOC:  10508   Local timer interrupts for timer event device
>> BCT:  0   Broadcast timer interrupts for timer event device
>> LOC:  0   Local timer interrupts for others
>> SPU:  5   Spurious interrupts
>> PMI:  0   Performance monitoring interrupts
>> MCE:  0   Machine check exceptions
>> NMI:  0   System Reset interrupts
>> DBL:  0   Doorbell interrupts
>>
>>
>> and 7bfc759c02b8 "spapr/xive: add state synchronization with KVM" works:
>>
>>CPU0
>>  16:  0  XIVE-IPI   0 Edge  IPI
>>  19:  0  XIVE-IRQ 4610 Level NPU Device
>>  20:  0  XIVE-IRQ 4611 Level NPU Device
>>  21:  0  XIVE-IRQ 4096 Edge  RAS_EPOW
>>  22:  0  XIVE-IRQ 4097 Edge  RAS_HOTPLUG
>> 257:  11833  XIVE-IRQ 4353 Edge  ibmvscsi
>> 258:  0  XIVE-IRQ 4864 Edge  virtio0-config
>> 259:   1632  XIVE-IRQ 4865 Edge  virtio0-input.0
>> 260:  1  XIVE-IRQ 4866 Edge  virtio0-output.0
>> 261:  0  XIVE-IRQ 4868 Edge  xhci_hcd
>> 262:  0  XIVE-IRQ 4869 Edge  xhci_hcd
>> 263: 60  XIVE-IRQ 4867 Edge  nvidia
>> 272:  0  XIVE-IRQ 4368 Edge  hvc_console
>> LOC:   2236   Local timer interrupts for timer event device
>> BCT:  0   Broadcast timer interrupts for timer event device
>> LOC:  0   Local timer interrupts for others
>> SPU:  2   Spurious interrupts
>> PMI:  0   Performance monitoring interrupts
>> MCE:  0   Machine check exceptions
>> NMI:  0   System Reset interrupts
>> DBL:  0   Doorbell interrupts
>>
>>
>>
>> Here is the command line:
>>
>> /home/aik/pbuild/qemu-aikrhel74alt-ppc64/ppc64-softmmu/qemu-system-ppc64 \
>> -nodefaults \
>> -chardev stdio,id=STDIO0,signal=off,mux=on \
>> -device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
>> -mon id=MON0,chardev=STDIO0,mode=readline -nographic -vga none \
>> -enable-kvm \
>> -device nec-usb-xhci,id=nec-usb-xhci0 -m 16G \
>> -netdev "user,id=USER0,hostfwd=tcp::2223-:22" \
>> -device "virtio-net-pci,id=vnet0,mac=C0:41:49:4b:00:00,netdev=USER0" \
>> img/u1804-64G-cuda10.1-418.67-swiotlb.qcow2 \
>> -machine pseries,cap-cfpc=broken,cap-htm=off,ic-mode=xive \
>> -device "vfio-pci,id=vf

Re: [Qemu-devel] [PATCH RFC v20 3/8] target/avr: Add mechanism to check for active debugger connection

2019-06-05 Thread Michael Rolnik
Hi Richard.

I am still struggling with this one.

The spec says:
The BREAK instruction is used by the On-chip Debug system, and is normally
not used in the application software.
When the BREAK instruction is executed, the AVR CPU is set in the Stopped
Mode.
This gives the On-chip Debugger access to internal resources.
If any Lock bits are set, or either the JTAGEN or OCDEN Fuses are
unprogrammed, the CPU will treat the BREAK instruction as a NOP and will
not enter the Stopped mode.

I read it as follows:
- If the user intends to use a debugger, BREAK should be translated to a
QEMU debug breakpoint
- If the user has no intention of using a debugger, BREAK should be
translated into a NOP.

However, it seems that raising EXCP_DEBUG crashes QEMU when no debugger is
present or *-s* was not supplied.

This led me to the following.
- checking for an active GDB connection is not good, because the connection
might change but the translated BREAK will not
- if *-s* is supplied, BREAK should always raise an EXCP_DEBUG exception
- if *-s* is not supplied, BREAK should be translated into a NOP

What do you think? How can I check for the existence of the *-s* option?

Regards,
Michael



On Mon, Jun 3, 2019 at 8:04 PM Michael Rolnik  wrote:

> Got it.
>
> Sent from my cell phone, please ignore typos
>
> On Mon, Jun 3, 2019, 7:37 PM Richard Henderson <
> richard.hender...@linaro.org> wrote:
>
>> On 6/3/19 11:29 AM, Michael Rolnik wrote:
>> > 1. There's a break
>> > instruction
>> https://www.microchip.com/webdoc/avrassembler/avrassembler.wb_BREAK.html
>> > 2. There's a set of tests that use break.
>> >
>> > So I assume I have to implement this instruction as described in the
>> spec.
>>
>> The spec talks about fuses, not gdb.  A valid implementation of this
>> instruction is a no-op -- it say so right there in the spec.
>>
>> What does it mean to "test" break?  AFAIK, you can't test this at all from
>> within the cpu itself, since it does not generate a cpu-level exception.
>>
>> If gdb is setting a breakpoint via -S, it should be done via
>> cpu_breakpoint_test.
>>
>>
>>
>> > On Mon, Jun 3, 2019, 6:44 PM Richard Henderson <
>> richard.hender...@linaro.org
>> > > wrote:
>> >
>> > On 6/1/19 4:12 PM, Michael Rolnik wrote:
>> > > Hi Richard.
>> > >
>> > > If I implement it this way
>> > >
>> > > ```
>> > >  static bool trans_BREAK(DisasContext *ctx, arg_BREAK *a)
>> > >  {
>> > >  if (avr_feature(ctx->env, AVR_FEATURE_BREAK) == false) {
>> > >  gen_helper_unsupported(cpu_env);
>> > >  } else {
>> > >  tcg_gen_movi_tl(cpu_pc, ctx->inst[0].npc);
>> > >  gen_helper_debug(cpu_env);
>> > >  }
>> > >
>> > >  ctx->bstate = BS_EXCP;
>> > >
>> > >  return true;
>> > >  }
>> > > ```
>> > >
>> > > qemu (without -s -S flags) crashes when debugger is not connected
>> >
>> > I was not suggesting using the internal qemu EXCP_DEBUG, but
>> another AVR
>> > specific exception, much the same way as every other cpu has a
>> cpu-specific
>> > debug exception.
>> >
>> > Or perhaps always do nothing.  Why is gdb insertting BREAK in the
>> first place?
>> >  It should be using the "hardware breakpoint" support that qemu
>> advertises as
>> > part of the gdbstub protocol, and that you support here:
>> >
>> > > +if (unlikely(cpu_breakpoint_test(cs, OFFSET_CODE + cpc *
>> 2, BP_ANY))
>> > > + || cpu_breakpoint_test(cs, OFFSET_DATA + cpc *
>> 2,
>> > BP_ANY)) {
>> > > +tcg_gen_movi_i32(cpu_pc, cpc);
>> > > +gen_helper_debug(cpu_env);
>> > > +ctx.bstate = BS_EXCP;
>> > > +goto done_generating;
>> > > +}
>>
>>

-- 
Best Regards,
Michael Rolnik


Re: [Qemu-devel] [PATCH 02/13] update-linux-headers: Add sve_context.h to asm-arm64

2019-06-05 Thread Auger Eric
Hi,

On 5/12/19 10:36 AM, Andrew Jones wrote:
> Signed-off-by: Andrew Jones 
> ---
>  scripts/update-linux-headers.sh | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
> index c3819d2b983d..e1fce54f8aa3 100755
> --- a/scripts/update-linux-headers.sh
> +++ b/scripts/update-linux-headers.sh
> @@ -99,6 +99,9 @@ for arch in $ARCHLIST; do
>  cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch"
>  done
>  
> +if [ $arch = arm64 ]; then
> +cp "$tmpdir/include/asm/sve_context.h" 
> "$output/linux-headers/asm-arm64/"
> +fi
>  if [ $arch = mips ]; then
>  cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/"
>  cp "$tmpdir/include/asm/unistd_o32.h" 
> "$output/linux-headers/asm-mips/"
> 
Reviewed-by: Eric Auger 

Eric



Re: [Qemu-devel] [PULL 26/44] spapr/xive: add KVM support

2019-06-05 Thread Alexey Kardashevskiy



On 04/06/2019 17:54, Cédric Le Goater wrote:
> On 04/06/2019 09:23, Alexey Kardashevskiy wrote:
>>
>>
>> On 29/05/2019 16:49, David Gibson wrote:
>>> From: Cédric Le Goater 
>>>
>>> This introduces a set of helpers when KVM is in use, which create the
>>> KVM XIVE device, initialize the interrupt sources at a KVM level and
>>> connect the interrupt presenters to the vCPU.
>>>
>>> They also handle the initialization of the TIMA and the source ESB
>>> memory regions of the controller. These have a different type under
>>> KVM. They are 'ram device' memory mappings, similarly to VFIO, exposed
>>> to the guest and the associated VMAs on the host are populated
>>> dynamically with the appropriate pages using a fault handler.
>>>
>>> Signed-off-by: Cédric Le Goater 
>>> Reviewed-by: David Gibson 
>>> Message-Id: <20190513084245.25755-3-...@kaod.org>
>>> Signed-off-by: David Gibson 
>>
>>
>> This one breaks my setup - it boots up to:
>>
>>
>> ipr: IBM Power RAID SCSI Device Driver version: 2.6.4 (March 14, 2017)
>> __vio_register_driver: driver ibmvscsi registering
>> ibmvscsi 7101: SRP_VERSION: 16.a
>> ibmvscsi 7101: Maximum ID: 64 Maximum LUN: 32 Maximum Channel: 3
>> scsi host0: IBM POWER Virtual SCSI Adapter 1.5.9
>>
>>
>> and hangs. Here is the command line:
>>
>>
>> /home/aik/pbuild/qemu-aikrhel74alt-ppc64/ppc64-softmmu/qemu-system-ppc64 \
>> -nodefaults \
>> -chardev stdio,id=STDIO0,signal=off,mux=on \
>> -device spapr-vty,id=svty0,reg=0x71000110,chardev=STDIO0 \
>> -mon id=MON0,chardev=STDIO0,mode=readline -nographic -vga none \
>> -enable-kvm \
>> -device nec-usb-xhci,id=nec-usb-xhci0 -m 16G \
>> -netdev "user,id=USER0,hostfwd=tcp::2223-:22" \
>> -device "virtio-net-pci,id=vnet0,mac=C0:41:49:4b:00:00,netdev=USER0" \
>> img/u1804-64G-cuda10.1-418.67-swiotlb.qcow2 \
>> -machine pseries,cap-cfpc=broken,cap-htm=off,ic-mode=xive -snapshot \
>> -smp 1,threads=1 -bios ./slof.bin \
>> -L /home/aik/t/qemu-ppc64-bios/ \
>> -trace events=qemu_trace_events -d guest_errors \
>> -chardev socket,id=SOCKET0,server,nowait,path=qemu.mon.user2223 \
>> -mon chardev=SOCKET0,mode=control
> 
> At this level of patch 38afd772f802 ("spapr/xive: add KVM support"), I am
> surprised this is even starting. 
> 
> The test in spapr_irq_init_xive() : 
> 
> /* KVM XIVE device not yet available */
> if (kvm_enabled()) {
> if (machine_kernel_irqchip_required(machine)) {
> error_setg(errp, "kernel_irqchip requested. no KVM XIVE support");
> return;
> }
> }
> 
> should fail. This is removed later in 0dc9f5f8496a ("spapr/xive: activate 
> KVM support")
> 
>> The host kernel is v5.2-rc2. The next patch - 0c575703e487 "spapr/xive:
>> add hcall support when under KVM" - fixes this though but the question
>> is now if xive emulation in qemu still works (how do I verify it?).
> 
> kernel_irqchip=off should activate the QEMU XIVE device.
> 
> Are you testing bisection ?


I was bisecting as I originally wanted to test David's recent spapr PCI
rework; things broke again, although differently, hence the noise I made
about XIVE.



-- 
Alexey
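
(For reference, to force the emulated XIVE device as suggested above, adding
the kernel irqchip switch to the same -machine option should be enough, e.g.:

    -machine pseries,cap-cfpc=broken,cap-htm=off,ic-mode=xive,kernel_irqchip=off

with the rest of the command line unchanged.)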



Re: [Qemu-devel] [PATCH 04/13] target/arm/kvm: Move the get/put of fpsimd registers out

2019-06-05 Thread Andrew Jones
On Wed, Jun 05, 2019 at 09:15:49AM +0200, Auger Eric wrote:
> Hi Drew,
> 
> On 5/12/19 10:36 AM, Andrew Jones wrote:
> > Move the getting/putting of the fpsimd registers out of
> > kvm_arch_get/put_registers() into their own helper functions
> > to prepare for alternatively getting/putting SVE registers.
> > 
> > No functional change.
> > 
> > Signed-off-by: Andrew Jones 
> > ---
> >  target/arm/kvm64.c | 148 +++--
> >  1 file changed, 88 insertions(+), 60 deletions(-)
> > 
> > diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
> > index ba232b27a6d3..61947f3716e1 100644
> > --- a/target/arm/kvm64.c
> > +++ b/target/arm/kvm64.c
> > @@ -706,13 +706,53 @@ int kvm_arm_cpreg_level(uint64_t regidx)
> >  #define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
> >   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
> >  
> > +static int kvm_arch_put_fpsimd(CPUState *cs)
> > +{
> > +ARMCPU *cpu = ARM_CPU(cs);
> > +CPUARMState *env = &cpu->env;
> > +struct kvm_one_reg reg;
> > +uint32_t fpr;
> > +int i, ret;
> > +
> > +for (i = 0; i < 32; i++) {
> > +uint64_t *q = aa64_vfp_qreg(env, i);
> > +#ifdef HOST_WORDS_BIGENDIAN
> > +uint64_t fp_val[2] = { q[1], q[0] };
> > +reg.addr = (uintptr_t)fp_val;
> > +#else
> > +reg.addr = (uintptr_t)q;
> > +#endif
> > +reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
> > +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> > +if (ret) {
> > +return ret;
> > +}
> > +}
> > +
> > +reg.addr = (uintptr_t)(&fpr);
> > +fpr = vfp_get_fpsr(env);
> > +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
> > +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®);
> > +if (ret) {
> > +return ret;
> > +}
> > +
> > +reg.addr = (uintptr_t)(&fpr);
> I don't think you need this assignment

You're right, and they weren't in the original code, but I added them
in order to be sure that the register get/set blocks were complete
units. It's one thing to factor stuff like that out of a loop, but
here we had two almost independent blocks so it looked a bit odd.

[...]

> 
> Besides
> Reviewed-by: Eric Auger 
>

Thanks,
drew 



Re: [Qemu-devel] [PATCH 02/13] update-linux-headers: Add sve_context.h to asm-arm64

2019-06-05 Thread Andrew Jones
On Wed, Jun 05, 2019 at 09:21:18AM +0200, Auger Eric wrote:
> Hi,
> 
> On 5/12/19 10:36 AM, Andrew Jones wrote:
> > Signed-off-by: Andrew Jones 
> > ---
> >  scripts/update-linux-headers.sh | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/scripts/update-linux-headers.sh 
> > b/scripts/update-linux-headers.sh
> > index c3819d2b983d..e1fce54f8aa3 100755
> > --- a/scripts/update-linux-headers.sh
> > +++ b/scripts/update-linux-headers.sh
> > @@ -99,6 +99,9 @@ for arch in $ARCHLIST; do
> >  cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch"
> >  done
> >  
> > +if [ $arch = arm64 ]; then
> > +cp "$tmpdir/include/asm/sve_context.h" 
> > "$output/linux-headers/asm-arm64/"
> > +fi
> >  if [ $arch = mips ]; then
> >  cp "$tmpdir/include/asm/sgidefs.h" 
> > "$output/linux-headers/asm-mips/"
> >  cp "$tmpdir/include/asm/unistd_o32.h" 
> > "$output/linux-headers/asm-mips/"
> > 
> Reviewed-by: Eric Auger 
>

Thanks, but I noticed we don't need this anymore. Not since b1b9e0dc78a8e
applied an identical patch. I've dropped this from the v2 I'm working on
now.

Thanks,
drew



Re: [Qemu-devel] [Qemu-ppc] [PULL 09/29] target/ppc: introduce get_cpu_vsr{l, h}() and set_cpu_vsr{l, h}() helpers for VSR register access

2019-06-05 Thread Laurent Vivier
On 05/06/2019 08:32, Mark Cave-Ayland wrote:
> On 04/06/2019 22:23, Laurent Vivier wrote:
> 
>> This patch breaks something in the libcrypto.
>>
>> I've been able to reproduce the problem with Fedora 29:
>>
>> dnf install 'dnf-command(download)'
>> dnf download --source coreutils-single
>> rpm --verify coreutils-8.30-7.fc29.src.rpm
>> error: coreutils-8.30-7.fc29.src.rpm: Header V3 RSA/SHA256 Signature, key ID 
>> 429476b4: BAD
>> error: coreutils-8.30-7.fc29.src.rpm: Header SHA256 digest: BAD (Expected 
>> fa042669e74ac435bd5defaa8c2e4efe779a0329c24f2b6377591c53b38aa280 != 
>> d6e22527412fafc4aa12882432d0d157e5427097710eeb2d5fce8fbc52a47be6)
>> error: coreutils-8.30-7.fc29.src.rpm: not an rpm package (or package 
>> manifest)
>>
>> See https://bugzilla.redhat.com/show_bug.cgi?id=1715017
>>
>> I've tested with origin/master (47fbad45d47af8af784bb12a5719489edcd89b4c) 
>> and all the 
>> merged fixes for this patch don't fix this problem.
>>
>> We should be able to reproduce it on Debian Sid too: it breaks ssh (this is 
>> one of the rare binaries using libcrypto on debian).
>>
>> I've been able to reproduce it with qemu linux-user if I enable 
>> PPC_FEATURE2_VEC_CRYPTO in linux-user/elfload.c
>> (git clone -b linux-user-ppc64-hwcap g...@github.com:vivier/qemu.git).
>>
>> To ease debugging, you can install a Fedora 29 chroot with something like:
>>
>> curl -o container.tar.xz 
>> http://download-ib01.fedoraproject.org/pub/fedora-secondary/releases/29/Container/ppc64le/images/Fedora-Container-Base-29-1.2.ppc64le.tar.xz
>> tar Jxvf container.tar.xz '*/layer.tar'
>> mkdir -p chroot/ppc64le/29
>> cd chroot/ppc64le/29
>> tar xf $OLDPWD/*/layer.tar
>> cd -
>> cp ~/qemu/ppc64le-linux-user/qemu-ppc64le chroot/ppc64le/29/
>> [use "configure --target-list=ppc64le-linux-user --static --disable-tools" 
>> and don't forget to run scripts/qemu-binfmt-conf.sh]
> 
> One of Anton's VSX patches hasn't landed in master yet and is still queued in
> ppc-for-4.1: "target/ppc: Fix lxvw4x, lxvh8x and lxvb16x".
> 
> Can you try David's ppc-for-4.1 branch first and let me know if that solves
> the issue? If not, I'll take a look at it later in the week when I have a
> bit more time.

Thank you Mark.

Anton's patch fixes the problem.

Thanks,
Laurent



Re: [Qemu-devel] [Qemu-block] [PATCH v2 1/5] block/nvme: don't flip CQ phase bits

2019-06-05 Thread Maxim Levitsky
On Mon, 2019-06-03 at 18:25 -0400, John Snow wrote:
> 
> On 4/17/19 3:53 PM, Maxim Levitsky wrote:
> > Phase bits are only set by the hardware to indicate new completions
> > and not by the device driver.
> > 
> > Signed-off-by: Maxim Levitsky 
> > ---
> >  block/nvme.c | 2 --
> >  1 file changed, 2 deletions(-)
> > 
> > diff --git a/block/nvme.c b/block/nvme.c
> > index 0684bbd077..2d208000df 100644
> > --- a/block/nvme.c
> > +++ b/block/nvme.c
> > @@ -340,8 +340,6 @@ static bool nvme_process_completion(BDRVNVMeState *s, 
> > NVMeQueuePair *q)
> >  qemu_mutex_lock(&q->lock);
> >  c->cid = cpu_to_le16(0);
> >  q->inflight--;
> > -/* Flip Phase Tag bit. */
> > -c->status = cpu_to_le16(le16_to_cpu(c->status) ^ 0x1);
> >  progress = true;
> >  }
> >  if (progress) {
> > 
> 
> Since you've not got much traction on this and you've pinged a v2, can
> you point me to a spec or a reproducer that illustrates the problem?
> 
> (Or wait for more NVME knowledgeable people to give you a review...!)

"A Completion Queue entry is posted to the Completion Queue when the controller 
write of that Completion
Queue entry to the next free Completion Queue slot inverts the Phase Tag (P) 
bit from its previous value
in memory. The controller may generate an interrupt to the host to indicate 
that one or more Completion
Queue entries have been posted."
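
In other words, the driver side only ever compares the bit; it never writes
it. A minimal, generic sketch of how a host driver consumes completions (an
illustration, not the exact code in block/nvme.c; process_completion() and
the local variables are placeholders):

    /* expected_phase starts at 1 for a zero-initialised queue and is
     * flipped by the driver only when the head wraps around; the Phase
     * Tag inside each CQE is written by the controller alone. */
    NvmeCqe *cqe = &cq[head];
    while ((le16_to_cpu(cqe->status) & 0x1) == expected_phase) {
        process_completion(cqe);
        if (++head == cq_size) {
            head = 0;
            expected_phase ^= 1;
        }
        cqe = &cq[head];
    }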



Best regards,
Maxim Levitsky




Re: [Qemu-devel] [Qemu-ppc] [PULL 09/29] target/ppc: introduce get_cpu_vsr{l, h}() and set_cpu_vsr{l, h}() helpers for VSR register access

2019-06-05 Thread Mark Cave-Ayland
On 05/06/2019 08:43, Laurent Vivier wrote:

> On 05/06/2019 08:32, Mark Cave-Ayland wrote:
>> On 04/06/2019 22:23, Laurent Vivier wrote:
>>
>>> This patch breaks something in the libcrypto.
>>>
>>> I've been able to reproduce the problem with Fedora 29:
>>>
>>> dnf install 'dnf-command(download)'
>>> dnf download --source coreutils-single
>>> rpm --verify coreutils-8.30-7.fc29.src.rpm
>>> error: coreutils-8.30-7.fc29.src.rpm: Header V3 RSA/SHA256 Signature, key 
>>> ID 429476b4: BAD
>>> error: coreutils-8.30-7.fc29.src.rpm: Header SHA256 digest: BAD (Expected 
>>> fa042669e74ac435bd5defaa8c2e4efe779a0329c24f2b6377591c53b38aa280 != 
>>> d6e22527412fafc4aa12882432d0d157e5427097710eeb2d5fce8fbc52a47be6)
>>> error: coreutils-8.30-7.fc29.src.rpm: not an rpm package (or package 
>>> manifest)
>>>
>>> See https://bugzilla.redhat.com/show_bug.cgi?id=1715017
>>>
>>> I've tested with origin/master (47fbad45d47af8af784bb12a5719489edcd89b4c) 
>>> and all the 
>>> merged fixes for this patch don't fix this problem.
>>>
>>> We should be able to reproduce it on Debian Sid too: it breaks ssh (this is 
>>> one of the rare binaries using libcrypto on debian).
>>>
>>> I've been able to reproduce it with qemu linux-user if I enable 
>>> PPC_FEATURE2_VEC_CRYPTO in linux-user/elfload.c
>>> (git clone -b linux-user-ppc64-hwcap g...@github.com:vivier/qemu.git).
>>>
>>> To ease debugging, you can install a Fedora 29 chroot with something like:
>>>
>>> curl -o container.tar.xz 
>>> http://download-ib01.fedoraproject.org/pub/fedora-secondary/releases/29/Container/ppc64le/images/Fedora-Container-Base-29-1.2.ppc64le.tar.xz
>>> tar Jxvf container.tar.xz '*/layer.tar'
>>> mkdir -p chroot/ppc64le/29
>>> cd chroot/ppc64le/29
>>> tar xf $OLDPWD/*/layer.tar
>>> cd -
>>> cp ~/qemu/ppc64le-linux-user/qemu-ppc64le chroot/ppc64le/29/
>>> [use "configure --target-list=ppc64le-linux-user --static --disable-tools" 
>>> and don't forget to run scripts/qemu-binfmt-conf.sh]
>>
>> One of Anton's VSX patches hasn't landed in master yet and is still queued in
>> ppc-for-4.1: "target/ppc: Fix lxvw4x, lxvh8x and lxvb16x".
>>
>> Can you try David's ppc-for-4.1 branch first and let me know if that solves
>> the issue? If not, I'll take a look at it later in the week when I have a
>> bit more time.
> 
> Thank you Mark.
> 
> Anton's patch fixes the problem.

Great! Also I don't know if you noticed but I posted a roll-up of the VSX
fixes for stable yesterday at
https://lists.gnu.org/archive/html/qemu-devel/2019-06/msg00534.html so these
will eventually make it into the next 4.0 release.


ATB,

Mark.



Re: [Qemu-devel] [Qemu-block] [PATCH v2] nvme: add Get/Set Feature Timestamp support

2019-06-05 Thread Kevin Wolf
Am 04.06.2019 um 19:06 hat Heitke, Kenneth geschrieben:
> 
> 
> On 6/4/2019 3:13 AM, Klaus Birkelund wrote:
> > On Tue, Jun 04, 2019 at 10:46:45AM +0200, Kevin Wolf wrote:
> > > Am 04.06.2019 um 10:28 hat Klaus Birkelund geschrieben:
> > > > On Mon, Jun 03, 2019 at 09:30:53AM -0600, Heitke, Kenneth wrote:
> > > > > 
> > > > > 
> > > > > On 6/3/2019 5:14 AM, Kevin Wolf wrote:
> > > > > > Am 28.05.2019 um 08:18 hat Klaus Birkelund geschrieben:
> > > > > > > On Mon, May 20, 2019 at 11:40:30AM -0600, Kenneth Heitke wrote:
> > > > > > > > Signed-off-by: Kenneth Heitke 
> > > > > > 
> > > > > > > > diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> > > > > > > > index 56c9d4b4b1..d7277e72b7 100644
> > > > > > > > --- a/hw/block/nvme.h
> > > > > > > > +++ b/hw/block/nvme.h
> > > > > > > > @@ -69,6 +69,7 @@ typedef struct NvmeCtrl {
> > > > > > > >uint16_tmax_prp_ents;
> > > > > > > >uint16_tcqe_size;
> > > > > > > >uint16_tsqe_size;
> > > > > > > > +uint16_toncs;
> > > > > > > 
> > > > > > > Looks like this unused member snuck its way into the patch. But I 
> > > > > > > see no
> > > > > > > harm in it being there.
> > > > > > 
> > > > > > Good catch. I'll just remove it again from my branch.
> > > > > > 
> > > > > > > > +static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
> > > > > > > > +{
> > > > > > > > +trace_nvme_setfeat_timestamp(ts);
> > > > > > > > +
> > > > > > > > +n->host_timestamp = le64_to_cpu(ts);
> > > > > > > > +n->timestamp_set_qemu_clock_ms = 
> > > > > > > > qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
> > > > > > > > +{
> > > > > > > > +uint64_t current_time = 
> > > > > > > > qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> > > > > > 
> > > > > > Here I wonder why we use QEMU_CLOCK_REALTIME in a device emulation.
> > > > > > Wouldn't QEMU_CLOCK_VIRTUAL make more sense?
> > > > > > 
> > > > > 
> > > > > QEMU_CLOCK_VIRTUAL probably would make more sense. When I was reading
> > > > > through the differences I wasn't really sure what to pick. Given that
> > > > > this is
> > > > > the time within the device's context, the virtual time seems more 
> > > > > correct.
> > > > > 
> > > > I thought about this too when I reviewed, but came to the conclusion
> > > > that REALTIME was correct. The timestamp is basically a value that the
> > > > host stores in the controller. When the host uses Get Features to get
> > > > the current time, it would expect it to match the progression of its
> > > > own wall clock, right? If I understand REALTIME vs VIRTUAL correctly,
> > > > using VIRTUAL, it would go way out of sync.
> > > 
> > > Which two things would go out of sync with VIRTUAL?
> > > 
> > > Not an expert on clocks myself, but I think the main question is what
> > > happens to the clock while the VM is stopped. REALTIME continues running
> > > whereas VIRTUAL is stopped. If we expose REALTIME measurements to the
> > > guest, the time passed may look a lot longer than what the guest's clock
> > > actually says. So this is the thing I am worried would go out of sync
> > > with REALTIME.
> > > 
> > 
> > OK, fair point.
> > 
> > Thinking about this some more, I agree that VIRTUAL is more correct. An
> > application should never track elapsed time using real wall clock time,
> > but some monotonic clock that is oblivious to, say, NTP adjustments.
> > 
> > Klaus
> > 
> 
> Kevin, would you like me to update the patch to reflect this change or will
> you make the change directly?

I already made it directly.

Kevin
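
For reference, a minimal sketch of the VIRTUAL-clock variant discussed above
(field names follow the quoted patch; this is an illustration, not the
committed change):

    static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts)
    {
        n->host_timestamp = le64_to_cpu(ts);
        n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
    }

    static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
    {
        uint64_t elapsed_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) -
                              n->timestamp_set_qemu_clock_ms;

        /* guest-visible time only: periods where the VM is stopped do not
         * advance the reported timestamp */
        return cpu_to_le64(n->host_timestamp + elapsed_ms);
    }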



[Qemu-devel] Problems with Pass through of a AMD Vega 56/64

2019-06-05 Thread James Courtier-Dutton
Hi,

Problem:
Pass through of a AMD Vega 56 or 64 on a AMD Threadripper 1950X with X399
Motherboard , Host OS: Linux Kernel 5.1.5, did not work.

Various problems were observed:
1) PCI BAR resizing -  The GPU tries to resize the BAR from 256M to 8G. The
resize fails, but the Guest does not know this, and continues on as if it
was resized, resulting in a VM_L2_PROTECTION_FAULT_STATUS in the guest.
Symptoms of (1):
Able to passthru most PCI devices except the GPU.
Solution:
This problem has been worked-around with (See other email by Alex W):
[PATCH] vfio/pci: Hide Resizable BAR capability
The reason the GPU was problematic is that it was the only device that
tried to resize the BAR.
Fortunately, the AMD GPU driver can work with a 256M or 8G BAR.
The real fix will be the implementation of support for PCI BAR Resizing in
QEMU and VFIO.

2) QEMU fails to start with GPU passthrough: it does not reach the SeaBIOS
messages and sits at 100% CPU.
I verified that the AMD GPU card's option ROM contained both Legacy BIOS
and EFI images in it.
Removing the GPU PCI device in virt-manager allows the VM to boot, but
adding the GPU PCI device causes it to fail to boot.
Symptoms:
VM won't start, left with blank screen.
Solution:
Using Q35 and OVMF (EFI) works; a command sketch follows this list.
Using i440FX and SeaBIOS does not work.
My guess is there is a bug in SeaBIOS preventing it from working.

3) Use case: have two AMD Vega GPUs and wish only one to be passed through to
the guest while the other is used by the host.
Problem: Currently you can passthru both, or neither.
Solution:
I have written a patch for VFIO to handle this, but it is still work in
progress before it will be accepted into kernel mainstream.
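
For the Q35/OVMF route in (2), a command sketch as a starting point (the OVMF
paths and the vfio-pci host addresses below are placeholders for your system):

    qemu-system-x86_64 -enable-kvm -machine q35 -cpu host -m 16G \
        -drive if=pflash,format=raw,readonly=on,file=/usr/share/OVMF/OVMF_CODE.fd \
        -drive if=pflash,format=raw,file=my_OVMF_VARS.fd \
        -device vfio-pci,host=0000:44:00.0,multifunction=on \
        -device vfio-pci,host=0000:44:00.1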

Kind Regards

James


Re: [Qemu-devel] [PATCH 3/4] Clean up a few header guard symbols

2019-06-05 Thread Philippe Mathieu-Daudé
On 6/4/19 8:16 PM, Markus Armbruster wrote:
> Commit 58ea30f5145 "Clean up header guards that don't match their file
> name" messed up contrib/elf2dmp/qemu_elf.h and
> tests/migration/migration-test.h.
> 
> It missed target/cris/opcode-cris.h and
> tests/uefi-test-tools/UefiTestToolsPkg/Include/Guid/BiosTablesTest.h
> due to the scripts/clean-header-guards.pl bug fixed in the previous
> commit.
> 
> Commit a8b991b52dc "Clean up ill-advised or unusual header guards"
> missed include/hw/xen/io/ring.h for the same reason.
> 
> Commit 3979fca4b69 "disas: Rename include/disas/bfd.h back to
> include/disas/dis-asm.h" neglected to update the guard symbol for the
> rename.
> 
> Commit a331c6d7741 "semihosting: implement a semihosting console"
> created include/hw/semihosting/console.h with an ill-advised guard
> symbol.
> 
> Clean them up.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  contrib/elf2dmp/qemu_elf.h  | 5 ++---
>  include/disas/dis-asm.h | 6 +++---
>  include/hw/semihosting/console.h| 6 +++---
>  include/hw/xen/io/ring.h| 6 +++---
>  target/cris/opcode-cris.h   | 6 +++---
>  tests/migration/migration-test.h| 2 +-
>  .../UefiTestToolsPkg/Include/Guid/BiosTablesTest.h  | 6 +++---
>  7 files changed, 18 insertions(+), 19 deletions(-)
> 
> diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h
> index 66ee1f0ed5..b2f0d9cbc9 100644
> --- a/contrib/elf2dmp/qemu_elf.h
> +++ b/contrib/elf2dmp/qemu_elf.h
> @@ -2,11 +2,10 @@
>   * Copyright (c) 2018 Virtuozzo International GmbH
>   *
>   * This work is licensed under the terms of the GNU GPL, version 2 or later.
> - *
>   */
>  
> -#ifndef EMPF2DMP_QEMU_ELF_H
> -#define EMPF2DMP_QEMU_ELF_H
> +#ifndef ELF2DMP_QEMU_ELF_H
> +#define ELF2DMP_QEMU_ELF_H
>  
>  #include "elf.h"
>  
> diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h
> index 9240ec32c2..e9c7dd8eb4 100644
> --- a/include/disas/dis-asm.h
> +++ b/include/disas/dis-asm.h
> @@ -6,8 +6,8 @@
> interface, for making instruction-processing programs more independent
> of the instruction set being processed.  */
>  
> -#ifndef DISAS_BFD_H
> -#define DISAS_BFD_H
> +#ifndef DISAS_DIS_ASM_H
> +#define DISAS_DIS_ASM_H
>  
>  typedef void *PTR;
>  typedef uint64_t bfd_vma;
> @@ -508,4 +508,4 @@ bfd_vma bfd_getl16 (const bfd_byte *addr);
>  bfd_vma bfd_getb16 (const bfd_byte *addr);
>  typedef bool bfd_boolean;
>  
> -#endif /* DISAS_BFD_H */
> +#endif /* DISAS_DIS_ASM_H */
> diff --git a/include/hw/semihosting/console.h 
> b/include/hw/semihosting/console.h
> index 30e66ae20a..9eb45b7c53 100644
> --- a/include/hw/semihosting/console.h
> +++ b/include/hw/semihosting/console.h
> @@ -6,8 +6,8 @@
>   * SPDX-License-Identifier: GPL-2.0-or-later
>   */
>  
> -#ifndef _SEMIHOST_CONSOLE_H_
> -#define _SEMIHOST_CONSOLE_H_
> +#ifndef SEMIHOST_CONSOLE_H
> +#define SEMIHOST_CONSOLE_H
>  
>  /**
>   * qemu_semihosting_console_out:
> @@ -35,4 +35,4 @@ int qemu_semihosting_console_out(CPUArchState *env, 
> target_ulong s, int len);
>   */
>  int qemu_semihosting_log_out(const char *s, int len);
>  
> -#endif /* _SEMIHOST_CONSOLE_H_ */
> +#endif /* SEMIHOST_CONSOLE_H */
> diff --git a/include/hw/xen/io/ring.h b/include/hw/xen/io/ring.h
> index 1adacf09f9..62abfd7a6e 100644
> --- a/include/hw/xen/io/ring.h
> +++ b/include/hw/xen/io/ring.h
> @@ -24,8 +24,8 @@
>   * Tim Deegan and Andrew Warfield November 2004.
>   */
>  
> -#ifndef __XEN_PUBLIC_IO_RING_H__
> -#define __XEN_PUBLIC_IO_RING_H__
> +#ifndef XEN_PUBLIC_IO_RING_H
> +#define XEN_PUBLIC_IO_RING_H
>  
>  /*
>   * When #include'ing this header, you need to provide the following
> @@ -469,7 +469,7 @@ struct name##_data_intf { 
> \
>  };   
>  \
>  DEFINE_XEN_FLEX_RING(name)
>  
> -#endif /* __XEN_PUBLIC_IO_RING_H__ */
> +#endif /* XEN_PUBLIC_IO_RING_H */
>  
>  /*
>   * Local variables:
> diff --git a/target/cris/opcode-cris.h b/target/cris/opcode-cris.h
> index e7ebb98cd0..40509c88db 100644
> --- a/target/cris/opcode-cris.h
> +++ b/target/cris/opcode-cris.h
> @@ -19,8 +19,8 @@ GNU General Public License for more details.
>  You should have received a copy of the GNU General Public License
>  along with this program; if not, see .  */
>  
> -#ifndef __CRIS_H_INCLUDED_
> -#define __CRIS_H_INCLUDED_
> +#ifndef TARGET_CRIS_OPCODE_CRIS_H
> +#define TARGET_CRIS_OPCODE_CRIS_H
>  
>  #if !defined(__STDC__) && !defined(const)
>  #define const
> @@ -345,7 +345,7 @@ extern const struct cris_opcode cris_opcodes[];
> sign-extended before adding to "target".  */
>  #define CRIS_DIS_FLAG_MEM_TARGET2_MEM_WORD (1 << 7)
>  
> -#endif /* __CRIS_H_INCLUDED_ */
> +#endif /* TARGET_CRIS_OPCODE_CRIS_H */
>  
>  /*
>

Re: [Qemu-devel] [Qemu-devel PATCH v2 1/2] sd: Fix out-of-bounds assertions

2019-06-05 Thread Philippe Mathieu-Daudé
On 6/5/19 8:21 AM, Lidong Chen wrote:
> Due to an off-by-one error, the assert statements allow an
> out-of-bound array access.

Not sure via which tree this patch is going (trivial?).
To the maintainer, please consider adding when applying:

"This access can not happen. Fix to silent static analyzer warnings."

As confirmed by Lidong in v1 here:
https://lists.gnu.org/archive/html/qemu-devel/2019-04/msg01337.html

Thanks,

Phil.
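
For reference, the off-by-one in a line of generic C (not the QEMU code;
'idx' is a placeholder index):

    const char *name[4];   /* ARRAY_SIZE(name) == 4, valid indices 0..3 */
    /* 'idx <= ARRAY_SIZE(name)' would wrongly admit idx == 4, one past
     * the end; the strict form below is what the patch switches to */
    assert(idx < ARRAY_SIZE(name));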

> Signed-off-by: Lidong Chen 
> Reviewed-by: Liam Merwick 
> Reviewed-by: Marc-André Lureau 
> Reviewed-by: Philippe Mathieu-Daudé 
> Reviewed-by: Li Qiang 
> Reviewed-by: Darren Kenny 
> ---
>  hw/sd/sd.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> index aaab15f..818f86c 100644
> --- a/hw/sd/sd.c
> +++ b/hw/sd/sd.c
> @@ -144,7 +144,7 @@ static const char *sd_state_name(enum SDCardStates state)
>  if (state == sd_inactive_state) {
>  return "inactive";
>  }
> -assert(state <= ARRAY_SIZE(state_name));
> +assert(state < ARRAY_SIZE(state_name));
>  return state_name[state];
>  }
>  
> @@ -165,7 +165,7 @@ static const char *sd_response_name(sd_rsp_type_t rsp)
>  if (rsp == sd_r1b) {
>  rsp = sd_r1;
>  }
> -assert(rsp <= ARRAY_SIZE(response_name));
> +assert(rsp < ARRAY_SIZE(response_name));
>  return response_name[rsp];
>  }
>  
> 



Re: [Qemu-devel] [PATCH] migratioin/ram.c: reset complete_round when we gets a queued page

2019-06-05 Thread Wei Yang
On Wed, Jun 05, 2019 at 02:41:08PM +0800, Peter Xu wrote:
>On Wed, Jun 05, 2019 at 09:08:28AM +0800, Wei Yang wrote:
>> In case we get a queued page, the order of blocks is interrupted. We may
>> not rely on the complete_round flag to say we have already searched the
>> whole blocks on the list.
>> 
>> Signed-off-by: Wei Yang 
>> ---
>>  migration/ram.c | 6 ++
>>  1 file changed, 6 insertions(+)
>> 
>> diff --git a/migration/ram.c b/migration/ram.c
>> index d881981876..e9b40d636d 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -2290,6 +2290,12 @@ static bool get_queued_page(RAMState *rs, 
>> PageSearchStatus *pss)
>>   */
>>  pss->block = block;
>>  pss->page = offset >> TARGET_PAGE_BITS;
>> +
>> +/*
>> + * This unqueued page would break the "one round" check, even if it is
>> + * really rare.
>
>Why is this needed?  Could you help explain the problem first?

Peter, Thanks for your question.

I found this issue during code review and I believe this is a corner case.

Below is a draft chart for ram_find_and_save_block:

ram_find_and_save_block
do
get_queued_page()
find_dirty_block()
ram_save_host_page()
while

The basic logic here is: get a page that needs to be migrated and migrate it.

Without get_queued_page(), find_dirty_block() will search the whole
ram_list.blocks in order. pss->complete_round is used to indicate
whether this search has looped.

Everything still works fine after get_queued_page() is involved. The block
unqueued in get_queued_page() could be any block in ram_list.blocks. This
means there is only a small chance of breaking the looped indicator.

   unqueue_page()  last_seen_block
 | |
ram_list.blocks  v v
-+=+---


Just draw a raw picture to demonstrate a corner case.

For example, we start from last_seen_block and search till the end of
ram_list.blocks. At this moment, pss->complete_round is set to true. Then we
get a queued page from unqueue_page() at the point marked above. The loop may
then only continue over the range I marked with "=", and we will skip all the
other ranges.

This is really a corner case, since ram_save_host_page() should return 0 and
there should be no dirty page in this range. But I don't see how we can avoid
this case.

If I am not correct, just let me know :-)
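
To make it concrete, the shape of the change is simply this (a sketch of
where the reset would sit, matching the hunk quoted at the top):

    pss->block = block;
    pss->page = offset >> TARGET_PAGE_BITS;

    /*
     * The unqueued page interrupted the ordered walk over
     * ram_list.blocks, so complete_round can no longer be trusted
     * for this round.
     */
    pss->complete_round = false;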

>
>Thanks,
>
>-- 
>Peter Xu

-- 
Wei Yang
Help you, Help me



Re: [Qemu-devel] [Qemu-devel PATCH v2 2/2] util/main-loop: Fix incorrect assertion

2019-06-05 Thread Philippe Mathieu-Daudé
This patch doesn't seem related to the series cover.

On 6/5/19 8:21 AM, Lidong Chen wrote:
> The check for poll_fds in g_assert() was incorrect. The correct assertion
> should check "n_poll_fds + w->num <= ARRAY_SIZE(poll_fds)" because the
> subsequent for-loop is doing access to poll_fds[n_poll_fds + i] where i
> is in [0, w->num).
> 

Suggested-by: Peter Maydell 

> Signed-off-by: Lidong Chen 
> Reviewed-by: Liran Alon 
> Reviewed-by: Darren Kenny 

Reviewed-by: Philippe Mathieu-Daudé 

> ---
>  util/main-loop.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/util/main-loop.c b/util/main-loop.c
> index e1e349c..a9f4e8d 100644
> --- a/util/main-loop.c
> +++ b/util/main-loop.c
> @@ -422,7 +422,7 @@ static int os_host_main_loop_wait(int64_t timeout)
>  g_main_context_prepare(context, &max_priority);
>  n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
>poll_fds, ARRAY_SIZE(poll_fds));
> -g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));
> +g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));
>  
>  for (i = 0; i < w->num; i++) {
>  poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
> 



Re: [Qemu-devel] [PATCH v2 5/5] virtio: add "use-started" property

2019-06-05 Thread Greg Kurz
On Tue,  4 Jun 2019 15:34:59 +0800
elohi...@gmail.com wrote:

> From: Xie Yongji 
> 
> In order to avoid migration issues, we introduce a "use-started"
> property to the base virtio device to indicate whether to use the
> "started" flag or not. This property will be true by default and
> set to false when machine type <= 4.0.1.
> 
> Signed-off-by: Xie Yongji 
> ---
>  hw/block/vhost-user-blk.c  |  4 ++--
>  hw/core/machine.c  |  4 +++-
>  hw/virtio/virtio.c | 21 -
>  include/hw/virtio/virtio.h | 21 +
>  4 files changed, 34 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 9cb61336a6..85bc4017e7 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -191,7 +191,7 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
>  static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
>  {
>  VHostUserBlk *s = VHOST_USER_BLK(vdev);
> -bool should_start = vdev->started;
> +bool should_start = virtio_device_started(vdev, status);
>  int ret;
>  
>  if (!vdev->vm_running) {
> @@ -317,7 +317,7 @@ static int vhost_user_blk_connect(DeviceState *dev)
>  }
>  
>  /* restore vhost state */
> -if (vdev->started) {
> +if (virtio_device_started(vdev, vdev->status)) {
>  ret = vhost_user_blk_start(vdev);
>  if (ret < 0) {
>  error_report("vhost-user-blk: vhost start failed: %s",
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index f1a0f45f9c..133c113ebf 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -24,7 +24,9 @@
>  #include "hw/pci/pci.h"
>  #include "hw/mem/nvdimm.h"
>  
> -GlobalProperty hw_compat_4_0_1[] = {};
> +GlobalProperty hw_compat_4_0_1[] = {
> +{ "virtio-device", "use-started", "false" },
> +};
>  const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);

I'm discovering hw_compat_4_0_1, which seems to be only used by the
pc-q35-4.0.1 machine type...

>  
>  GlobalProperty hw_compat_4_0[] = {};

Not sure if it's the way to go, but the same line should at least be added
here for all other machine types that use hw_compat_4_0[], e.g. pseries-4.0
and older, which are the ones I need this fix for.

Cc'ing core machine code maintainers for advice.
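
i.e. something along these lines (sketch only, mirroring the 4.0.1 entry):

    GlobalProperty hw_compat_4_0[] = {
        { "virtio-device", "use-started", "false" },
    };
    const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);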

> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 3960619bd4..9af2e339af 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -1165,10 +1165,7 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
>  
>  if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
>  (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
> -vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> -if (unlikely(vdev->start_on_kick && vdev->started)) {
> -vdev->start_on_kick = false;
> -}
> +virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);

virtio_set_started() takes a bool as second argument, so this should
rather be !!(val & VIRTIO_CONFIG_S_DRIVER_OK) to avoid potential
warnings from picky compilers.

The rest looks good, but I'm wondering if this patch should be the first
one in the series to narrow the range of commits where backward migration
is broken.

>  }
>  
>  if (k->set_status) {
> @@ -1539,8 +1536,7 @@ static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
>  ret = vq->handle_aio_output(vdev, vq);
>  
>  if (unlikely(vdev->start_on_kick)) {
> -vdev->started = true;
> -vdev->start_on_kick = false;
> +virtio_set_started(vdev, true);
>  }
>  }
>  
> @@ -1560,8 +1556,7 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
>  vq->handle_output(vdev, vq);
>  
>  if (unlikely(vdev->start_on_kick)) {
> -vdev->started = true;
> -vdev->start_on_kick = false;
> +virtio_set_started(vdev, true);
>  }
>  }
>  }
> @@ -1581,8 +1576,7 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
>  vq->handle_output(vdev, vq);
>  
>  if (unlikely(vdev->start_on_kick)) {
> -vdev->started = true;
> -vdev->start_on_kick = false;
> +virtio_set_started(vdev, true);
>  }
>  }
>  }
> @@ -2083,7 +2077,7 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t 
> val)
>  }
>  }
>  
> -if (!vdev->started &&
> +if (!virtio_device_started(vdev, vdev->status) &&
>  !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>  vdev->start_on_kick = true;
>  }
> @@ -2238,7 +2232,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
> version_id)
>  }
>  }
>  
> -if (!vdev->started &&
> +if (!virtio_device_started(vdev, vdev->status) &&
>  !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>  vdev->start_on_kick = true;
>  }
> @@ -2306,7 +2300,7 @@ static void virtio_vmstate_change(void *opaque, int 
> running, Run

Re: [Qemu-devel] [PATCH 5/5] tricore: reset DisasContext before generating code

2019-06-05 Thread Bastian Koppelmann

Hi,

On 6/5/19 8:11 AM, David Brenken wrote:

From: Georg Hofstetter 

Signed-off-by: Andreas Konopik 
Signed-off-by: David Brenken 
Signed-off-by: Georg Hofstetter 
Signed-off-by: Robert Rasche 
Signed-off-by: Lars Biermanski 

---
  target/tricore/translate.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index db09f82c31..cdbc00d654 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -8811,6 +8811,7 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock 
*tb, int max_insns)
  target_ulong pc_start;
  int num_insns = 0;
  
+memset(&ctx, 0x00, sizeof(DisasContext));

  pc_start = tb->pc;
  ctx.pc = pc_start;
  ctx.saved_pc = -1;


To me this looks like fixing a symptom instead of the root cause. Which 
commit did you bisect to? Do you have a reproducer?


Cheers,

Bastian




Re: [Qemu-devel] [PATCH 06/13] target/arm/kvm: max cpu: Enable SVE when available

2019-06-05 Thread Auger Eric
Hi Drew,

On 5/12/19 10:36 AM, Andrew Jones wrote:
> Enable SVE in the KVM guest when the 'max' cpu type is configured
> and KVM supports it. KVM SVE requires use of the new finalize
> vcpu ioctl, so we add that now too.
> 
> Signed-off-by: Andrew Jones 
> ---
>  target/arm/cpu64.c   |  1 +
>  target/arm/kvm.c |  5 +
>  target/arm/kvm64.c   | 16 +++-
>  target/arm/kvm_arm.h | 12 
>  4 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> index 228906f26786..6c19ef6837d5 100644
> --- a/target/arm/cpu64.c
> +++ b/target/arm/cpu64.c
> @@ -292,6 +292,7 @@ static void aarch64_max_initfn(Object *obj)
>  
>  if (kvm_enabled()) {
>  kvm_arm_set_cpu_features_from_host(cpu);
> +cpu->sve_max_vq = ARM_MAX_VQ;
same line in the !kvm_enabled path. Maybe you can set the sve_max_vq
field in a subsequent patch and just introduce the finalize and
capability checking in that patch?
>  } else {
>  uint64_t t;
>  uint32_t u;
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 599563461264..c51db4229d0f 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -50,6 +50,11 @@ int kvm_arm_vcpu_init(CPUState *cs)
>  return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
>  }
>  
> +int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
> +{
> +return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature);
> +}
> +
>  void kvm_arm_init_serror_injection(CPUState *cs)
>  {
>  cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
> diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
> index 86362f4cd7d0..c2d92df75353 100644
> --- a/target/arm/kvm64.c
> +++ b/target/arm/kvm64.c
> @@ -622,13 +622,20 @@ int kvm_arch_init_vcpu(CPUState *cs)
>  cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
>  }
>  if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
> -cpu->has_pmu = false;
> +cpu->has_pmu = false;
nit: maybe document this unrelated indent fix in the commit msg?
>  }
>  if (cpu->has_pmu) {
>  cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
>  } else {
>  unset_feature(&env->features, ARM_FEATURE_PMU);
>  }
> +if (cpu->sve_max_vq) {
> +if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_SVE)) {
> +cpu->sve_max_vq = 0;
> +} else {
> +cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
> +}
> +}
>  
>  /* Do KVM_ARM_VCPU_INIT ioctl */
>  ret = kvm_arm_vcpu_init(cs);
> @@ -636,6 +643,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
>  return ret;
>  }
>  
> +if (cpu->sve_max_vq) {
> +ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
> +if (ret) {
> +return ret;
> +}
> +}
> +
>  /*
>   * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
>   * Currently KVM has its own idea about MPIDR assignment, so we
> diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
> index 2a07333c615f..c488ec3ab410 100644
> --- a/target/arm/kvm_arm.h
> +++ b/target/arm/kvm_arm.h
> @@ -27,6 +27,18 @@
>   */
>  int kvm_arm_vcpu_init(CPUState *cs);
>  
> +/**
> + * kvm_arm_vcpu_finalize
> + * @cs: CPUState
> + * @feature: int
feature bitmap or bit?
> + *
> + * Finalizes the configuration of the specified VCPU feature
> + * by invoking the KVM_ARM_VCPU_FINALIZE ioctl.
> + *
> + * Returns: 0 if success else < 0 error code
> + */
> +int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
> +
>  /**
>   * kvm_arm_register_device:
>   * @mr: memory region for this device
> 



Re: [Qemu-devel] [PATCH v2 5/5] virtio: add "use-started" property

2019-06-05 Thread Yongji Xie
On Wed, 5 Jun 2019 at 17:00, Greg Kurz  wrote:
>
> On Tue,  4 Jun 2019 15:34:59 +0800
> elohi...@gmail.com wrote:
>
> > From: Xie Yongji 
> >
> > In order to avoid migration issues, we introduce a "use-started"
> > property to the base virtio device to indicate whether use
> > "started" flag or not. This property will be true by default and
> > set to false when machine type <= 4.0.1.
> >
> > Signed-off-by: Xie Yongji 
> > ---
> >  hw/block/vhost-user-blk.c  |  4 ++--
> >  hw/core/machine.c  |  4 +++-
> >  hw/virtio/virtio.c | 21 -
> >  include/hw/virtio/virtio.h | 21 +
> >  4 files changed, 34 insertions(+), 16 deletions(-)
> >
> > diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> > index 9cb61336a6..85bc4017e7 100644
> > --- a/hw/block/vhost-user-blk.c
> > +++ b/hw/block/vhost-user-blk.c
> > @@ -191,7 +191,7 @@ static void vhost_user_blk_stop(VirtIODevice *vdev)
> >  static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status)
> >  {
> >  VHostUserBlk *s = VHOST_USER_BLK(vdev);
> > -bool should_start = vdev->started;
> > +bool should_start = virtio_device_started(vdev, status);
> >  int ret;
> >
> >  if (!vdev->vm_running) {
> > @@ -317,7 +317,7 @@ static int vhost_user_blk_connect(DeviceState *dev)
> >  }
> >
> >  /* restore vhost state */
> > -if (vdev->started) {
> > +if (virtio_device_started(vdev, vdev->status)) {
> >  ret = vhost_user_blk_start(vdev);
> >  if (ret < 0) {
> >  error_report("vhost-user-blk: vhost start failed: %s",
> > diff --git a/hw/core/machine.c b/hw/core/machine.c
> > index f1a0f45f9c..133c113ebf 100644
> > --- a/hw/core/machine.c
> > +++ b/hw/core/machine.c
> > @@ -24,7 +24,9 @@
> >  #include "hw/pci/pci.h"
> >  #include "hw/mem/nvdimm.h"
> >
> > -GlobalProperty hw_compat_4_0_1[] = {};
> > +GlobalProperty hw_compat_4_0_1[] = {
> > +{ "virtio-device", "use-started", "false" },
> > +};
> >  const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);
>
> I'm discovering hw_compat_4_0_1, which seems to be only used by the
> pc-q35-4.0.1 machine type...
>

Oops, my mistake.

> >
> >  GlobalProperty hw_compat_4_0[] = {};
>
> Not sure if it's the way to go but the same line should at least be added
> here for all other machine types that use hw_compat_4_0[] eg. pseries-4.0
> and older, which are the ones I need this fix for.
>

I agree.

> Cc'ing core machine code maintainers for advice.
>
> > diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> > index 3960619bd4..9af2e339af 100644
> > --- a/hw/virtio/virtio.c
> > +++ b/hw/virtio/virtio.c
> > @@ -1165,10 +1165,7 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t 
> > val)
> >
> >  if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
> >  (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
> > -vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK;
> > -if (unlikely(vdev->start_on_kick && vdev->started)) {
> > -vdev->start_on_kick = false;
> > -}
> > +virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
>
> virtio_set_started() takes a bool as second argument, so this should
> rather be !!(val & VIRTIO_CONFIG_S_DRIVER_OK) to avoid potential
> warnings from picky compilers.
>

Will fix it in v3.

> The rest looks good, but I'm wondering if this patch should be the first
> one in the series to narrow the range of commits where backward migration
> is broken.
>

It's OK to me.

Thanks,
Yongji



Re: [Qemu-devel] [PULL 24/24] q35: Revert to kernel irqchip

2019-06-05 Thread Greg Kurz
On Mon,  3 Jun 2019 19:10:43 +0200
Paolo Bonzini  wrote:

> From: Alex Williamson 
> 
> Commit b2fc91db8447 ("q35: set split kernel irqchip as default") changed
> the default for the pc-q35-4.0 machine type to use split irqchip, which
> turned out to have disasterous effects on vfio-pci INTx support.  KVM
> resampling irqfds are registered for handling these interrupts, but
> these are non-functional in split irqchip mode.  We can't simply test
> for split irqchip in QEMU as userspace handling of this interrupt is a
> significant performance regression versus KVM handling (GeForce GPUs
> assigned to Windows VMs are non-functional without forcing MSI mode or
> re-enabling kernel irqchip).
> 
> The resolution is to revert the change in default irqchip mode in the
> pc-q35-4.1 machine and create a pc-q35-4.0.1 machine for the 4.0-stable
> branch.  The qemu-q35-4.0 machine type should not be used in vfio-pci
> configurations for devices requiring legacy INTx support without
> explicitly modifying the VM configuration to use kernel irqchip.
> 
> Link: https://bugs.launchpad.net/qemu/+bug/1826422
> Fixes: b2fc91db8447 ("q35: set split kernel irqchip as default")
> Signed-off-by: Alex Williamson 
> Reviewed-by: Peter Xu 
> Message-Id: <155786484688.13873.6037015630912983760.st...@gimli.home>
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/core/machine.c|  3 +++
>  hw/i386/pc.c |  3 +++
>  hw/i386/pc_q35.c | 16 ++--
>  include/hw/boards.h  |  3 +++
>  include/hw/i386/pc.h |  3 +++
>  5 files changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 16ba667..f1a0f45 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -24,6 +24,9 @@
>  #include "hw/pci/pci.h"
>  #include "hw/mem/nvdimm.h"
>  
> +GlobalProperty hw_compat_4_0_1[] = {};
> +const size_t hw_compat_4_0_1_len = G_N_ELEMENTS(hw_compat_4_0_1);
> +
>  GlobalProperty hw_compat_4_0[] = {};
>  const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);
>  
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 2632b73..edc240b 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -110,6 +110,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
>  /* Physical Address of PVH entry point read from kernel ELF NOTE */
>  static size_t pvh_start_addr;
>  
> +GlobalProperty pc_compat_4_0_1[] = {};
> +const size_t pc_compat_4_0_1_len = G_N_ELEMENTS(pc_compat_4_0_1);
> +
>  GlobalProperty pc_compat_4_0[] = {};
>  const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
>  

Do we hence need to add properties to both 4_0 and 4_0_1 ? Would it
make sense to introduce a PC_COMPAT_4_0_COMMON macro to avoid this
duplication ?
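
Something like this, maybe (just a sketch, the entries are hypothetical):

    #define PC_COMPAT_4_0_COMMON \
        { "some-device", "some-prop", "value" },

    GlobalProperty pc_compat_4_0_1[] = { PC_COMPAT_4_0_COMMON };
    const size_t pc_compat_4_0_1_len = G_N_ELEMENTS(pc_compat_4_0_1);

    GlobalProperty pc_compat_4_0[] = { PC_COMPAT_4_0_COMMON };
    const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);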

The question arose while reviewing this patch:

https://lists.gnu.org/archive/html/qemu-devel/2019-06/msg00409.html

> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index 37dd350..dcddc64 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -357,7 +357,7 @@ static void pc_q35_machine_options(MachineClass *m)
>  m->units_per_default_bus = 1;
>  m->default_machine_opts = "firmware=bios-256k.bin";
>  m->default_display = "std";
> -m->default_kernel_irqchip_split = true;
> +m->default_kernel_irqchip_split = false;
>  m->no_floppy = 1;
>  machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
>  machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
> @@ -374,10 +374,22 @@ static void pc_q35_4_1_machine_options(MachineClass *m)
>  DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL,
> pc_q35_4_1_machine_options);
>  
> -static void pc_q35_4_0_machine_options(MachineClass *m)
> +static void pc_q35_4_0_1_machine_options(MachineClass *m)
>  {
>  pc_q35_4_1_machine_options(m);
>  m->alias = NULL;
> +compat_props_add(m->compat_props, hw_compat_4_0_1, hw_compat_4_0_1_len);
> +compat_props_add(m->compat_props, pc_compat_4_0_1, pc_compat_4_0_1_len);
> +}
> +
> +DEFINE_Q35_MACHINE(v4_0_1, "pc-q35-4.0.1", NULL,
> +   pc_q35_4_0_1_machine_options);
> +
> +static void pc_q35_4_0_machine_options(MachineClass *m)
> +{
> +pc_q35_4_0_1_machine_options(m);
> +m->default_kernel_irqchip_split = true;
> +m->alias = NULL;
>  compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
>  compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
>  }
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index 6f7916f..6ff02bf 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -292,6 +292,9 @@ struct MachineState {
>  } \
>  type_init(machine_initfn##_register_types)
>  
> +extern GlobalProperty hw_compat_4_0_1[];
> +extern const size_t hw_compat_4_0_1_len;
> +
>  extern GlobalProperty hw_compat_4_0[];
>  extern const size_t hw_compat_4_0_len;
>  
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 43df723..5d56362 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -293,6 +293

Re: [Qemu-devel] [PATCH v2 06/22] s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH OR EQUAL)

2019-06-05 Thread David Hildenbrand
On 03.06.19 11:06, David Hildenbrand wrote:
> Provide for all three instructions all four combinations of cc bit and
> s bit.
> 
> Signed-off-by: David Hildenbrand 
> ---
>  target/s390x/helper.h   |  12 
>  target/s390x/insn-data.def  |   6 ++
>  target/s390x/translate_vx.inc.c |  51 
>  target/s390x/vec_fpu_helper.c   | 104 
>  4 files changed, 173 insertions(+)
> 
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index d34d6802a6..33d3bacf74 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -254,6 +254,18 @@ DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, 
> ptr, cptr, cptr, env, i32)
>  DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
> i32)
>  DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
>  DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
> +DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
> i32)
> +DEF_HELPER_FLAGS_5(gvec_vfce64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
> i32)
> +DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32)
> +DEF_HELPER_5(gvec_vfce64s_cc, void, ptr, cptr, cptr, env, i32)
> +DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
> i32)
> +DEF_HELPER_FLAGS_5(gvec_vfch64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
> i32)
> +DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32)
> +DEF_HELPER_5(gvec_vfch64s_cc, void, ptr, cptr, cptr, env, i32)
> +DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, 
> i32)
> +DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, 
> env, i32)
> +DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32)
> +DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32)
>  
>  #ifndef CONFIG_USER_ONLY
>  DEF_HELPER_3(servc, i32, env, i64, i64)
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index c45e101b10..446552f251 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -1212,6 +1212,12 @@
>  F(0xe7cb, WFC, VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
>  /* VECTOR FP COMPARE AND SIGNAL SCALAR */
>  F(0xe7ca, WFK, VRR_a, V,   0, 0, 0, 0, wfc, 0, IF_VEC)
> +/* VECTOR FP COMPARE EQUAL */
> +F(0xe7e8, VFCE,VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
> +/* VECTOR FP COMPARE HIGH */
> +F(0xe7eb, VFCH,VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
> +/* VECTOR FP COMPARE HIGH OR EQUAL */
> +F(0xe7ea, VFCHE,   VRR_c, V,   0, 0, 0, 0, vfc, 0, IF_VEC)
>  
>  #ifndef CONFIG_USER_ONLY
>  /* COMPARE AND SWAP AND PURGE */
> diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
> index 283e8aa07a..5571a71e1a 100644
> --- a/target/s390x/translate_vx.inc.c
> +++ b/target/s390x/translate_vx.inc.c
> @@ -2588,3 +2588,54 @@ static DisasJumpType op_wfc(DisasContext *s, DisasOps 
> *o)
>  set_cc_static(s);
>  return DISAS_NEXT;
>  }
> +
> +static DisasJumpType op_vfc(DisasContext *s, DisasOps *o)
> +{
> +const uint8_t fpf = get_field(s->fields, m4);
> +const uint8_t m5 = get_field(s->fields, m5);
> +const uint8_t m6 = get_field(s->fields, m6);
> +const bool se = extract32(m5, 3, 1);
> +const bool cs = extract32(m6, 0, 1);
> +gen_helper_gvec_3_ptr *fn;
> +
> +if (fpf != FPF_LONG || extract32(m5, 0, 3) || extract32(m6, 1, 3)) {
> +gen_program_exception(s, PGM_SPECIFICATION);
> +return DISAS_NORETURN;
> +}
> +
> +if (cs) {
> +switch (s->fields->op2) {
> +case 0xe8:
> +fn = se ? gen_helper_gvec_vfce64s_cc : gen_helper_gvec_vfce64_cc;
> +break;
> +case 0xeb:
> +fn = se ? gen_helper_gvec_vfch64s_cc : gen_helper_gvec_vfch64_cc;
> +break;
> +case 0xea:
> +fn = se ? gen_helper_gvec_vfche64s_cc : 
> gen_helper_gvec_vfche64_cc;
> +break;
> +default:
> +g_assert_not_reached();
> +}
> +} else {
> +switch (s->fields->op2) {
> +case 0xe8:
> +fn = se ? gen_helper_gvec_vfce64s : gen_helper_gvec_vfce64;
> +break;
> +case 0xeb:
> +fn = se ? gen_helper_gvec_vfch64s : gen_helper_gvec_vfch64;
> +break;
> +case 0xea:
> +fn = se ? gen_helper_gvec_vfche64s : gen_helper_gvec_vfche64;
> +break;
> +default:
> +g_assert_not_reached();
> +}
> +}
> +gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2),
> +   get_field(s->fields, v3), cpu_env, 0, fn);
> +if (cs) {
> +set_cc_static(s);
> +}
> +return DISAS_NEXT;
> +}
> diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
> index f9357d9221..e72500d4d5 100644
> --- a/target/s390x/vec_fpu_helper.c
> +++ b/target/s390x/vec_fpu_helper.c
> @@ -149,3 +149,107 @@ void HELPER(gvec_wfk64)(const void *v1,

Re: [Qemu-devel] [PATCH] migratioin/ram.c: reset complete_round when we gets a queued page

2019-06-05 Thread Peter Xu
On Wed, Jun 05, 2019 at 04:52:07PM +0800, Wei Yang wrote:
> On Wed, Jun 05, 2019 at 02:41:08PM +0800, Peter Xu wrote:
> >On Wed, Jun 05, 2019 at 09:08:28AM +0800, Wei Yang wrote:
> >> In case we gets a queued page, the order of block is interrupted. We may
> >> not rely on the complete_round flag to say we have already searched the
> >> whole blocks on the list.
> >> 
> >> Signed-off-by: Wei Yang 
> >> ---
> >>  migration/ram.c | 6 ++
> >>  1 file changed, 6 insertions(+)
> >> 
> >> diff --git a/migration/ram.c b/migration/ram.c
> >> index d881981876..e9b40d636d 100644
> >> --- a/migration/ram.c
> >> +++ b/migration/ram.c
> >> @@ -2290,6 +2290,12 @@ static bool get_queued_page(RAMState *rs, 
> >> PageSearchStatus *pss)
> >>   */
> >>  pss->block = block;
> >>  pss->page = offset >> TARGET_PAGE_BITS;
> >> +
> >> +/*
> >> + * This unqueued page would break the "one round" check, even is
> >> + * really rare.
> >
> >Why this is needed?  Could you help explain the problem first?
> 
> Peter, Thanks for your question.
> 
> I found this issue during code review and I believe this is a corner case.
> 
> Below is a draft chart for ram_find_and_save_block:
> 
> ram_find_and_save_block
>     do
>         get_queued_page()
>         find_dirty_block()
>         ram_save_host_page()
>     while
> 
> The basic logic here is: get a page that needs to be migrated and migrate it.
> 
> In case we don't have get_queued_page(), find_dirty_block() will search the
> whole ram_list.blocks by order. pss->complete_round is used to indicate
> whether this search has looped.
> 
> Things change once get_queued_page() is involved. The block unqueued in
> get_queued_page() could be any block in ram_list.blocks. This means there is
> a small chance of breaking the looped indicator.
> 
>unqueue_page()  last_seen_block
>  | |
> ram_list.blocks  v v
> -+=+---
> 
> 
> I just drew a rough picture above to demonstrate a corner case.
> 
> For example, we start from last_seen_block and search till the end of
> ram_list.blocks. At this moment, pss->complete_round is set to true. Then we
> get a queued page from unqueue_page() at the point I marked. So the loop may
> just continue over the range I marked with "=", and we will skip all the
> other ranges.
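>
> Roughly, the fix is just to reset the "looped" flag right after taking the
> unqueued page (sketch):
>
>     pss->complete_round = false;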

Ah I see your point, but I don't think there is a problem - note that
complete_round will be reset for each ram_find_and_save_block(), so
even if we have that iteration of ram_find_and_save_block() to return
we'll still know we have dirty pages to migrate and in the next call
we'll be fine, no?

-- 
Peter Xu



Re: [Qemu-devel] [PATCH for-4.1 0/2] hw/alpha: Add the CY82C693UB southbridge in Kconfig

2019-06-05 Thread Laurent Vivier
On 27/05/2019 18:05, Philippe Mathieu-Daudé wrote:
> On 5/8/19 10:16 PM, Philippe Mathieu-Daudé wrote:
>> Paolo, Thomas,
>>
>> On 4/29/19 1:29 PM, Philippe Mathieu-Daudé wrote:
>>> CC'ing Thomas who is a Kconfig expert.
>>>
>>> On 3/17/19 12:44 AM, Philippe Mathieu-Daudé wrote:
 Explicit the CY82C693UB southbridge used by the 264DP.

 Philippe Mathieu-Daudé (2):
   hw/isa/southbridge: Add the Cypress 82C693UB chipset
   hw/alpha/Kconfig: The 264DP machine use a CY82C693UB southbridge
>>
>> This series does not fix anything, but makes the kconfig graph cleaner.
> 
> Ping?
> 

You should merge the two patches: it will make it clearer why you can
remove the flags from DP264, as we can see they are added in CY82C693UB.

I think it can also go through trivial.

Thanks,
Laurent



[Qemu-devel] [PATCH 2/2] nbd-client: enable TCP keepalive

2019-06-05 Thread Vladimir Sementsov-Ogievskiy
Enable the keepalive option to track server availability.

Requested-by: Denis V. Lunev 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/nbd-client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 790ecc1ee1..b57cea8482 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -1137,6 +1137,7 @@ static int nbd_client_connect(BlockDriverState *bs,
 
 /* NBD handshake */
 logout("session init %s\n", export);
+qio_channel_set_keepalive(QIO_CHANNEL(sioc), true, NULL);
 qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
 client->info.request_sizes = true;
-- 
2.18.0




[Qemu-devel] [PATCH 0/2] nbd: enable keepalive

2019-06-05 Thread Vladimir Sementsov-Ogievskiy
Hi all!

Here is a suggestion to enable the keepalive option to track server availability.

Vladimir Sementsov-Ogievskiy (2):
  io/channel: add qio_channel_set_keepalive
  nbd-client: enable TCP keepalive

 include/io/channel.h | 13 +
 block/nbd-client.c   |  1 +
 io/channel-socket.c  | 19 +++
 io/channel.c | 14 ++
 4 files changed, 47 insertions(+)

-- 
2.18.0




[Qemu-devel] [PATCH 1/2] io/channel: add qio_channel_set_keepalive

2019-06-05 Thread Vladimir Sementsov-Ogievskiy
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/io/channel.h | 13 +
 io/channel-socket.c  | 19 +++
 io/channel.c | 14 ++
 3 files changed, 46 insertions(+)

diff --git a/include/io/channel.h b/include/io/channel.h
index 59460cb1ec..34d871a414 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -124,6 +124,9 @@ struct QIOChannelClass {
 int (*io_set_blocking)(QIOChannel *ioc,
bool enabled,
Error **errp);
+int (*io_set_keepalive)(QIOChannel *ioc,
+bool enabled,
+Error **errp);
 
 /* Optional callbacks */
 int (*io_shutdown)(QIOChannel *ioc,
@@ -490,6 +493,16 @@ int qio_channel_set_blocking(QIOChannel *ioc,
  bool enabled,
  Error **errp);
 
+/*
+ * qio_channel_set_keepalive:
+ * @ioc: the channel object
+ * @enabled: the keepalive flag state
+ * @errp: pointer to a NULL-initialized error object
+ */
+int qio_channel_set_keepalive(QIOChannel *ioc,
+  bool enabled,
+  Error **errp);
+
 /**
  * qio_channel_close:
  * @ioc: the channel object
diff --git a/io/channel-socket.c b/io/channel-socket.c
index bc5f80e780..5c1ea08660 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -656,6 +656,24 @@ qio_channel_socket_set_blocking(QIOChannel *ioc,
 }
 
 
+static int
+qio_channel_socket_set_keepalive(QIOChannel *ioc,
+ bool enabled,
+ Error **errp)
+{
+QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+int val = enabled;
+int ret = qemu_setsockopt(sioc->fd, SOL_SOCKET, SO_KEEPALIVE,
+  &val, sizeof(val));
+
+if (ret < 0) {
+error_setg_errno(errp, errno, "Unable to set KEEPALIVE");
+}
+
+return ret;
+}
+
+
 static void
 qio_channel_socket_set_delay(QIOChannel *ioc,
  bool enabled)
@@ -762,6 +780,7 @@ static void qio_channel_socket_class_init(ObjectClass 
*klass,
 ioc_klass->io_writev = qio_channel_socket_writev;
 ioc_klass->io_readv = qio_channel_socket_readv;
 ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
+ioc_klass->io_set_keepalive = qio_channel_socket_set_keepalive;
 ioc_klass->io_close = qio_channel_socket_close;
 ioc_klass->io_shutdown = qio_channel_socket_shutdown;
 ioc_klass->io_set_cork = qio_channel_socket_set_cork;
diff --git a/io/channel.c b/io/channel.c
index 2a26c2a2c0..0f0b2b7b65 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -265,6 +265,20 @@ int qio_channel_set_blocking(QIOChannel *ioc,
 return klass->io_set_blocking(ioc, enabled, errp);
 }
 
+int qio_channel_set_keepalive(QIOChannel *ioc,
+  bool enabled,
+  Error **errp)
+{
+QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+if (!klass->io_set_keepalive) {
+error_setg(errp, "KEEPALIVE is not supported by IO channel");
+return -ENOTSUP;
+}
+
+return klass->io_set_keepalive(ioc, enabled, errp);
+}
+
 
 int qio_channel_close(QIOChannel *ioc,
   Error **errp)
-- 
2.18.0




Re: [Qemu-devel] [PATCH] MAINTAINERS: Change maintership of Xen code under hw/9pfs

2019-06-05 Thread Greg Kurz
On Wed, 29 May 2019 13:59:26 +0100
Anthony PERARD  wrote:

> On Wed, May 29, 2019 at 12:24:44PM +0200, Greg Kurz wrote:
> > Xen folks are the actual maintainers for this.
> > 
> > Signed-off-by: Greg Kurz 
> > ---
> >  MAINTAINERS |3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 1f5f8b7a2c37..d00380641796 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -411,7 +411,7 @@ M: Paul Durrant 
> >  L: xen-de...@lists.xenproject.org
> >  S: Supported
> >  F: */xen*
> > -F: hw/9pfs/xen-9p-backend.c
> > +F: hw/9pfs/xen-9p*
> >  F: hw/char/xen_console.c
> >  F: hw/display/xenfb.c
> >  F: hw/net/xen_nic.c
> > @@ -1505,6 +1505,7 @@ virtio-9p
> >  M: Greg Kurz 
> >  S: Supported
> >  F: hw/9pfs/
> > +X: hw/9pfs/xen-9p*
> >  F: fsdev/
> >  F: tests/virtio-9p-test.c
> >  T: git https://github.com/gkurz/qemu.git 9p-next
> >   
> 
> Acked-by: Anthony PERARD 
> 
> Thanks,
> 

Ping ?

I'd rather also get an ack from Stefano and Paul before merging this.

Cheers,

--
Greg



Re: [Qemu-devel] [PATCH] MAINTAINERS: Change maintership of Xen code under hw/9pfs

2019-06-05 Thread Paul Durrant
> -Original Message-
> From: Greg Kurz [mailto:gr...@kaod.org]
> Sent: 05 June 2019 11:11
> To: Anthony Perard 
> Cc: qemu-devel@nongnu.org; Stefano Stabellini ; Paul 
> Durrant
> 
> Subject: Re: [PATCH] MAINTAINERS: Change maintership of Xen code under hw/9pfs
> 
> On Wed, 29 May 2019 13:59:26 +0100
> Anthony PERARD  wrote:
> 
> > On Wed, May 29, 2019 at 12:24:44PM +0200, Greg Kurz wrote:
> > > Xen folks are the actual maintainers for this.
> > >
> > > Signed-off-by: Greg Kurz 
> > > ---
> > >  MAINTAINERS |3 ++-
> > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > index 1f5f8b7a2c37..d00380641796 100644
> > > --- a/MAINTAINERS
> > > +++ b/MAINTAINERS
> > > @@ -411,7 +411,7 @@ M: Paul Durrant 
> > >  L: xen-de...@lists.xenproject.org
> > >  S: Supported
> > >  F: */xen*
> > > -F: hw/9pfs/xen-9p-backend.c
> > > +F: hw/9pfs/xen-9p*
> > >  F: hw/char/xen_console.c
> > >  F: hw/display/xenfb.c
> > >  F: hw/net/xen_nic.c
> > > @@ -1505,6 +1505,7 @@ virtio-9p
> > >  M: Greg Kurz 
> > >  S: Supported
> > >  F: hw/9pfs/
> > > +X: hw/9pfs/xen-9p*
> > >  F: fsdev/
> > >  F: tests/virtio-9p-test.c
> > >  T: git https://github.com/gkurz/qemu.git 9p-next
> > >
> >
> > Acked-by: Anthony PERARD 
> >
> > Thanks,
> >
> 
> Ping ?
> 
> I'd rather also get an ack from Stefano and Paul before merging this.
> 

Fine by me...

Acked-by: Paul Durrant 

> Cheers,
> 
> --
> Greg



[Qemu-devel] [PATCH] tests: Add proper newlines for qmp_fd_receive

2019-06-05 Thread Peter Xu
It makes the QTEST_LOG=1 output a bit more lovely, changing it from:

{"return": ... }{"execute": "query-migrate"}

{"return": ... }{"execute": "query-migrate"}

Into:

{"execute": "query-migrate"}

{"return": ... }

{"execute": "query-migrate"}

{"return": ... }

CC: Thomas Huth 
CC: Laurent Vivier 
CC: Paolo Bonzini 
Signed-off-by: Peter Xu 
---
 tests/libqtest.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/libqtest.c b/tests/libqtest.c
index 546a875913..d7301d773e 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -532,6 +532,10 @@ QDict *qmp_fd_receive(int fd)
 }
 json_message_parser_destroy(&qmp.parser);
 
+if (log) {
+fprintf(stderr, "\n\n");
+}
+
 return qmp.response;
 }
 
-- 
2.17.1




Re: [Qemu-devel] [PATCH] migratioin/ram.c: reset complete_round when we gets a queued page

2019-06-05 Thread Juan Quintela
Peter Xu  wrote:
> On Wed, Jun 05, 2019 at 04:52:07PM +0800, Wei Yang wrote:
>> On Wed, Jun 05, 2019 at 02:41:08PM +0800, Peter Xu wrote:
>> >On Wed, Jun 05, 2019 at 09:08:28AM +0800, Wei Yang wrote:
>> >> In case we gets a queued page, the order of block is interrupted. We may
>> >> not rely on the complete_round flag to say we have already searched the
>> >> whole blocks on the list.
>> >> 
>> >> Signed-off-by: Wei Yang 
>> >> ---
>> >>  migration/ram.c | 6 ++
>> >>  1 file changed, 6 insertions(+)
>> >> 
>> >> diff --git a/migration/ram.c b/migration/ram.c
>> >> index d881981876..e9b40d636d 100644
>> >> --- a/migration/ram.c
>> >> +++ b/migration/ram.c
>> >> @@ -2290,6 +2290,12 @@ static bool get_queued_page(RAMState *rs, 
>> >> PageSearchStatus *pss)
>> >>   */
>> >>  pss->block = block;
>> >>  pss->page = offset >> TARGET_PAGE_BITS;
>> >> +
>> >> +/*
>> >> + * This unqueued page would break the "one round" check, even if it
>> >> + * is really rare.
>> >


> Ah I see your point, but I don't think there is a problem - note that
> complete_round will be reset for each ram_find_and_save_block(), so
> even if we have that iteration of ram_find_and_save_block() to return
> we'll still know we have dirty pages to migrate and in the next call
> we'll be fine, no?

Reviewed-by: Juan Quintela 

I *think* that peter is perhaps right, but it is not clear at all, and
it is easier to be safe.  I think that the only case that this could
matter is if:
- all pages are clean (so complete_round will get set to true)
- we get a queue_page request

Is that possible?  I am not completely sure after looking at the code.
It *could* be if the page that got queued is the last page remaining,
but ...  I fully agree that the case where _almost all_ pages are
clean and we get a request for a queued page is really rare, so it
should not matter in real life, but ...

Later, Juan.



Re: [Qemu-devel] [PATCH] migration/multifd: sync packet_num after all thread are done

2019-06-05 Thread Juan Quintela
Wei Yang  wrote:
> Notification from recv thread is not ordered, which means we may be
> notified by one MultiFDRecvParams but adjust packet_num for another.
>
> Move the adjustment after we are sure each recv thread are sync-ed.
>
> Signed-off-by: Wei Yang 


Reviewed-by: Juan Quintela 

It shouldn't matter a lot in real life, but I agree that it is better.



Re: [Qemu-devel] [PATCH 06/13] target/arm/kvm: max cpu: Enable SVE when available

2019-06-05 Thread Andrew Jones
On Wed, Jun 05, 2019 at 11:09:56AM +0200, Auger Eric wrote:
> Hi Drew,
> 
> On 5/12/19 10:36 AM, Andrew Jones wrote:
> > Enable SVE in the KVM guest when the 'max' cpu type is configured
> > and KVM supports it. KVM SVE requires use of the new finalize
> > vcpu ioctl, so we add that now too.
> > 
> > Signed-off-by: Andrew Jones 
> > ---
> >  target/arm/cpu64.c   |  1 +
> >  target/arm/kvm.c |  5 +
> >  target/arm/kvm64.c   | 16 +++-
> >  target/arm/kvm_arm.h | 12 
> >  4 files changed, 33 insertions(+), 1 deletion(-)
> > 
> > diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
> > index 228906f26786..6c19ef6837d5 100644
> > --- a/target/arm/cpu64.c
> > +++ b/target/arm/cpu64.c
> > @@ -292,6 +292,7 @@ static void aarch64_max_initfn(Object *obj)
> >  
> >  if (kvm_enabled()) {
> >  kvm_arm_set_cpu_features_from_host(cpu);
> > +cpu->sve_max_vq = ARM_MAX_VQ;
> same line in the !kvm_enabled path. Maybe you can set the sve_max_vq
> field in a subsequent patch and just introduce the finalize and
> capability checking in that patch?

This gets changed in a subsequent patch, so factoring now would
be wasted code motion. I'm not sure the finalize function is worth
its own patch, so I'm inclined to leave this as is.

> >  } else {
> >  uint64_t t;
> >  uint32_t u;
> > diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> > index 599563461264..c51db4229d0f 100644
> > --- a/target/arm/kvm.c
> > +++ b/target/arm/kvm.c
> > @@ -50,6 +50,11 @@ int kvm_arm_vcpu_init(CPUState *cs)
> >  return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
> >  }
> >  
> > +int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
> > +{
> > +return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature);
> > +}
> > +
> >  void kvm_arm_init_serror_injection(CPUState *cs)
> >  {
> >  cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state,
> > diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
> > index 86362f4cd7d0..c2d92df75353 100644
> > --- a/target/arm/kvm64.c
> > +++ b/target/arm/kvm64.c
> > @@ -622,13 +622,20 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >  cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
> >  }
> >  if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
> > -cpu->has_pmu = false;
> > +cpu->has_pmu = false;
> nit: maybe document this unrelated indent fix in the commit msg?

It's pretty obvious without extra commentary, IMHO.

> >  }
> >  if (cpu->has_pmu) {
> >  cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
> >  } else {
> >  unset_feature(&env->features, ARM_FEATURE_PMU);
> >  }
> > +if (cpu->sve_max_vq) {
> > +if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_SVE)) {
> > +cpu->sve_max_vq = 0;
> > +} else {
> > +cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
> > +}
> > +}
> >  
> >  /* Do KVM_ARM_VCPU_INIT ioctl */
> >  ret = kvm_arm_vcpu_init(cs);
> > @@ -636,6 +643,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >  return ret;
> >  }
> >  
> > +if (cpu->sve_max_vq) {
> > +ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
> > +if (ret) {
> > +return ret;
> > +}
> > +}
> > +
> >  /*
> >   * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
> >   * Currently KVM has its own idea about MPIDR assignment, so we
> > diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
> > index 2a07333c615f..c488ec3ab410 100644
> > --- a/target/arm/kvm_arm.h
> > +++ b/target/arm/kvm_arm.h
> > @@ -27,6 +27,18 @@
> >   */
> >  int kvm_arm_vcpu_init(CPUState *cs);
> >  
> > +/**
> > + * kvm_arm_vcpu_finalize
> > + * @cs: CPUState
> > + * @feature: int
> feature bitmap or bit?

Neither. I can improve this by stating these integers must be one
of the set defined in the "KVM_ARM_VCPU_FINALIZE" section of
kernel doc Documentation/virtual/kvm/api.txt though.
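
Something along these lines, perhaps (just a sketch of the wording):

    /*
     * @feature: one of the KVM_ARM_VCPU_* values accepted by the
     * KVM_ARM_VCPU_FINALIZE ioctl, as listed in the "KVM_ARM_VCPU_FINALIZE"
     * section of the kernel's Documentation/virtual/kvm/api.txt.
     */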

> > + *
> > + * Finalizes the configuration of the specified VCPU feature
> > + * by invoking the KVM_ARM_VCPU_FINALIZE ioctl.
> > + *
> > + * Returns: 0 if success else < 0 error code
> > + */
> > +int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
> > +
> >  /**
> >   * kvm_arm_register_device:
> >   * @mr: memory region for this device
> > 
> 

Thanks,
drew



Re: [Qemu-devel] [PATCH v4 16/39] target/m68k: Use env_cpu, env_archcpu

2019-06-05 Thread Laurent Vivier
Le 04/06/2019 à 22:33, Richard Henderson a écrit :
> Cleanup in the boilerplate that each target must define.
> Replace m68k_env_get_cpu with env_archcpu.  The combination
> CPU(m68k_env_get_cpu) should have used ENV_GET_CPU to begin;
> use env_cpu now.
> 
> Reviewed-by: Alistair Francis 
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> ---
>  linux-user/m68k/target_cpu.h |  2 +-
>  target/m68k/cpu.h|  5 -
>  linux-user/m68k-sim.c|  3 +--
>  linux-user/m68k/cpu_loop.c   |  2 +-
>  target/m68k/helper.c | 33 -
>  target/m68k/m68k-semi.c  |  4 ++--
>  target/m68k/op_helper.c  | 12 ++--
>  target/m68k/translate.c  |  4 +---
>  8 files changed, 24 insertions(+), 41 deletions(-)
> 

There is no use of env_archcpu() in this patch, perhaps the comment can
be updated.

Acked-by: Laurent Vivier 

Thanks,
Laurent




[Qemu-devel] [Bug 1831225] Re: guest migration 100% cpu freeze bug

2019-06-05 Thread Dr. David Alan Gilbert
You say it's only happened since 4.19 - that's possible - but since this
bug is so tricky to trigger, it's also possible that any slight change in
4.19 could have exposed it.

You could try disabling kvm_clock?

Dave

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1831225

Title:
  guest migration 100% cpu freeze bug

Status in QEMU:
  New

Bug description:
  # Investigate migration cpu hog(100%) bug

  I have some issues when migrating from kernel 4.14.63 running qemu 2.11.2 to 
kernel 4.19.43 running qemu 2.11.2.
  The hypervisors are running on debian jessie with libvirt v5.3.0.
  Linux, libvirt and qemu are all custom compiled.

  I migrated around 10.000 vms and every once in a while a vm is stuck
  at 100% cpu after migration; what we can see right now is that the target
  hypervisor runs on linux 4.19.53. This has happened with 4 vms so far. It
  is not that easy to debug; we found this out pretty quickly because we
  are running monitoring on frozen vms after migrations.

  Last year we were having the same "kind of" bug 
https://bugs.launchpad.net/qemu/+bug/177 when trying to upgrade qemu 2.6 to 
2.11.
  This bug was fixed after applying the following patch: 
http://lists.nongnu.org/archive/html/qemu-devel/2018-04/msg00820.html

  This patch is still applied as you can see because of the available pre_load 
var on the kvmclock_vmsd struct:
  (gdb) ptype kvmclock_vmsd
  type = const struct VMStateDescription {
  const char *name;
  int unmigratable;
  int version_id;
  int minimum_version_id;
  int minimum_version_id_old;
  MigrationPriority priority;
  LoadStateHandler *load_state_old;
  int (*pre_load)(void *);
  int (*post_load)(void *, int);
  int (*pre_save)(void *);
  _Bool (*needed)(void *);
  VMStateField *fields;
  const VMStateDescription **subsections;
  }

  I attached gdb to a vcpu thread of one stuck vm, and a bt showed the 
following info:
  Thread 4 (Thread 0x7f3a431a4700 (LWP 37799)):
  #0  0x7f3a576f5017 in ioctl () at ../sysdeps/unix/syscall-template.S:84
  #1  0x55d84d15de57 in kvm_vcpu_ioctl (cpu=cpu@entry=0x55d84fca78d0, 
type=type@entry=44672) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:2050
  #2  0x55d84d15dfc6 in kvm_cpu_exec (cpu=cpu@entry=0x55d84fca78d0) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:1887
  #3  0x55d84d13ab64 in qemu_kvm_cpu_thread_fn (arg=0x55d84fca78d0) at 
/home/dbosschieter/src/qemu-pkg/src/cpus.c:1136
  #4  0x7f3a579ba4a4 in start_thread (arg=0x7f3a431a4700) at 
pthread_create.c:456
  #5  0x7f3a576fcd0f in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:97

  Thread 3 (Thread 0x7f3a439a5700 (LWP 37798)):
  #0  0x7f3a576f5017 in ioctl () at ../sysdeps/unix/syscall-template.S:84
  #1  0x55d84d15de57 in kvm_vcpu_ioctl (cpu=cpu@entry=0x55d84fc5cbb0, 
type=type@entry=44672) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:2050
  #2  0x55d84d15dfc6 in kvm_cpu_exec (cpu=cpu@entry=0x55d84fc5cbb0) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:1887
  #3  0x55d84d13ab64 in qemu_kvm_cpu_thread_fn (arg=0x55d84fc5cbb0) at 
/home/dbosschieter/src/qemu-pkg/src/cpus.c:1136
  #4  0x7f3a579ba4a4 in start_thread (arg=0x7f3a439a5700) at 
pthread_create.c:456
  #5  0x7f3a576fcd0f in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:97

  The ioctl call is an ioctl(18, KVM_RUN) and it looks like it is looping
  inside the vm itself.

  I saved the state of the VM (with `virsh save`) after I found it was hanging 
on its vcpu threads. Then I restored this vm on a test environment (running the 
same kernel, QEMU and libvirt version). After the restore the VM was still 
hanging at 100% cpu usage on all the vcpus.
  I tried to use the perf kvm guest option to trace the guest vm with a copy of 
the kernel, modules and kallsyms files from inside the guest vm and I got to 
the following perf stat:

    Event                    Total  %Total  CurAvg/s
    kvm_entry              5198993    23.1    277007
    kvm_exit               5198976    23.1    277006
    kvm_apic               1732103     7.7     92289
    kvm_msr                1732101     7.7     92289
    kvm_inj_virq           1731904     7.7     92278
    kvm_eoi                1731900     7.7     92278
    kvm_apic_accept_irq    1731900     7.7     92278
    kvm_hv_timer_state     1731780     7.7     92274
    kvm_pv_eoi             1731701     7.7     92267
    kvm_ple_window              36     0.0         2
    Total                 22521394           1199967

  We tried to run the crash tool against a dump of guest vm memory and that 
ga

[Qemu-devel] [PULL 5/5] migratioin/ram: leave RAMBlock->bmap blank on allocating

2019-06-05 Thread Juan Quintela
From: Wei Yang 

During migration, we would sync bitmap from ram_list.dirty_memory to
RAMBlock.bmap in cpu_physical_memory_sync_dirty_bitmap().

Since we set RAMBlock.bmap and ram_list.dirty_memory both to all 1, this
means at the first round this sync is meaningless and is a duplicated
work.

Leaving RAMBlock->bmap blank on allocating would have a side effect on
migration_dirty_pages, since it is calculated from the result of
cpu_physical_memory_sync_dirty_bitmap(). To keep it right, we need to
set migration_dirty_pages to 0 in ram_state_init().

Signed-off-by: Wei Yang 
Reviewed-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 03a9cce9f9..082aea9d23 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3173,11 +3173,11 @@ static int ram_state_init(RAMState **rsp)
 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
 
 /*
- * Count the total number of pages used by ram blocks not including any
- * gaps due to alignment or unplugs.
+ * This must match with the initial values of dirty bitmap.
+ * Currently we initialize the dirty bitmap to all zeros so
+ * here the total dirty page count is zero.
  */
-(*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
-
+(*rsp)->migration_dirty_pages = 0;
 ram_state_reset(*rsp);
 
 return 0;
@@ -3192,8 +3192,16 @@ static void ram_list_init_bitmaps(void)
 if (ram_bytes_total()) {
 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
 pages = block->max_length >> TARGET_PAGE_BITS;
+/*
+ * The initial dirty bitmap for migration must be set with all
+ * ones to make sure we'll migrate every guest RAM page to
+ * destination.
+ * Here we didn't set RAMBlock.bmap simply because it is already
+ * set in ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION] in
+ * ram_block_add, and that's where we'll sync the dirty bitmaps.
+ * Here setting RAMBlock.bmap would be fine too but not necessary.
+ */
 block->bmap = bitmap_new(pages);
-bitmap_set(block->bmap, 0, pages);
 if (migrate_postcopy_ram()) {
 block->unsentmap = bitmap_new(pages);
 bitmap_set(block->unsentmap, 0, pages);
-- 
2.21.0




[Qemu-devel] [PULL 1/5] migration/ram.c: MultiFDSendParams.sem_sync is not really used

2019-06-05 Thread Juan Quintela
From: Wei Yang 

Besides init and destroy, MultiFDSendParams.sem_sync is not really used.

Signed-off-by: Wei Yang 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 4c60869226..4c15f6fda1 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -661,8 +661,6 @@ typedef struct {
 uint64_t num_packets;
 /* pages sent through this channel */
 uint64_t num_pages;
-/* syncs main thread and channels */
-QemuSemaphore sem_sync;
 }  MultiFDSendParams;
 
 typedef struct {
@@ -1027,7 +1025,6 @@ void multifd_save_cleanup(void)
 p->c = NULL;
 qemu_mutex_destroy(&p->mutex);
 qemu_sem_destroy(&p->sem);
-qemu_sem_destroy(&p->sem_sync);
 g_free(p->name);
 p->name = NULL;
 multifd_pages_clear(p->pages);
@@ -1201,7 +1198,6 @@ int multifd_save_setup(void)
 
 qemu_mutex_init(&p->mutex);
 qemu_sem_init(&p->sem, 0);
-qemu_sem_init(&p->sem_sync, 0);
 p->quit = false;
 p->pending_job = 0;
 p->id = i;
-- 
2.21.0




[Qemu-devel] [PULL 0/5] Migration patches

2019-06-05 Thread Juan Quintela
The following changes since commit 47fbad45d47af8af784bb12a5719489edcd89b4c:

  Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging 
(2019-06-04 17:22:42 +0100)

are available in the Git repository at:

  https://github.com/juanquintela/qemu.git tags/migration-pull-request

for you to fetch changes up to 03158519384f15890d587937bd1b3ea699898e55:

  migratioin/ram: leave RAMBlock->bmap blank on allocating (2019-06-05 12:44:03 
+0200)


Migration Pull request

- Fd fixes and test (yuri)
- several fixes (wei)



Wei Yang (3):
  migration/ram.c: MultiFDSendParams.sem_sync is not really used
  migration/ram.c: multifd_send_state->count is not really used
  migratioin/ram: leave RAMBlock->bmap blank on allocating

Yury Kotov (2):
  migration: Fix fd protocol for incoming defer
  migration-test: Add a test for fd protocol

 migration/fd.c |   8 ++--
 migration/fd.h |   2 +-
 migration/ram.c|  27 ++-
 tests/libqtest.c   |  80 ++--
 tests/libqtest.h   |  51 -
 tests/migration-test.c | 101 +
 6 files changed, 246 insertions(+), 23 deletions(-)

-- 
2.21.0




[Qemu-devel] [PULL 2/5] migration/ram.c: multifd_send_state->count is not really used

2019-06-05 Thread Juan Quintela
From: Wei Yang 

Signed-off-by: Wei Yang 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/ram.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 4c15f6fda1..03a9cce9f9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -892,8 +892,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, 
Error **errp)
 
 struct {
 MultiFDSendParams *params;
-/* number of created threads */
-int count;
 /* array of pages to sent */
 MultiFDPages_t *pages;
 /* syncs main thread and channels */
@@ -1171,8 +1169,6 @@ static void multifd_new_send_channel_async(QIOTask *task, 
gpointer opaque)
 p->running = true;
 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
QEMU_THREAD_JOINABLE);
-
-atomic_inc(&multifd_send_state->count);
 }
 }
 
@@ -1188,7 +1184,6 @@ int multifd_save_setup(void)
 thread_count = migrate_multifd_channels();
 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
-atomic_set(&multifd_send_state->count, 0);
 multifd_send_state->pages = multifd_pages_init(page_count);
 qemu_sem_init(&multifd_send_state->sem_sync, 0);
 qemu_sem_init(&multifd_send_state->channels_ready, 0);
-- 
2.21.0




[Qemu-devel] [PULL 3/5] migration: Fix fd protocol for incoming defer

2019-06-05 Thread Juan Quintela
From: Yury Kotov 

Currently, incoming migration through fd supports only command-line case:
E.g.
fork();
fd = open();
exec("qemu ... -incoming fd:%d", fd);

It's possible to use the add-fd command to pass an fd for migration, but it's
an invalid case: add-fd works with fdsets, not with particular fds.

To work with getfd in incoming defer it's enough to use monitor_fd_param
instead of strtol. monitor_fd_param supports both cases:
* fd:123
* fd:fd_name (added by getfd).

And also the use of monitor_fd_param improves error messages.

Signed-off-by: Yury Kotov 
Reviewed-by: Peter Xu 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 migration/fd.c | 8 +---
 migration/fd.h | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/migration/fd.c b/migration/fd.c
index a7c13df4ad..0a29ecdebf 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -52,12 +52,14 @@ static gboolean fd_accept_incoming_migration(QIOChannel 
*ioc,
 return G_SOURCE_REMOVE;
 }
 
-void fd_start_incoming_migration(const char *infd, Error **errp)
+void fd_start_incoming_migration(const char *fdname, Error **errp)
 {
 QIOChannel *ioc;
-int fd;
+int fd = monitor_fd_param(cur_mon, fdname, errp);
+if (fd == -1) {
+return;
+}
 
-fd = strtol(infd, NULL, 0);
 trace_migration_fd_incoming(fd);
 
 ioc = qio_channel_new_fd(fd, errp);
diff --git a/migration/fd.h b/migration/fd.h
index a14a63ce2e..b901bc014e 100644
--- a/migration/fd.h
+++ b/migration/fd.h
@@ -16,7 +16,7 @@
 
 #ifndef QEMU_MIGRATION_FD_H
 #define QEMU_MIGRATION_FD_H
-void fd_start_incoming_migration(const char *path, Error **errp);
+void fd_start_incoming_migration(const char *fdname, Error **errp);
 
 void fd_start_outgoing_migration(MigrationState *s, const char *fdname,
  Error **errp);
-- 
2.21.0




Re: [Qemu-devel] [PATCH v2 01/12] util/iov: introduce qemu_iovec_init_extended

2019-06-05 Thread Vladimir Sementsov-Ogievskiy
04.06.2019 19:15, Vladimir Sementsov-Ogievskiy wrote:
> Introduce new initialization API, to create requests with padding. Will
> be used in the following patch. New API uses qemu_iovec_init_buf if
> resulting io vector has only one element, to avoid extra allocations.
> So, we need to update qemu_iovec_destroy to support destroying such
> QIOVs.
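> 
> For example, intended usage would look something like this (just a sketch;
> the buffers and sizes are illustrative):
> 
>     QEMUIOVector padded;
> 
>     /* wrap bytes [mid_offset, mid_offset + mid_len) of an existing qiov
>      * with a head buffer and a tail buffer */
>     qemu_iovec_init_extended(&padded, head_buf, head_len,
>                              qiov, mid_offset, mid_len,
>                              tail_buf, tail_len);
>     /* ... submit I/O using &padded ... */
>     qemu_iovec_destroy(&padded);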
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>   include/qemu/iov.h |   7 +++
>   util/iov.c | 112 +++--
>   2 files changed, 114 insertions(+), 5 deletions(-)
> 
> diff --git a/include/qemu/iov.h b/include/qemu/iov.h
> index 48b45987b7..f3787a0cf7 100644
> --- a/include/qemu/iov.h
> +++ b/include/qemu/iov.h
> @@ -199,6 +199,13 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)
>   
>   void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
>   void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int 
> niov);
> +void qemu_iovec_init_extended(
> +QEMUIOVector *qiov,
> +void *head_buf, size_t head_len,
> +QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
> +void *tail_buf, size_t tail_len);
> +void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
> +   size_t offset, size_t len);
>   void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
>   void qemu_iovec_concat(QEMUIOVector *dst,
>  QEMUIOVector *src, size_t soffset, size_t sbytes);
> diff --git a/util/iov.c b/util/iov.c
> index 74e6ca8ed7..39b6e31494 100644
> --- a/util/iov.c
> +++ b/util/iov.c
> @@ -353,6 +353,103 @@ void qemu_iovec_concat(QEMUIOVector *dst,
>   qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
>   }
>   
> +/*
> + * qiov_find_iov
> + *
> + * Return pointer to iovec structure, where byte at @offset in original 
> vector
> + * @iov exactly is.
> + * Set @remaining_offset to be offset inside that iovec to the same byte.
> + */
> +static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset,
> + size_t *remaining_offset)
> +{
> +while (offset > 0 && offset >= iov->iov_len) {
> +offset -= iov->iov_len;
> +iov++;
> +}
> +*remaining_offset = offset;
> +
> +return iov;
> +}
> +
> +/*
> + * qiov_slice
> + *
> + * Find subarray of iovec's, containing requested range. @head would
> + * be offset in first iov (returned by the function), @tail would be
> + * count of extra bytes in last iovec (returned iov + @niov - 1).
> + */
> +static struct iovec *qiov_slice(QEMUIOVector *qiov,
> +size_t offset, size_t len,
> +size_t *head, size_t *tail, int *niov)
> +{
> +struct iovec *iov, *end_iov;
> +
> +assert(offset + len <= qiov->size);
> +
> +iov = iov_skip_offset(qiov->iov, offset, head);
> +end_iov = iov_skip_offset(iov, *head + len, tail);
> +
> +if (*tail > 0) {
> +assert(*tail < end_iov->iov_len);
> +*tail = end_iov->iov_len - *tail;
> +end_iov++;
> +}
> +
> +*niov = end_iov - iov;
> +
> +return iov;
> +}
> +
> +/*
> + * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
> + * and @tail_buf buffer into new qiov.
> + */
> +void qemu_iovec_init_extended(
> +QEMUIOVector *qiov,
> +void *head_buf, size_t head_len,
> +QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
> +void *tail_buf, size_t tail_len)
> +{
> +size_t mid_head, mid_tail;
> +int total_niov, mid_niov;

Oops, clang is right, mid_niov may be uninitialized. So, here should be 
"mid_niov = 0".

> +struct iovec *p, *mid_iov;
> +
> +if (mid_len) {
> +mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
> + &mid_head, &mid_tail, &mid_niov);
> +}
> +
> +total_niov = !!head_len + mid_niov + !!tail_len;
> +if (total_niov == 1) {
> +qemu_iovec_init_buf(qiov, NULL, 0);
> +p = &qiov->local_iov;
> +} else {
> +qiov->niov = qiov->nalloc = total_niov;
> +qiov->size = head_len + mid_len + tail_len;
> +p = qiov->iov = g_new(struct iovec, qiov->niov);
> +}
> +
> +if (head_len) {
> +p->iov_base = head_buf;
> +p->iov_len = head_len;
> +p++;
> +}
> +
> +if (mid_len) {
> +memcpy(p, mid_iov, mid_niov * sizeof(*p));
> +p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head;
> +p[0].iov_len -= mid_head;
> +p[mid_niov - 1].iov_len -= mid_tail;
> +p += mid_niov;
> +}
> +
> +if (tail_len) {
> +p->iov_base = tail_buf;
> +p->iov_len = tail_len;
> +}
> +}
> +
>   /*
>* Check if the contents of the iovecs are all zero
>*/
> @@ -374,14 +471,19 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov)
>   return true;
>   }
>   
> +void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
> +   

[Qemu-devel] [PULL 4/5] migration-test: Add a test for fd protocol

2019-06-05 Thread Juan Quintela
From: Yury Kotov 

Signed-off-by: Yury Kotov 
Reviewed-by: Juan Quintela 
Reviewed-by: Peter Xu 
Signed-off-by: Juan Quintela 
---
 tests/libqtest.c   |  80 ++--
 tests/libqtest.h   |  51 -
 tests/migration-test.c | 101 +
 3 files changed, 227 insertions(+), 5 deletions(-)

diff --git a/tests/libqtest.c b/tests/libqtest.c
index 546a875913..9b9b5f37fc 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -32,6 +32,7 @@
 
 #define MAX_IRQ 256
 #define SOCKET_TIMEOUT 50
+#define SOCKET_MAX_FDS 16
 
 QTestState *global_qtest;
 
@@ -391,6 +392,40 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(QTestState *s, 
const char *fmt, ...)
 va_end(ap);
 }
 
+/* Sends a message and file descriptors to the socket.
+ * It's needed for qmp-commands like getfd/add-fd */
+static void socket_send_fds(int socket_fd, int *fds, size_t fds_num,
+const char *buf, size_t buf_size)
+{
+ssize_t ret;
+struct msghdr msg = { 0 };
+char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)] = { 0 };
+size_t fdsize = sizeof(int) * fds_num;
+struct cmsghdr *cmsg;
+struct iovec iov = { .iov_base = (char *)buf, .iov_len = buf_size };
+
+msg.msg_iov = &iov;
+msg.msg_iovlen = 1;
+
+if (fds && fds_num > 0) {
+g_assert_cmpuint(fds_num, <, SOCKET_MAX_FDS);
+
+msg.msg_control = control;
+msg.msg_controllen = CMSG_SPACE(fdsize);
+
+cmsg = CMSG_FIRSTHDR(&msg);
+cmsg->cmsg_len = CMSG_LEN(fdsize);
+cmsg->cmsg_level = SOL_SOCKET;
+cmsg->cmsg_type = SCM_RIGHTS;
+memcpy(CMSG_DATA(cmsg), fds, fdsize);
+}
+
+do {
+ret = sendmsg(socket_fd, &msg, 0);
+} while (ret < 0 && errno == EINTR);
+g_assert_cmpint(ret, >, 0);
+}
+
 static GString *qtest_recv_line(QTestState *s)
 {
 GString *line;
@@ -545,7 +580,8 @@ QDict *qtest_qmp_receive(QTestState *s)
  * in the case that they choose to discard all replies up until
  * a particular EVENT is received.
  */
-void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
+void qmp_fd_vsend_fds(int fd, int *fds, size_t fds_num,
+  const char *fmt, va_list ap)
 {
 QObject *qobj;
 
@@ -569,25 +605,49 @@ void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
 fprintf(stderr, "%s", str);
 }
 /* Send QMP request */
-socket_send(fd, str, qstring_get_length(qstr));
+if (fds && fds_num > 0) {
+socket_send_fds(fd, fds, fds_num, str, qstring_get_length(qstr));
+} else {
+socket_send(fd, str, qstring_get_length(qstr));
+}
 
 qobject_unref(qstr);
 qobject_unref(qobj);
 }
 }
 
+void qmp_fd_vsend(int fd, const char *fmt, va_list ap)
+{
+qmp_fd_vsend_fds(fd, NULL, 0, fmt, ap);
+}
+
+void qtest_qmp_vsend_fds(QTestState *s, int *fds, size_t fds_num,
+ const char *fmt, va_list ap)
+{
+qmp_fd_vsend_fds(s->qmp_fd, fds, fds_num, fmt, ap);
+}
+
 void qtest_qmp_vsend(QTestState *s, const char *fmt, va_list ap)
 {
-qmp_fd_vsend(s->qmp_fd, fmt, ap);
+qmp_fd_vsend_fds(s->qmp_fd, NULL, 0, fmt, ap);
 }
 
 QDict *qmp_fdv(int fd, const char *fmt, va_list ap)
 {
-qmp_fd_vsend(fd, fmt, ap);
+qmp_fd_vsend_fds(fd, NULL, 0, fmt, ap);
 
 return qmp_fd_receive(fd);
 }
 
+QDict *qtest_vqmp_fds(QTestState *s, int *fds, size_t fds_num,
+  const char *fmt, va_list ap)
+{
+qtest_qmp_vsend_fds(s, fds, fds_num, fmt, ap);
+
+/* Receive reply */
+return qtest_qmp_receive(s);
+}
+
 QDict *qtest_vqmp(QTestState *s, const char *fmt, va_list ap)
 {
 qtest_qmp_vsend(s, fmt, ap);
@@ -616,6 +676,18 @@ void qmp_fd_send(int fd, const char *fmt, ...)
 va_end(ap);
 }
 
+QDict *qtest_qmp_fds(QTestState *s, int *fds, size_t fds_num,
+ const char *fmt, ...)
+{
+va_list ap;
+QDict *response;
+
+va_start(ap, fmt);
+response = qtest_vqmp_fds(s, fds, fds_num, fmt, ap);
+va_end(ap);
+return response;
+}
+
 QDict *qtest_qmp(QTestState *s, const char *fmt, ...)
 {
 va_list ap;
diff --git a/tests/libqtest.h b/tests/libqtest.h
index 32d927755d..cadf1d4a03 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -84,6 +84,21 @@ QTestState *qtest_init_with_serial(const char *extra_args, 
int *sock_fd);
  */
 void qtest_quit(QTestState *s);
 
+/**
+ * qtest_qmp_fds:
+ * @s: #QTestState instance to operate on.
+ * @fds: array of file descriptors
+ * @fds_num: number of elements in @fds
+ * @fmt...: QMP message to send to qemu, formatted like
+ * qobject_from_jsonf_nofail().  See parse_escape() for what's
+ * supported after '%'.
+ *
+ * Sends a QMP message to QEMU with fds and returns the response.
+ */
+QDict *qtest_qmp_fds(QTestState *s, int *fds, size_t fds_num,
+ const char *fmt, ...)
+GCC_FMT_ATTR(4, 5);
+
 /**
  * qtest_qmp:
  * @s: #QTestState instance to oper
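
A hedged usage sketch of the new fd-passing helper (the migration-test.c hunk
is not shown above, so the descriptor, its name and the surrounding test
function are illustrative; assumes libqtest.h, qapi/qmp/qdict.h and fcntl.h
are included):

    static void test_fd_passing_sketch(QTestState *qts)
    {
        int fd = open("/dev/null", O_RDONLY);
        QDict *resp;

        g_assert_cmpint(fd, >=, 0);

        /* Hand the descriptor to QEMU and name it via the getfd command. */
        resp = qtest_qmp_fds(qts, &fd, 1,
                             "{ 'execute': 'getfd',"
                             "  'arguments': { 'fdname': 'fd-mig' } }");
        g_assert(qdict_haskey(resp, "return"));
        qobject_unref(resp);

        /* QEMU holds its own copy now; the test side can close it. */
        close(fd);
    }

The descriptors travel in the same sendmsg() as the QMP request, so they are
already available to QEMU by the time that command is processed.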

Re: [Qemu-devel] [PATCH v4 07/11] hmat acpi: Build Memory Side Cache Information Structure(s) in ACPI HMAT

2019-06-05 Thread Igor Mammedov
On Wed, 5 Jun 2019 14:04:10 +0800
Tao Xu  wrote:

> On 6/4/2019 11:04 PM, Igor Mammedov wrote:
> > On Wed,  8 May 2019 14:17:22 +0800
> > Tao Xu  wrote:
> >   
> >> From: Liu Jingqi 
> >>
> >> This structure describes memory side cache information for memory
> >> proximity domains if the memory side cache is present and the
> >> physical device(SMBIOS handle) forms the memory side cache.
> >> The software could use this information to effectively place
> >> the data in memory to maximize the performance of the system
> >> memory that use the memory side cache.
> >>
> >> Signed-off-by: Liu Jingqi 
> >> Signed-off-by: Tao Xu 
> >> ---
> >>  
> ...
> >> +
> >> +/* SMBIOS Handles */
> >> +/* TBD: set smbios handles */
> >> +build_append_int_noprefix(table_data, 0, 2 * n);  
> > Is memory side cache structure useful at all without pointing to SMBIOS 
> > entries?
> >   
> They are not useful yet, and the kernel 5.1 HMAT sysfs doesn't show
> SMBIOS entries. We can update it if it is useful in the future.

In that case I'd suggest dropping it for now, until this table is properly
populated and ready for consumption (i.e. drop this patch and the
corresponding CLI 9/11 patch).



[Qemu-devel] [PATCH v2 0/3] vmdk: Add read-only support for the new seSparse format

2019-06-05 Thread Sam Eiderman
v1:

VMware introduced a new snapshot format in VMFS6 - seSparse (Space
Efficient Sparse) which is the default format available in ESXi 6.7.
Add read-only support for the new snapshot format.

v2:

Fixed after Max's review:

* Removed strict sesparse checks
* Reduced maximal L1 table size
* Added non-write mode check in vmdk_open() on sesparse

Sam Eiderman (3):
  vmdk: Fix comment regarding max l1_size coverage
  vmdk: Reduce the max bound for L1 table size
  vmdk: Add read-only support for seSparse snapshots

 block/vmdk.c   | 371 ++---
 tests/qemu-iotests/059.out |   2 +-
 2 files changed, 352 insertions(+), 21 deletions(-)

-- 
2.13.3




[Qemu-devel] [PATCH v2 1/3] vmdk: Fix comment regarding max l1_size coverage

2019-06-05 Thread Sam Eiderman
Commit b0651b8c246d ("vmdk: Move l1_size check into vmdk_add_extent")
extended the l1_size check from VMDK4 to VMDK3 but did not update the
default coverage in the moved comment.

The previous vmdk4 calculation:

(512 * 1024 * 1024) * 512(l2 entries) * 65536(grain) = 16PB

The added vmdk3 calculation:

(512 * 1024 * 1024) * 4096(l2 entries) * 512(grain) = 1PB

Adding the calculation of vmdk3 to the comment.

In any case, VMware does not offer virtual disks larger than 2TB for
vmdk4/vmdk3, or larger than 64TB for the new undocumented seSparse format,
which is not yet implemented in qemu.

Reviewed-by: Karl Heubaum 
Reviewed-by: Eyal Moscovici 
Reviewed-by: Liran Alon 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 block/vmdk.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 51067c774f..0f2e453bf5 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -426,10 +426,15 @@ static int vmdk_add_extent(BlockDriverState *bs,
 return -EFBIG;
 }
 if (l1_size > 512 * 1024 * 1024) {
-/* Although with big capacity and small l1_entry_sectors, we can get a
+/*
+ * Although with big capacity and small l1_entry_sectors, we can get a
  * big l1_size, we don't want unbounded value to allocate the table.
- * Limit it to 512M, which is 16PB for default cluster and L2 table
- * size */
+ * Limit it to 512M, which is:
+ * 16PB - for default "Hosted Sparse Extent" (VMDK4)
+ *cluster size: 64KB, L2 table size: 512 entries
+ * 1PB  - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
+ *cluster size: 512B, L2 table size: 4096 entries
+ */
 error_setg(errp, "L1 size too big");
 return -EFBIG;
 }
-- 
2.13.3




[Qemu-devel] [PATCH v2 2/3] vmdk: Reduce the max bound for L1 table size

2019-06-05 Thread Sam Eiderman
512M of L1 entries is a very loose bound; only 32M are required to store
the maximal supported VMDK file size of 2TB.

Fixed qemu-iotests 059 - the failure now occurs earlier, on an impossible
L1 table size.
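
As a quick arithmetic check of the new bound (a standalone sketch, not part
of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t l1_entries = 32ull * 1024 * 1024; /* new L1 bound        */
        const uint64_t l2_entries = 512;                 /* minimal L2 entries  */
        const uint64_t cluster    = 512;                 /* minimal cluster (B) */
        const uint64_t coverage   = l1_entries * l2_entries * cluster;

        /* 32M L1 entries still cover 8 TiB ... */
        assert(coverage == 8ull * 1024 * 1024 * 1024 * 1024);
        /* ... well above the 2 TB maximum supported by VMDK3/VMDK4. */
        assert(coverage >= 2ull * 1024 * 1024 * 1024 * 1024);
        return 0;
    }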

Reviewed-by: Karl Heubaum 
Reviewed-by: Eyal Moscovici 
Reviewed-by: Liran Alon 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
---
 block/vmdk.c   | 13 +++--
 tests/qemu-iotests/059.out |  2 +-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 0f2e453bf5..931eb2759c 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -425,15 +425,16 @@ static int vmdk_add_extent(BlockDriverState *bs,
 error_setg(errp, "Invalid granularity, image may be corrupt");
 return -EFBIG;
 }
-if (l1_size > 512 * 1024 * 1024) {
+if (l1_size > 32 * 1024 * 1024) {
 /*
  * Although with big capacity and small l1_entry_sectors, we can get a
  * big l1_size, we don't want unbounded value to allocate the table.
- * Limit it to 512M, which is:
- * 16PB - for default "Hosted Sparse Extent" (VMDK4)
- *cluster size: 64KB, L2 table size: 512 entries
- * 1PB  - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
- *cluster size: 512B, L2 table size: 4096 entries
+ * Limit it to 32M, which is enough to store:
+ * 8TB  - for both VMDK3 & VMDK4 with
+ *minimal cluster size: 512B
+ *minimal L2 table size: 512 entries
+ *8 TB is still more than the maximal value supported for
+ *VMDK3 & VMDK4 which is 2TB.
  */
 error_setg(errp, "L1 size too big");
 return -EFBIG;
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index f51394ae8e..4fab42a28c 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2358,5 +2358,5 @@ Offset  Length  Mapped to   File
 0x14000 0x1 0x5 TEST_DIR/t-s003.vmdk
 
 === Testing afl image with a very large capacity ===
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
 *** done
-- 
2.13.3




[Qemu-devel] [PATCH v2 3/3] vmdk: Add read-only support for seSparse snapshots

2019-06-05 Thread Sam Eiderman
Until ESXi 6.5 VMware used the vmfsSparse format for snapshots (VMDK3 in
QEMU).

This format was lacking in the following:

* Grain directory (L1) and grain table (L2) entries were 32-bit,
  allowing access to only 2TB (slightly less) of data.
* The grain size (default) was 512 bytes - leading to data
  fragmentation and many grain tables.
* For space reclamation purposes, it was necessary to find all the
  grains which are not pointed to by any grain table - so a reverse
  mapping of "offset of grain in vmdk" to "grain table" must be
  constructed - which takes large amounts of CPU/RAM.

The format specification can be found in VMware's documentation:
https://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf

In ESXi 6.5, to support snapshot files larger than 2TB, a new format was
introduced: SESparse (Space Efficient).

This format fixes the above issues:

* All entries are now 64-bit.
* The grain size (default) is 4KB.
* Grain directory and grain tables are now located at the beginning
  of the file.
  + seSparse format reserves space for all grain tables.
  + Grain tables can be addressed using an index.
  + Grains are located in the end of the file and can also be
addressed with an index.
  - seSparse vmdks of large disks (64TB) have huge preallocated
headers - mainly due to L2 tables, even for empty snapshots.
* The header contains a reverse mapping ("backmap") of "offset of
  grain in vmdk" to "grain table" and a bitmap ("free bitmap") which
  specifies for each grain - whether it is allocated or not.
  Using these data structures we can implement space reclamation
  efficiently.
* Due to the fact that the header now maintains two mappings:
* The regular one (grain directory & grain tables)
* A reverse one (backmap and free bitmap)
  These data structures can lose consistency upon crash and result
  in a corrupted VMDK.
  Therefore, a journal is also added to the VMDK and is replayed
  when VMware reopens the file after a crash.

Since ESXi 6.7 - SESparse is the only snapshot format available.

Unfortunately, VMware does not provide documentation regarding the new
seSparse format.

This commit is based on black-box research of the seSparse format.
Various in-guest block operations and their effect on the snapshot file
were tested.

The only VMware provided source of information (regarding the underlying
implementation) was a log file on the ESXi:

/var/log/hostd.log

Whenever an seSparse snapshot is created, the log is populated
with seSparse records.

Relevant log records are of the form:

[...] Const Header:
[...]  constMagic = 0xcafebabe
[...]  version= 2.1
[...]  capacity   = 204800
[...]  grainSize  = 8
[...]  grainTableSize = 64
[...]  flags  = 0
[...] Extents:
[...]  Header : <1 : 1>
[...]  JournalHdr : <2 : 2>
[...]  Journal: <2048 : 2048>
[...]  GrainDirectory : <4096 : 2048>
[...]  GrainTables: <6144 : 2048>
[...]  FreeBitmap : <8192 : 2048>
[...]  BackMap: <10240 : 2048>
[...]  Grain  : <12288 : 204800>
[...] Volatile Header:
[...] volatileMagic = 0xcafecafe
[...] FreeGTNumber  = 0
[...] nextTxnSeqNumber  = 0
[...] replayJournal = 0

The sizes that are seen in the log file are in sectors.
Extents are of the following format: <offset in sectors : number of sectors>

This commit is a strict implementation which enforces:
* magics
* version number 2.1
* grain size of 8 sectors  (4KB)
* grain table size of 64 sectors
* zero flags
* extent locations

Additionally, this commit provides only a subset of the functionality
offered by seSparse's format:
* Read-only
* No journal replay
* No space reclamation
* No unmap support

Hence, journal header, journal, free bitmap and backmap extents are
unused; only the "classic" (L1 -> L2 -> data) grain access is
implemented.

However there are several differences in the grain access itself.
Grain directory (L1):
* Grain directory entries are indexes (not offsets) to grain
  tables.
* Valid grain directory entries have their highest nibble set to
  0x1.
* Since grain tables are always located in the beginning of the
  file - the index can fit into 32 bits - so we can use its low
  part if it's valid.
Grain table (L2):
* Grain table entries are indexes (not offsets) to grains.
* If the highest nibble of the entry is:
0x0:
The grain is not allocated.
The rest of the bytes are 0.
0x1:
The grain is unmapped - guest sees a zero grain.
The rest of the bits point to the previously mapped grain,
see 0x3 case.
0x2:
The grain is zero.
0x3:
The grain is allocated - to get the index calculate:
((entry & 0x0fff) >> 48) |
((entry & 0x
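
A sketch of the grain-table entry classification described above (illustrative
only, not code from the patch; the exact index-extraction expression is cut
off in the text above, so it is deliberately left out):

    #include <stdint.h>

    enum sesparse_grain_state {
        SESPARSE_GRAIN_UNALLOCATED, /* top nibble 0x0: rest of the entry is 0 */
        SESPARSE_GRAIN_UNMAPPED,    /* top nibble 0x1: guest reads zeroes     */
        SESPARSE_GRAIN_ZERO,        /* top nibble 0x2                         */
        SESPARSE_GRAIN_ALLOCATED,   /* top nibble 0x3: low bits hold an index */
        SESPARSE_GRAIN_INVALID,     /* anything else                          */
    };

    static enum sesparse_grain_state sesparse_classify(uint64_t l2_entry)
    {
        switch (l2_entry >> 60) {   /* highest nibble of the 64-bit entry */
        case 0x0: return SESPARSE_GRAIN_UNALLOCATED;
        case 0x1: return SESPARSE_GRAIN_UNMAPPED;
        case 0x2: return SESPARSE_GRAIN_ZERO;
        case 0x3: return SESPARSE_GRAIN_ALLOCATED;
        default:  return SESPARSE_GRAIN_INVALID;
        }
    }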

Re: [Qemu-devel] [PULL 5/5] migratioin/ram: leave RAMBlock->bmap blank on allocating

2019-06-05 Thread Philippe Mathieu-Daudé
migratioin -> migration



Re: [Qemu-devel] [PATCH] migratioin/ram.c: reset complete_round when we gets a queued page

2019-06-05 Thread Philippe Mathieu-Daudé
migratioin -> migration



[Qemu-devel] [PATCH v2 1/2] block: introduce pinned blk

2019-06-05 Thread Vladimir Sementsov-Ogievskiy
Add stay_at_node fields to BlockBackend and BdrvChild, for the same
behavior as the stay_at_node field of BdrvChildRole. They will be used for
the block-job blk.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block_int.h  |  6 ++
 include/sysemu/block-backend.h |  2 ++
 block.c|  2 +-
 block/block-backend.c  | 25 -
 4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 06df2bda1b..1a2eebd904 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -729,6 +729,12 @@ struct BdrvChild {
  */
 bool frozen;
 
+/*
+ * This link should not be modified in bdrv_replace_node process. Used by
+ * should_update_child()
+ */
+bool stay_at_node;
+
 QLIST_ENTRY(BdrvChild) next;
 QLIST_ENTRY(BdrvChild) next_parent;
 };
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 733c4957eb..fb248be977 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -77,6 +77,8 @@ typedef struct BlockBackendPublic {
 } BlockBackendPublic;
 
 BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
+BlockBackend *blk_new_pinned(AioContext *ctx,
+ uint64_t perm, uint64_t shared_perm);
 BlockBackend *blk_new_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
 int blk_get_refcnt(BlockBackend *blk);
diff --git a/block.c b/block.c
index e3e77feee0..fda92c8629 100644
--- a/block.c
+++ b/block.c
@@ -3971,7 +3971,7 @@ static bool should_update_child(BdrvChild *c, 
BlockDriverState *to)
 GHashTable *found;
 bool ret;
 
-if (c->role->stay_at_node) {
+if (c->stay_at_node || c->role->stay_at_node) {
 return false;
 }
 
diff --git a/block/block-backend.c b/block/block-backend.c
index f5d9407d20..cd59f98e51 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -88,6 +88,11 @@ struct BlockBackend {
  * Accessed with atomic ops.
  */
 unsigned int in_flight;
+
+/*
+ * On blk_insert_bs() new child will inherit  @stay_at_node.
+ */
+bool stay_at_node;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -321,9 +326,14 @@ static const BdrvChildRole child_root = {
  * to other users of the attached node.
  * Both sets of permissions can be changed later using blk_set_perm().
  *
+ * @stay_at_node is used to set stay_at_node field of child, attached in
+ * blk_insert_bs(). If true, child bs will not be updated on bdrv_replace_node.
+ *
  * Return the new BlockBackend on success, null on failure.
  */
-BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
+static BlockBackend *blk_new_common(AioContext *ctx,
+uint64_t perm, uint64_t shared_perm,
+bool stay_at_node)
 {
 BlockBackend *blk;
 
@@ -332,6 +342,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, 
uint64_t shared_perm)
 blk->ctx = ctx;
 blk->perm = perm;
 blk->shared_perm = shared_perm;
+blk->stay_at_node = stay_at_node;
 blk_set_enable_write_cache(blk, true);
 
 blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
@@ -347,6 +358,17 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, 
uint64_t shared_perm)
 return blk;
 }
 
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
+{
+return blk_new_common(ctx, perm, shared_perm, false);
+}
+
+BlockBackend *blk_new_pinned(AioContext *ctx,
+ uint64_t perm, uint64_t shared_perm)
+{
+return blk_new_common(ctx, perm, shared_perm, true);
+}
+
 /*
  * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
  * The new BlockBackend is in the main AioContext.
@@ -808,6 +830,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, 
Error **errp)
 if (blk->root == NULL) {
 return -EPERM;
 }
+blk->root->stay_at_node = blk->stay_at_node;
 
 notifier_list_notify(&blk->insert_bs_notifiers, blk);
 if (tgm->throttle_state) {
-- 
2.18.0




[Qemu-devel] [PATCH v2 0/2] introduce pinned blk

2019-06-05 Thread Vladimir Sementsov-Ogievskiy
Hi all.

Here is a proposal for replacing the workaround in mirror, where
we have to move the filter node back to the block-job blk after
bdrv_replace_node.

v2: rebased on the updated blk_new, with the aio context parameter.


Vladimir Sementsov-Ogievskiy (2):
  block: introduce pinned blk
  blockjob: use blk_new_pinned in block_job_create

 include/block/block_int.h  |  6 ++
 include/sysemu/block-backend.h |  2 ++
 block.c|  2 +-
 block/block-backend.c  | 25 -
 block/mirror.c |  6 +-
 blockjob.c |  2 +-
 6 files changed, 35 insertions(+), 8 deletions(-)

-- 
2.18.0




[Qemu-devel] [PATCH v2 2/2] blockjob: use blk_new_pinned in block_job_create

2019-06-05 Thread Vladimir Sementsov-Ogievskiy
The child_role job already has .stay_at_node=true, so on a bdrv_replace_node
operation these children are unchanged. Make the block-job blk behave in the
same manner, to avoid inconsistent intermediate graph states and workarounds
like the one in mirror.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c | 6 +-
 blockjob.c | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index f8bdb5b21b..23443116e4 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -713,12 +713,8 @@ static int mirror_exit_common(Job *job)
 &error_abort);
 bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
 
-/* We just changed the BDS the job BB refers to (with either or both of the
- * bdrv_replace_node() calls), so switch the BB back so the cleanup does
- * the right thing. We don't need any permissions any more now. */
-blk_remove_bs(bjob->blk);
+/* We don't need any permissions any more now. */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
 
 bs_opaque->job = NULL;
 
diff --git a/blockjob.c b/blockjob.c
index 931d675c0c..f5c8d31491 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -398,7 +398,7 @@ void *block_job_create(const char *job_id, const 
BlockJobDriver *driver,
 job_id = bdrv_get_device_name(bs);
 }
 
-blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
+blk = blk_new_pinned(bdrv_get_aio_context(bs), perm, shared_perm);
 ret = blk_insert_bs(blk, bs, errp);
 if (ret < 0) {
 blk_unref(blk);
-- 
2.18.0




Re: [Qemu-devel] [PATCH] qapi: add dirty-bitmaps to query-named-block-nodes result

2019-06-05 Thread Markus Armbruster
John Snow  writes:

> On 5/31/19 10:55 AM, Eric Blake wrote:
>> On 5/30/19 11:26 AM, John Snow wrote:
>>>
>>>
>>> On 5/30/19 10:39 AM, Vladimir Sementsov-Ogievskiy wrote:
 Let's add a possibility to query dirty-bitmaps not only on root nodes.
 It is useful when dealing both with snapshots and incremental backups.

>> 
 +++ b/block/qapi.c
 @@ -78,6 +78,11 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend 
 *blk,
  info->backing_file = g_strdup(bs->backing_file);
  }
  
 +if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
 +info->has_dirty_bitmaps = true;
 +info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
 +}
 +
  info->detect_zeroes = bs->detect_zeroes;
  
  if (blk && blk_get_public(blk)->throttle_group_member.throttle_state) 
 {

>>>
>>> So query-block uses bdrv_query_info, which calls bdrv_block_device_info,
>>> so we'll duplicate the bitmap output when doing the old-fashioned block
>>> query, but that's probably harmless overall.
>> 
>> We already know that none of our existing query- interfaces are sane
>> (either too little information, or too much).  Duplication starts to
>> push an interface towards too much (it takes processor time to bundle up
>> the extra JSON, especially if the other end is not going to care if it
>> was present). I know Kevin still has somewhere on his to-do list the
>> implementation of a saner query- command for the information we really
>> want (about each block, without redundant information, and where we
>> don't repeat information in a nested manner, but where we also don't
>> omit information that would otherwise require multiple existing query-
>> to reconstruct).
>> 
>>>
>>> We can continue to support the output in both places, or we could opt to
>>> deprecate the older interface; I think this is one of the last chances
>>> we'd get to do so before libvirt and wider adoption.
>>>
>>> I think that's probably Eric's choice.
>> 
>> If you want to try to deprecate the old location, introspection at least
>> works to allow libvirt to know which place to look for it on a given
>> qemu. If you don't think deprecation is necessary, the duplication is
>> probably tolerable for now (as ideally we'd be deprecating ALL of our
>> not-quite-perfect query- block interfaces in favor of whatever sane
>> interface Kevin comes up with).
>> 
>
> It sounds like it's probably the right move to deprecate the entire
> legacy interface, but still... If you have 20 or 30 bitmaps on a root
> node, you will see 40 or 60 entries.
>
> What's the smart way to deprecate it? We're not adding new flags or
> showing new arguments or anything. There might not be bitmaps, so you
> can't rely on that field being present or absent.
>
> Recommendations?

Kevin's "[PATCH v4 0/6] file-posix: Add dynamic-auto-read-only QAPI
feature" adds "feature flags" to the QAPI schema language, limited to
struct types, because that's what he needs.  They're visible in
introspection.  I intend to complete his work, so we can tack
"deprecated" feature flags to pretty much anything.

Could that address your need?
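
For illustration, such a feature flag would be attached to a schema entity
roughly like this (a hypothetical struct, shown only to sketch the syntax of
the series referenced above):

    { 'struct': 'ExampleType',
      'data': { 'member': 'str' },
      'features': [ 'deprecated' ] }

Since the flags are visible in query-qmp-schema output, management software
can probe for 'deprecated' and switch to the replacement interface.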



Re: [Qemu-devel] qgraph

2019-06-05 Thread Natalia Fursova
Hello, Paolo!

Thank you for your answer. I would like to clarify something about the qmp
commands.
For example, consider SCSI controller "lsi53c895a". For getting information
we use two commands: "device-list-properties" and "qom-list-properties".
The output consists of many properties, but there is no information about the
buses provided by this device. Is there a QMP command which provides this
information?
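
For reference, the two commands are issued over QMP like this (illustrative,
using the lsi53c895a controller from the question):

    { "execute": "device-list-properties",
      "arguments": { "typename": "lsi53c895a" } }
    { "execute": "qom-list-properties",
      "arguments": { "typename": "lsi53c895a" } }

Both replies list the object's properties only, which is why the buses the
device provides never show up in the output.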


Best regards,
Natalia

-Original Message-
From: Paolo Bonzini [mailto:pbonz...@redhat.com] 
Sent: Tuesday, June 04, 2019 6:06 PM
To: Natalia Fursova; qemu-devel@nongnu.org; Паша
Subject: Re: [Qemu-devel] qgraph

On 04/06/19 10:37, Natalia Fursova wrote:
> Hello, Paolo!
> 
> We are developing a GUI for QEMU and want to implement a graphical interface
> for configuring the device tree. To do this we need to detect the list of
> devices supported by any specific platform.
> 
> Then we use this tree for command line generation.
> 
> Existing qmp commands don't supply enough information to build the tree.
> They miss bus and interface data.

Hi!

Bus data is implicit in the parent class provided by QMP.  For example,
pci-device is the superclass of PCI devices, scsi-device is the
superclass of SCSI devices, etc.

qgraph is indeed similar, but it is only used by test cases, not by QEMU
itself.  The difference is that qgraph links together _drivers_ for the
devices, and so it is limited to those devices that have tests.  For
some ideas behind qgraph, see
https://wiki.qemu.org/Features/qtest_driver_framework.

Paolo

> 
>  
> 
> There is something called 'qgraph' in Qemu. It looks similar to data
> structures in our GUI tool.
> 
>  
> 
> Tell me please, do you have a plan to improve this interface? We found the
> following hardcoded lines in the tests:
> 
> qos_node_consumes("megasas", "pci-bus", &opts);
> 
> qos_node_produces("megasas", "pci-device");
> 
>  
> 
> And we wanted to extend QMP to query this kind of information.
> 
>  
> 
>  
> 
> Best regards, 
> 
> Natalia
> 
>  
> 





Re: [Qemu-devel] [PATCH v7 0/4] rng-builtin: add an RNG backend that uses qemu_guest_getrandom()

2019-06-05 Thread Markus Armbruster
Laurent Vivier  writes:

> Add a new RNG backend using QEMU builtin getrandom function.
>
> v7: rebase on master
> Make rng-builtin asynchronous with QEMUBH (removed existing R-b)

Pardon the ignorant question: why is that necessary?



Re: [Qemu-devel] QMP; unsigned 64-bit ints; JSON standards compliance

2019-06-05 Thread Daniel P . Berrangé
On Tue, Jun 04, 2019 at 08:38:24AM +0200, Markus Armbruster wrote:
> We've discussed possible solutions.  Is anyone working or intending to
> work on patches?

I'm not actively working on it now, nor any plans in the near future.

I would like to see it fixed sooner rather than later though.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-devel] qgraph

2019-06-05 Thread Paolo Bonzini
On 05/06/19 14:34, Natalia Fursova wrote:
> 
> Thank you for your answer. I would like to clarify something about the qmp
> commands.
> For example, consider SCSI controller "lsi53c895a". For getting information
> we use two commands: "device-list-properties" and "qom-list-properties".
> The output consists of many properties, but there is no information about
> the buses provided by this device. Is there a QMP command which provides
> this information?

Unfortunately there is no information in QMP about buses that are
provided.   qom-list-types gives the buses that are requested.

Paolo
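
To make the distinction concrete, the "requested" side can be queried, e.g.
every concrete device model that plugs into a PCI bus (illustrative request):

    { "execute": "qom-list-types",
      "arguments": { "implements": "pci-device", "abstract": false } }

There is no counterpart that reports which bus types a given device model
creates.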



[Qemu-devel] [PATCH] docs/vhost-user.json: some firmware.json copy leftovers

2019-06-05 Thread Marc-André Lureau
Signed-off-by: Marc-André Lureau 
---
 docs/interop/vhost-user.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json
index ae88c03117..da6aaf51c8 100644
--- a/docs/interop/vhost-user.json
+++ b/docs/interop/vhost-user.json
@@ -178,11 +178,11 @@
 #
 #   - /usr/share/qemu/vhost-user/50-crosvm-gpu.json
 #
-# then the sysadmin can prevent the default QEMU being used at all with
+# then the sysadmin can prevent the default QEMU GPU being used at all with
 #
 #   $ touch /etc/qemu/vhost-user/50-qemu-gpu.json
 #
-# The sysadmin can replace/alter the distro default OVMF with
+# The sysadmin can replace/alter the distro default QEMU GPU with
 #
 #   $ vim /etc/qemu/vhost-user/50-qemu-gpu.json
 #
@@ -190,7 +190,7 @@
 #
 #   $ vim /etc/qemu/vhost-user/10-qemu-gpu.json
 #
-# or they can provide a parallel OVMF with lower priority
+# or they can provide a parallel QEMU GPU with lower priority
 #
 #   $ vim /etc/qemu/vhost-user/99-qemu-gpu.json
 #
-- 
2.22.0.rc2.384.g1a9a72ea1d




[Qemu-devel] [Bug 1831225] Re: guest migration 100% cpu freeze bug

2019-06-05 Thread Jean-Philippe Menil
Hi,

I suffer from this bug too (or a very similar one) on 4.15.0-50-generic,
without the patch mentioned earlier (I used this patch last year to
migrate from the previous qemu version).

Jean-Philippe

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1831225

Title:
  guest migration 100% cpu freeze bug

Status in QEMU:
  New

Bug description:
  # Investigate migration cpu hog(100%) bug

  I have some issues when migrating from kernel 4.14.63 running qemu 2.11.2 to 
kernel 4.19.43 running qemu 2.11.2.
  The hypervisors are running on debian jessie with libvirt v5.3.0.
  Linux, libvirt and qemu are all custom compiled.

  I migrated around 10,000 vms and every once in a while a vm is stuck
  at 100% cpu after migration; what we can see right now is that the target
  hypervisor runs on linux 4.19.53. This happened with 4 vms so far. It
  is not that easy to debug; we found this out pretty quickly because we
  are running monitoring on frozen vms after migrations.

  Last year we were having the same "kind of" bug 
https://bugs.launchpad.net/qemu/+bug/177 when trying to upgrade qemu 2.6 to 
2.11.
  This bug was fixed after applying the following patch: 
http://lists.nongnu.org/archive/html/qemu-devel/2018-04/msg00820.html

  This patch is still applied as you can see because of the available pre_load 
var on the kvmclock_vmsd struct:
  (gdb) ptype kvmclock_vmsd
  type = const struct VMStateDescription {
  const char *name;
  int unmigratable;
  int version_id;
  int minimum_version_id;
  int minimum_version_id_old;
  MigrationPriority priority;
  LoadStateHandler *load_state_old;
  int (*pre_load)(void *);
  int (*post_load)(void *, int);
  int (*pre_save)(void *);
  _Bool (*needed)(void *);
  VMStateField *fields;
  const VMStateDescription **subsections;
  }

  I attached gdb to a vcpu thread of one stuck vm, and a bt showed the 
following info:
  Thread 4 (Thread 0x7f3a431a4700 (LWP 37799)):
  #0  0x7f3a576f5017 in ioctl () at ../sysdeps/unix/syscall-template.S:84
  #1  0x55d84d15de57 in kvm_vcpu_ioctl (cpu=cpu@entry=0x55d84fca78d0, 
type=type@entry=44672) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:2050
  #2  0x55d84d15dfc6 in kvm_cpu_exec (cpu=cpu@entry=0x55d84fca78d0) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:1887
  #3  0x55d84d13ab64 in qemu_kvm_cpu_thread_fn (arg=0x55d84fca78d0) at 
/home/dbosschieter/src/qemu-pkg/src/cpus.c:1136
  #4  0x7f3a579ba4a4 in start_thread (arg=0x7f3a431a4700) at 
pthread_create.c:456
  #5  0x7f3a576fcd0f in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:97

  Thread 3 (Thread 0x7f3a439a5700 (LWP 37798)):
  #0  0x7f3a576f5017 in ioctl () at ../sysdeps/unix/syscall-template.S:84
  #1  0x55d84d15de57 in kvm_vcpu_ioctl (cpu=cpu@entry=0x55d84fc5cbb0, 
type=type@entry=44672) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:2050
  #2  0x55d84d15dfc6 in kvm_cpu_exec (cpu=cpu@entry=0x55d84fc5cbb0) at 
/home/dbosschieter/src/qemu-pkg/src/accel/kvm/kvm-all.c:1887
  #3  0x55d84d13ab64 in qemu_kvm_cpu_thread_fn (arg=0x55d84fc5cbb0) at 
/home/dbosschieter/src/qemu-pkg/src/cpus.c:1136
  #4  0x7f3a579ba4a4 in start_thread (arg=0x7f3a439a5700) at 
pthread_create.c:456
  #5  0x7f3a576fcd0f in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:97

  The ioctl call is an ioctl(18, KVM_RUN) and it looks like it is looping
  inside the vm itself.

  I saved the state of the VM (with `virsh save`) after I found it was hanging
on its vcpu threads. Then I restored this vm on a test environment running the
same kernel, QEMU and libvirt version. After the restore the VM was still
hanging at 100% cpu usage on all the vcpus.
  I tried to use the perf kvm guest option to trace the guest vm with a copy of 
the kernel, modules and kallsyms files from inside the guest vm and I got to 
the following perf stat:

   Event                  Total  %Total  CurAvg/s
   kvm_entry            5198993    23.1    277007
   kvm_exit             5198976    23.1    277006
   kvm_apic             1732103     7.7     92289
   kvm_msr              1732101     7.7     92289
   kvm_inj_virq         1731904     7.7     92278
   kvm_eoi              1731900     7.7     92278
   kvm_apic_accept_irq  1731900     7.7     92278
   kvm_hv_timer_state   1731780     7.7     92274
   kvm_pv_eoi           1731701     7.7     92267
   kvm_ple_window            36     0.0         2
   Total               22521394           1199967

  We tried to run the crash tool against a dump of guest vm memory and that 
gave 

Re: [Qemu-devel] [PATCH] target/i386: save EFER for 32-bit targets

2019-06-05 Thread Eduardo Habkost
On Wed, May 29, 2019 at 03:41:56PM +0300, Pavel Dovgalyuk wrote:
> i386 (32 bit) emulation uses EFER in wrmsr and in MMU fault
> processing.
> But it does not included in VMState, because "efer" field is disabled with
> #ifdef TARGET_X86_64
> 
> This patch adds a section for 32-bit targets which saves EFER when
> it's value is non-zero.
> 
> Signed-off-by: Pavel Dovgalyuk 

Queued, thanks!

-- 
Eduardo
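
The approach quoted above amounts to a conditional VMState subsection.  A
rough sketch of what such a subsection looks like (illustrative, not the
exact hunk from the patch):

    /* Only send EFER when it carries a non-zero value. */
    static bool efer32_needed(void *opaque)
    {
        X86CPU *cpu = opaque;

        return cpu->env.efer != 0;
    }

    static const VMStateDescription vmstate_efer32 = {
        .name = "cpu/efer32",
        .version_id = 1,
        .minimum_version_id = 1,
        .needed = efer32_needed,
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(env.efer, X86CPU),
            VMSTATE_END_OF_LIST()
        }
    };

The subsection would only be registered for 32-bit targets; the 64-bit build
already migrates EFER as part of the main CPU state.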



Re: [Qemu-devel] [PATCH 1/1] accel: Remove unused AccelClass::opt_name attribute

2019-06-05 Thread Eduardo Habkost
On Fri, May 31, 2019 at 12:53:34PM -0400, Wainer dos Santos Moschetta wrote:
> The AccelType type was converted to AccelClass QOM
> object on b14a0b7469f, and the original data type had
> a field to store the option name which in turn was
> used to search an accelerator. The lookup method
> (accel_find) changed too, making the option field
> unnecessary but it became AccelClass::opt_name despite
> that. Therefore, and given that no accelerator
> implementation sets AccelClass::opt_name, let's
> remove this attribute.
> 
> Signed-off-by: Wainer dos Santos Moschetta 

Queued, thanks!

-- 
Eduardo



Re: [Qemu-devel] [PATCH] migration/multifd: sync packet_num after all thread are done

2019-06-05 Thread Wei Yang
On Wed, Jun 05, 2019 at 12:39:06PM +0200, Juan Quintela wrote:
>Wei Yang  wrote:
>> Notification from recv thread is not ordered, which means we may be
>> notified by one MultiFDRecvParams but adjust packet_num for another.
>>
>> Move the adjustment after we are sure each recv thread are sync-ed.
>>
>> Signed-off-by: Wei Yang 
>
>
>Reviewed-by: Juan Quintela 
>
>It shouldn't matter a lot in real life, but I agree that it is better.

Yep, thanks.

-- 
Wei Yang
Help you, Help me


