Re: [Qemu-devel] [PATCH v4 04/12] migration: avoid concurrent invoke channel_close by different threads

2018-05-31 Thread 858585 jemmy
On Wed, May 30, 2018 at 10:45 PM, Dr. David Alan Gilbert
 wrote:
> * Lidong Chen (jemmy858...@gmail.com) wrote:
>> From: Lidong Chen 
>>
>> The channel_close maybe invoked by different threads. For example, source
>> qemu invokes qemu_fclose in main thread, migration thread and return path
>> thread. Destination qemu invokes qemu_fclose in main thread, listen thread
>> and COLO incoming thread.
>>
>> Add a mutex in QEMUFile struct to avoid concurrent invoke channel_close.
>>
>> Signed-off-by: Lidong Chen 
>> ---
>>  migration/qemu-file.c | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
>> index 977b9ae..87d0f05 100644
>> --- a/migration/qemu-file.c
>> +++ b/migration/qemu-file.c
>> @@ -52,6 +52,7 @@ struct QEMUFile {
>>  unsigned int iovcnt;
>>
>>  int last_error;
>> +QemuMutex lock;
>
> That could do with a comment saying what you're protecting
>
>>  };
>>
>>  /*
>> @@ -96,6 +97,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps 
>> *ops)
>>
>>  f = g_new0(QEMUFile, 1);
>>
>> +qemu_mutex_init(&f->lock);
>>  f->opaque = opaque;
>>  f->ops = ops;
>>  return f;
>> @@ -328,7 +330,9 @@ int qemu_fclose(QEMUFile *f)
>>  ret = qemu_file_get_error(f);
>>
>>  if (f->ops->close) {
>> +qemu_mutex_lock(&f->lock);
>>  int ret2 = f->ops->close(f->opaque);
>> +qemu_mutex_unlock(&f->lock);
>
> OK, and at least for the RDMA code, if it calls
> close a 2nd time, rioc->rdma is checked so it wont actually free stuff a
> 2nd time.
>
>>  if (ret >= 0) {
>>  ret = ret2;
>>  }
>> @@ -339,6 +343,7 @@ int qemu_fclose(QEMUFile *f)
>>  if (f->last_error) {
>>  ret = f->last_error;
>>  }
>> +qemu_mutex_destroy(&f->lock);
>>  g_free(f);
>
> Hmm but that's not safe; if two things really do call qemu_fclose()
> on the same structure they race here and can end up destroying the lock
> twice, or doing f->lock  after the 1st one has already g_free(f).

>
>
> So lets go back a step.
> I think:
>   a) There should always be a separate QEMUFile* for
>  to_src_file and from_src_file - I don't see where you open
>  the 2nd one; I don't see your implementation of
>  f->ops->get_return_path.

yes, current qemu version use a separate QEMUFile* for to_src_file and
from_src_file.
and the two QEMUFile point to one QIOChannelRDMA.

the f->ops->get_return_path is implemented by channel_output_ops or
channel_input_ops.

>   b) I *think* that while the different threads might all call
>  fclose(), I think there should only ever be one qemu_fclose
>  call for each direction on the QEMUFile.
>
> But now we have two problems:
>   If (a) is true then f->lock  is separate on each one so
>doesn't really protect if the two directions are closed
>at once. (Assuming (b) is true)

yes, you are right.  so I should add a QemuMutex in QIOChannel structure, not
QEMUFile structure. and qemu_mutex_destroy the QemuMutex in
qio_channel_finalize.

Thank you.

>
>   If (a) is false and we actually share a single QEMUFile then
>  that race at the end happens.
>
> Dave
>
>
>>  trace_qemu_file_fclose();
>>  return ret;
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



[Qemu-devel] [PATCH] KVM: GIC: Fix memory leak due to calling kvm_init_irq_routing twice

2018-05-31 Thread Shannon Zhao
kvm_irqchip_create called by kvm_init will call kvm_init_irq_routing to
initialize global capability variables. If we call kvm_init_irq_routing in
GIC realize function, previous allocated memory will leak.

Fix this by deleting the unnecessary call.

Signed-off-by: Shannon Zhao 
---
 hw/intc/arm_gic_kvm.c   | 1 -
 hw/intc/arm_gicv3_kvm.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 6f467e6..204369d 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -572,7 +572,6 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error 
**errp)
 
 if (kvm_has_gsi_routing()) {
 /* set up irq routing */
-kvm_init_irq_routing(kvm_state);
 for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
 kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
 }
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 001d82b..aa4c7c5 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -813,7 +813,6 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 if (kvm_has_gsi_routing()) {
 /* set up irq routing */
-kvm_init_irq_routing(kvm_state);
 for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
 kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
 }
-- 
2.0.4





Re: [Qemu-devel] [PATCH v4 10/12] migration: create a dedicated thread to release rdma resource

2018-05-31 Thread 858585 jemmy
On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
 wrote:
> * Lidong Chen (jemmy858...@gmail.com) wrote:
>> ibv_dereg_mr wait for a long time for big memory size virtual server.
>>
>> The test result is:
>>   10GB  326ms
>>   20GB  699ms
>>   30GB  1021ms
>>   40GB  1387ms
>>   50GB  1712ms
>>   60GB  2034ms
>>   70GB  2457ms
>>   80GB  2807ms
>>   90GB  3107ms
>>   100GB 3474ms
>>   110GB 3735ms
>>   120GB 4064ms
>>   130GB 4567ms
>>   140GB 4886ms
>>
>> this will cause the guest os hang for a while when migration finished.
>> So create a dedicated thread to release rdma resource.
>>
>> Signed-off-by: Lidong Chen 
>> ---
>>  migration/rdma.c | 21 +
>>  1 file changed, 17 insertions(+), 4 deletions(-)
>>
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index dfa4f77..1b9e261 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -2979,12 +2979,12 @@ static void 
>> qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
>>  }
>>  }
>>
>> -static int qio_channel_rdma_close(QIOChannel *ioc,
>> -  Error **errp)
>> +static void *qio_channel_rdma_close_thread(void *arg)
>>  {
>> -QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> +QIOChannelRDMA *rioc = arg;
>>  RDMAContext *rdmain, *rdmaout;
>> -trace_qemu_rdma_close();
>> +
>> +rcu_register_thread();
>>
>>  rdmain = rioc->rdmain;
>>  if (rdmain) {
>> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
>>  g_free(rdmain);
>>  g_free(rdmaout);
>>
>> +rcu_unregister_thread();
>> +return NULL;
>> +}
>> +
>> +static int qio_channel_rdma_close(QIOChannel *ioc,
>> +  Error **errp)
>> +{
>> +QemuThread t;
>> +QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> +trace_qemu_rdma_close();
>> +
>> +qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>> +   rioc, QEMU_THREAD_DETACHED);
>
> I don't think this can be this simple; consider the lock in patch 4;
> now that lock means qui_channel_rdma_close() can't be called in
> parallel; but with this change it means:
>
>
>  f->lock
>qemu_thread_create  (1)
> !f->lock
>  f->lock
>qemu_thread_create
> !f->lock
>
> so we don't really protect the thing you were trying to lock

yes, I should not use rioc as the thread arg.

static int qio_channel_rdma_close(QIOChannel *ioc,
  Error **errp)
{
QemuThread t;
RDMAContext *rdma[2];
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);

trace_qemu_rdma_close();
if (rioc->rdmain || rioc->rdmaout) {
rdma[0] =  rioc->rdmain;
rdma[1] =  rioc->rdmaout;
qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
   rdma, QEMU_THREAD_DETACHED);
rioc->rdmain = NULL;
rioc->rdmaout = NULL;
}
return 0;
}

>
> Dave
>
>>  return 0;
>>  }
>>
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



[Qemu-devel] [PATCH v2] tap: fix memory leak on success to create a tap device

2018-05-31 Thread wangyunjian
From: Yunjian Wang 

The memory leak on success to create a tap device. And the nfds and
nvhosts may not be the same and need to be processed separately.

Fixes: 07825977 ("tap: fix memory leak on failure to create a multiqueue tap 
device")
Fixes: 264986e2 ("tap: multiqueue support")

CC: QEMU Stable 
Signed-off-by: Yunjian Wang 
---
v2:
 -add commit log and cc qemu-stable with fixes tags
---
 net/tap.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index de05f20..6d7710f 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -803,7 +803,8 @@ int net_init_tap(const Netdev *netdev, const char *name,
 } else if (tap->has_fds) {
 char **fds;
 char **vhost_fds;
-int nfds, nvhosts;
+int nfds = 0, nvhosts = 0;
+int ret = 0;
 
 if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
@@ -823,6 +824,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
 if (nfds != nvhosts) {
 error_setg(errp, "The number of fds passed does not match "
"the number of vhostfds passed");
+ret = -1;
 goto free_fail;
 }
 }
@@ -831,6 +833,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
 fd = monitor_fd_param(cur_mon, fds[i], &err);
 if (fd == -1) {
 error_propagate(errp, err);
+ret = -1;
 goto free_fail;
 }
 
@@ -841,6 +844,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
 } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
 error_setg(errp,
"vnet_hdr not consistent across given tap fds");
+ret = -1;
 goto free_fail;
 }
 
@@ -850,21 +854,21 @@ int net_init_tap(const Netdev *netdev, const char *name,
  vnet_hdr, fd, &err);
 if (err) {
 error_propagate(errp, err);
+ret = -1;
 goto free_fail;
 }
 }
-g_free(fds);
-g_free(vhost_fds);
-return 0;
 
 free_fail:
+for (i = 0; i < nvhosts; i++) {
+g_free(vhost_fds[i]);
+}
 for (i = 0; i < nfds; i++) {
 g_free(fds[i]);
-g_free(vhost_fds[i]);
 }
 g_free(fds);
 g_free(vhost_fds);
-return -1;
+return ret;
 } else if (tap->has_helper) {
 if (tap->has_ifname || tap->has_script || tap->has_downscript ||
 tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
-- 
1.8.3.1





Re: [Qemu-devel] [PATCH] tcg: Pass tb and index to tcg_gen_exit_tb separately

2018-05-31 Thread Laurent Vivier
Le 31/05/2018 à 03:13, Richard Henderson a écrit :
> Do the cast to uintptr_t within the helper, so that the compiler
> can type check the pointer argument.  We can also do some more
> sanity checking of the index argument.
> 
> Signed-off-by: Richard Henderson 
> ---
>  include/exec/gen-icount.h |  2 +-
>  tcg/tcg-op.h  | 17 +
>  target/alpha/translate.c  | 12 ++--
>  target/arm/translate-a64.c|  6 +++---
>  target/arm/translate.c|  8 
>  target/cris/translate.c   |  6 +++---
>  target/hppa/translate.c   |  6 +++---
>  target/i386/translate.c   |  8 
>  target/lm32/translate.c   |  6 +++---
>  target/m68k/translate.c   |  6 +++---
>  target/microblaze/translate.c |  6 +++---
>  target/mips/translate.c   |  4 ++--
>  target/moxie/translate.c  | 14 +++---
>  target/nios2/translate.c  |  8 
>  target/openrisc/translate.c   |  6 +++---
>  target/ppc/translate.c|  4 ++--
>  target/riscv/translate.c  | 20 ++--
>  target/s390x/translate.c  | 10 +-
>  target/sh4/translate.c|  8 
>  target/sparc/translate.c  | 16 
>  target/tilegx/translate.c |  4 ++--
>  target/tricore/translate.c| 20 ++--
>  target/unicore32/translate.c  |  6 +++---
>  target/xtensa/translate.c |  4 ++--
>  tcg/tcg-op.c  | 20 +++-
>  25 files changed, 127 insertions(+), 100 deletions(-)
> 
> diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
> index 54aaa61d65..24f7991781 100644
> --- a/include/exec/gen-icount.h
> +++ b/include/exec/gen-icount.h
> @@ -52,7 +52,7 @@ static inline void gen_tb_end(TranslationBlock *tb, int 
> num_insns)
>  }
>  
>  gen_set_label(tcg_ctx->exitreq_label);
> -tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED);
> +tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED);
>  }
>  
>  static inline void gen_io_start(void)
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index 04eb3e9e17..7513c1eb7c 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -782,10 +782,19 @@ static inline void tcg_gen_insn_start(target_ulong pc, 
> target_ulong a1,
>  # error "Unhandled number of operands to insn_start"
>  #endif
>  
> -static inline void tcg_gen_exit_tb(uintptr_t val)
> -{
> -tcg_gen_op1i(INDEX_op_exit_tb, val);
> -}
> +/**
> + * tcg_gen_exit_tb() - output exit_tb TCG operation
> + * @tb: The TranslationBlock from which we are exiting
> + * @idx: Direct jump slot index, or exit request
> + *
> + * See tcg/README for more info about this TCG operation.
> + * See also tcg.h and the block comment above TB_EXIT_MASK.
> + *
> + * For a normal exit from the TB, back to the main loop, @tb should
> + * be NULL and @idx should be 0.  Otherwise, @tb should be valid and
> + * @idx should be one of the TB_EXIT_ values.
> + */
> +void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx);
>  
>  /**
>   * tcg_gen_goto_tb() - output goto_tb TCG operation
> diff --git a/target/alpha/translate.c b/target/alpha/translate.c
> index 15eca71d49..e5d62850c5 100644
> --- a/target/alpha/translate.c
> +++ b/target/alpha/translate.c
> @@ -488,7 +488,7 @@ static DisasJumpType gen_bdirect(DisasContext *ctx, int 
> ra, int32_t disp)
>  } else if (use_goto_tb(ctx, dest)) {
>  tcg_gen_goto_tb(0);
>  tcg_gen_movi_i64(cpu_pc, dest);
> -tcg_gen_exit_tb((uintptr_t)ctx->base.tb);
> +tcg_gen_exit_tb(ctx->base.tb, 0);
>  return DISAS_NORETURN;
>  } else {
>  tcg_gen_movi_i64(cpu_pc, dest);
> @@ -507,12 +507,12 @@ static DisasJumpType gen_bcond_internal(DisasContext 
> *ctx, TCGCond cond,
>  
>  tcg_gen_goto_tb(0);
>  tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
> -tcg_gen_exit_tb((uintptr_t)ctx->base.tb);
> +tcg_gen_exit_tb(ctx->base.tb, 0);
>  
>  gen_set_label(lab_true);
>  tcg_gen_goto_tb(1);
>  tcg_gen_movi_i64(cpu_pc, dest);
> -tcg_gen_exit_tb((uintptr_t)ctx->base.tb + 1);
> +tcg_gen_exit_tb(ctx->base.tb, 1);
>  
>  return DISAS_NORETURN;
>  } else {
> @@ -1273,7 +1273,7 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, 
> int palcode)
>  if (!use_exit_tb(ctx)) {
>  tcg_gen_goto_tb(0);
>  tcg_gen_movi_i64(cpu_pc, entry);
> -tcg_gen_exit_tb((uintptr_t)ctx->base.tb);
> +tcg_gen_exit_tb(ctx->base.tb, 0);
>  return DISAS_NORETURN;
>  } else {
>  tcg_gen_movi_i64(cpu_pc, entry);
> @@ -3009,7 +3009,7 @@ static void alpha_tr_tb_stop(DisasContextBase *dcbase, 
> CPUState *cpu)
>  if (use_goto_tb(ctx, ctx->base.pc_next)) {
>  tcg_gen_goto_tb(0);
>  tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next);
> -tcg_gen_exit_tb((uintptr_t)ctx->base.tb);
> +tcg_gen_exit_tb(ctx->base.tb, 0);
>  }
>  /* FALLTHRU */
> 

Re: [Qemu-devel] [libvirt] [PATCH 2/5] qemu: Move checks for SMM from command-line creation into validation phase

2018-05-31 Thread Laszlo Ersek
adding qemu-devel, Paolo and Gerd; comments below:

On 05/30/18 23:08, Martin Kletzander wrote:
> On Wed, May 30, 2018 at 11:02:59AM -0400, John Ferlan wrote:
>>
>>
>> On 05/21/2018 11:00 AM, Martin Kletzander wrote:
>>> We are still hoping all of such checks will be moved there and this
>>> is one small
>>> step in that direction.
>>>
>>> One of the things that this is improving is the error message you get
>>> when
>>> starting a domain with SMM and i440fx, for example.  Instead of
>>> saying that the
>>> QEMU binary doesn't support that option, we correctly say that it is
>>> only
>>> supported with q35 machine type.
>>>
>>> Signed-off-by: Martin Kletzander 
>>> ---
>>>  src/qemu/qemu_capabilities.c | 21 +++--
>>>  src/qemu/qemu_capabilities.h |  4 ++--
>>>  src/qemu/qemu_command.c  | 12 ++--
>>>  src/qemu/qemu_domain.c   | 12 +---
>>>  4 files changed, 28 insertions(+), 21 deletions(-)
>>>
>>
>> I know it's outside the bounds of what you're doing; however,
>> qemuDomainDefValidateFeatures could check the capabilities for other
>> bits too...
>>
> 
> Probably, but I mostly wanted to do that because SMM is not only about the
> capability, but also about the machine.  Good idea for the future, though.
> 
>> [...]
>>
>>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>>> index d3beee5d8760..881d0ea46a75 100644
>>> --- a/src/qemu/qemu_domain.c
>>> +++ b/src/qemu/qemu_domain.c
>>> @@ -3430,7 +3430,8 @@ qemuDomainDefGetVcpuHotplugGranularity(const
>>> virDomainDef *def)
>>>
>>>
>>>  static int
>>> -qemuDomainDefValidateFeatures(const virDomainDef *def)
>>> +qemuDomainDefValidateFeatures(const virDomainDef *def,
>>> +  virQEMUCapsPtr qemuCaps)
>>>  {
>>>  size_t i;
>>>
>>> @@ -3477,6 +3478,12 @@ qemuDomainDefValidateFeatures(const
>>> virDomainDef *def)
>>>  }
>>>  break;
>>>
>>> +    case VIR_DOMAIN_FEATURE_SMM:
>>> +    if (def->features[i] == VIR_TRISTATE_SWITCH_ON &&
>>
>> Probably should change to != _ABSENT, since qemu_command will supply
>> smm={on|off}
>>
> 
> That makes sense, kind of.  For 'off' we only need to check if we can
> specify
> the smm= option.  The thing is that you can even specify smm=on with
> non-q35
> machine type, but it is unclear what that's going to mean since it doesn't
> really make sense.
> 
> @Laszlo: What would you say?  Should we allow users to specify smm=on
> for users?
> Or even better, does it makes sense to allow specifying smm=anything for
> non-q35
> machine types?  If not, we'll leave it like this, that is smm=anything is
> forbidden for non-q35 machine types.

Technically the "smm" machine type property is defined for both i440fx
and q35:

$ qemu-system-x86_64 -machine pc,\? 2>&1 | grep smm
pc-i440fx-2.11.smm=OnOffAuto (Enable SMM (pc & q35))

$ qemu-system-x86_64 -machine q35,\? 2>&1 | grep smm
pc-q35-2.11.smm=OnOffAuto (Enable SMM (pc & q35))

The "smm" property controls whether system management mode is emulated,
to my knowledge. That's an x86 processor operating mode, which isn't
specific to the Q35 board. What's specific to Q35 is the TSEG.

The primary reason why OVMF (built with the edk2 SMM driver stack)
requires Q35 is not that i440fx does not emulate SMM, it's that i440fx
doesn't have a large enough SMRAM area. (The legacy SMRAM areas are too
small for OVMF.)

For example, SeaBIOS too includes some SMM code (optionally, if it's
built like that), and that runs on i440fx just fine, because the legacy
SMRAM areas are large enough for SeaBIOS's purposes, AIUI.

(Now, there's at least one other reason why OVMF/SMM needs Q35: because
the SMI broadcast feature too is only implemented on Q35. But that's
rather a consequence of the above "primary reason", and not a
stand-alone reason itself -- we restricted the SMI broadcast feature to
Q35 *because* OVMF could only run on Q35. Anyhow, you need not care
about SMI broadcast because it's automatically negotiated between guest
fw and QEMU.)

Summary:

(1) For making Secure Boot actually secure in OVMF, OVMF needs to be
built with -D SMM_REQUIRE, so that it include the SMM driver stack.

(2) Booting such a firmware binary requires Q35, because only Q35 offers
TSEG, and the legacy -- non-TSEG -- SMRAM ranges are too small even for
a "basic boot" of OVMF.

(3) QEMU has to be configured with "-machine smm=on"; this is already
covered by libvirt via .

(4) QEMU has to be configured to restrict pflash writes to code that
executes in SMM, with "-global
driver=cfi.pflash01,property=secure,value=on". Libvirt covers this
already with the @secure=yes attribute in .

(5) There are two *quality of service* features related to SMM; with
both of them being Q35-only. The first feature is SMI broadcast; libvirt
need not care because it's auto-negotiated.

(6) The second QoS feature is *extended* TSEG (a paravirt feature on top
of the standard TSEG), which lets the edk2 SMM driver stack scale to
large RAM sizes and 

Re: [Qemu-devel] [PATCH v4 11/12] migration: poll the cm event while wait RDMA work request completion

2018-05-31 Thread 858585 jemmy
On Thu, May 31, 2018 at 1:33 AM, Dr. David Alan Gilbert
 wrote:
> * Lidong Chen (jemmy858...@gmail.com) wrote:
>> If the peer qemu is crashed, the qemu_rdma_wait_comp_channel function
>> maybe loop forever. so we should also poll the cm event fd, and when
>> receive any cm event, we consider some error happened.
>>
>> Signed-off-by: Lidong Chen 
>
> I don't understand enough about the way the infiniband fd's work to
> fully review this; so I'd appreciate if some one who does could
> comment/add their review.

Hi Avaid:
we need your help. I also not find any document about the cq
channel event fd and
cm channel event f.
Should we set the events to G_IO_IN | G_IO_HUP | G_IO_ERR? or
G_IO_IN is enough?
pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
Thanks.

>
>> ---
>>  migration/rdma.c | 35 ---
>>  1 file changed, 24 insertions(+), 11 deletions(-)
>>
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index 1b9e261..d611a06 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -1489,6 +1489,9 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, 
>> uint64_t *wr_id_out,
>>   */
>>  static int qemu_rdma_wait_comp_channel(RDMAContext *rdma)
>>  {
>> +struct rdma_cm_event *cm_event;
>> +int ret = -1;
>> +
>>  /*
>>   * Coroutine doesn't start until migration_fd_process_incoming()
>>   * so don't yield unless we know we're running inside of a coroutine.
>> @@ -1504,25 +1507,35 @@ static int qemu_rdma_wait_comp_channel(RDMAContext 
>> *rdma)
>>   * But we need to be able to handle 'cancel' or an error
>>   * without hanging forever.
>>   */
>> -while (!rdma->error_state  && !rdma->received_error) {
>> -GPollFD pfds[1];
>> +while (!rdma->error_state && !rdma->received_error) {
>> +GPollFD pfds[2];
>>  pfds[0].fd = rdma->comp_channel->fd;
>>  pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
>> +pfds[0].revents = 0;
>> +
>> +pfds[1].fd = rdma->channel->fd;
>> +pfds[1].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
>> +pfds[1].revents = 0;
>> +
>>  /* 0.1s timeout, should be fine for a 'cancel' */
>> -switch (qemu_poll_ns(pfds, 1, 100 * 1000 * 1000)) {
>> -case 1: /* fd active */
>> -return 0;
>> +qemu_poll_ns(pfds, 2, 100 * 1000 * 1000);
>
> Shouldn't we still check the return value of this; if it's negative
> something has gone wrong.

I will fix this.
Thanks.

>
> Dave
>
>> -case 0: /* Timeout, go around again */
>> -break;
>> +if (pfds[1].revents) {
>> +ret = rdma_get_cm_event(rdma->channel, &cm_event);
>> +if (!ret) {
>> +rdma_ack_cm_event(cm_event);
>> +}
>> +error_report("receive cm event while wait comp channel,"
>> + "cm event is %d", cm_event->event);
>>
>> -default: /* Error of some type -
>> -  * I don't trust errno from qemu_poll_ns
>> - */
>> -error_report("%s: poll failed", __func__);
>> +/* consider any rdma communication event as an error */
>>  return -EPIPE;
>>  }
>>
>> +if (pfds[0].revents) {
>> +return 0;
>> +}
>> +
>>  if (migrate_get_current()->state == 
>> MIGRATION_STATUS_CANCELLING) {
>>  /* Bail out and let the cancellation happen */
>>  return -EPIPE;
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH] target/ppc: Allow privileged access to SPR_PCR

2018-05-31 Thread Cédric Le Goater
On 05/30/2018 04:42 PM, Joel Stanley wrote:
> The powerpc Linux kernel[1] and skiboot firmware[2] recently gained changes
> that cause the Processor Compatibility Register (PCR) SPR to be cleared.
> 
> These changes cause Linux to fail to boot on the Qemu powernv machine
> with an error:
> 
>  Trying to write privileged spr 338 (0x152) at 30017f0c
> 
> With this patch Qemu makes this register available as a hypervisor
> privileged register.
> 
> Note that bits set in this register disable features of the processor.
> Currently the only register state that is supported is when the register
> is zeroed (enable all features). This is sufficient for guests to
> once again boot.
> 
> [1] https://lkml.kernel.org/r/20180518013742.24095-1-mi...@neuling.org
> [2] https://patchwork.ozlabs.org/patch/915932/
> 
> Signed-off-by: Joel Stanley 
> ---
>  target/ppc/helper.h |  1 +
>  target/ppc/misc_helper.c| 10 ++
>  target/ppc/translate_init.inc.c |  9 +++--
>  3 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index 19453c68138a..d751f0e21909 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -17,6 +17,7 @@ DEF_HELPER_2(pminsn, void, env, i32)
>  DEF_HELPER_1(rfid, void, env)
>  DEF_HELPER_1(hrfid, void, env)
>  DEF_HELPER_2(store_lpcr, void, env, tl)
> +DEF_HELPER_2(store_pcr, void, env, tl)
>  #endif
>  DEF_HELPER_1(check_tlb_flush_local, void, env)
>  DEF_HELPER_1(check_tlb_flush_global, void, env)
> diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
> index 8c8cba5cc6f1..40c39d08ad14 100644
> --- a/target/ppc/misc_helper.c
> +++ b/target/ppc/misc_helper.c
> @@ -20,6 +20,7 @@
>  #include "cpu.h"
>  #include "exec/exec-all.h"
>  #include "exec/helper-proto.h"
> +#include "qemu/error-report.h"
>  
>  #include "helper_regs.h"
>  
> @@ -186,6 +187,15 @@ void ppc_store_msr(CPUPPCState *env, target_ulong value)
>  hreg_store_msr(env, value, 0);
>  }
>  
> +void helper_store_pcr(CPUPPCState *env, target_ulong value)
> +{
> +if (value != 0) {
> +error_report("Unimplemented PCR value 0x"TARGET_FMT_lx, value);
> +return;
> +}
> +env->spr[SPR_PCR] = value;

shouldn't we use pcc->pcr_mask ? and check pcc->pcr_supported also ? 

C.

> +}
> +
>  /* This code is lifted from MacOnLinux. It is called whenever
>   * THRM1,2 or 3 is read an fixes up the values in such a way
>   * that will make MacOS not hang. These registers exist on some
> diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
> index ab782cb32aaa..bebe6ec5333c 100644
> --- a/target/ppc/translate_init.inc.c
> +++ b/target/ppc/translate_init.inc.c
> @@ -456,6 +456,10 @@ static void spr_write_hid0_601(DisasContext *ctx, int 
> sprn, int gprn)
>  /* Must stop the translation as endianness may have changed */
>  gen_stop_exception(ctx);
>  }
> +static void spr_write_pcr(DisasContext *ctx, int sprn, int gprn)
> +{
> +gen_helper_store_pcr(cpu_env, cpu_gpr[gprn]);
> +}
>  #endif
>  
>  /* Unified bats */
> @@ -7957,11 +7961,12 @@ static void gen_spr_power6_common(CPUPPCState *env)
>  #endif
>  /*
>   * Register PCR to report POWERPC_EXCP_PRIV_REG instead of
> - * POWERPC_EXCP_INVAL_SPR.
> + * POWERPC_EXCP_INVAL_SPR in userspace. Permit hypervisor access.
>   */
> -spr_register(env, SPR_PCR, "PCR",
> +spr_register_hv(env, SPR_PCR, "PCR",
>   SPR_NOACCESS, SPR_NOACCESS,
>   SPR_NOACCESS, SPR_NOACCESS,
> + &spr_read_generic, &spr_write_pcr,
>   0x);
>  }
>  
> 




[Qemu-devel] [PATCH 1/3] io: Implement QIO_CHANNEL_ERR_BROKEN

2018-05-31 Thread Sergio Lopez
QIO_CHANNEL_ERR_BROKEN is used to identify a potentially unrecoverable
error in the channel, like EPIPE.
---
 include/io/channel.h | 1 +
 io/channel-file.c| 3 +++
 2 files changed, 4 insertions(+)

diff --git a/include/io/channel.h b/include/io/channel.h
index e8cdadb..bbe45f6 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -38,6 +38,7 @@ typedef struct QIOChannel QIOChannel;
 typedef struct QIOChannelClass QIOChannelClass;
 
 #define QIO_CHANNEL_ERR_BLOCK -2
+#define QIO_CHANNEL_ERR_BROKEN -3
 
 typedef enum QIOChannelFeature QIOChannelFeature;
 
diff --git a/io/channel-file.c b/io/channel-file.c
index db948ab..a990f67 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -124,6 +124,9 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
 if (errno == EAGAIN) {
 return QIO_CHANNEL_ERR_BLOCK;
 }
+if (errno == EPIPE) {
+return QIO_CHANNEL_ERR_BROKEN;
+}
 if (errno == EINTR) {
 goto retry;
 }
-- 
1.8.3.1




[Qemu-devel] [PATCH 3/3] hw/char/serial: Don't retry on serial_xmit if errno == EPIPE

2018-05-31 Thread Sergio Lopez
If writing to the frontend channel failed with EPIPE, don't set up a
retry. EPIPE is not a recoverable error, so trying again is a waste of CPU
cycles.

If the vCPU writing to the serial device and emulator thread are pinned
to the same pCPU, it can also compromise the stability of the Guest OS,
as both threads will be competing for pCPU's time, with the vCPU
actively polling the serial device and barely giving time to the
emulator thread to make actual progress.
---
 hw/char/serial.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/char/serial.c b/hw/char/serial.c
index 2c080c9..f26e86b 100644
--- a/hw/char/serial.c
+++ b/hw/char/serial.c
@@ -262,6 +262,7 @@ static void serial_xmit(SerialState *s)
 /* in loopback mode, say that we just received a char */
 serial_receive1(s, &s->tsr, 1);
 } else if (qemu_chr_fe_write(&s->chr, &s->tsr, 1) != 1 &&
+   errno != EPIPE &&
s->tsr_retry < MAX_XMIT_RETRY) {
 assert(s->watch_tag == 0);
 s->watch_tag =
-- 
1.8.3.1




[Qemu-devel] [PATCH 2/3] chardev: On QIO_CHANNEL_ERR_BROKEN set errno to EPIPE

2018-05-31 Thread Sergio Lopez
This allows callers to identify this potentially unrecoverable error.
---
 chardev/char-io.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/chardev/char-io.c b/chardev/char-io.c
index f810524..f934eb9 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -168,6 +168,9 @@ int io_channel_send_full(QIOChannel *ioc,
 
 errno = EAGAIN;
 return -1;
+} else if (ret == QIO_CHANNEL_ERR_BROKEN) {
+errno = EPIPE;
+return -1;
 } else if (ret < 0) {
 errno = EINVAL;
 return -1;
-- 
1.8.3.1




[Qemu-devel] [PATCH 0/3] Avoid retrying on serial_xmit with EPIPE

2018-05-31 Thread Sergio Lopez
EPIPE is not a recoverable error, so retrying on serial_xmit is a waste of CPU
cycles, and can potentially comprosie the Guest OS stability if both the vCPU
and the emulator thread are pinned to the same pCPU, with the first one
actively polling the serial device and barely giving time to the second to make
any actual progress.

Sergio Lopez (3):
  io: Implement QIO_CHANNEL_ERR_BROKEN
  chardev: On QIO_CHANNEL_ERR_BROKEN set errno to EPIPE
  hw/char/serial: Don't retry on serial_xmit if errno == EPIPE

 chardev/char-io.c| 3 +++
 hw/char/serial.c | 1 +
 include/io/channel.h | 1 +
 io/channel-file.c| 3 +++
 4 files changed, 8 insertions(+)

-- 
1.8.3.1




Re: [Qemu-devel] [PATCH] KVM: GIC: Fix memory leak due to calling kvm_init_irq_routing twice

2018-05-31 Thread Auger Eric
Hi Shannon,

On 05/31/2018 09:16 AM, Shannon Zhao wrote:
> kvm_irqchip_create called by kvm_init will call kvm_init_irq_routing to
> initialize global capability variables. If we call kvm_init_irq_routing in
> GIC realize function, previous allocated memory will leak.
> 
> Fix this by deleting the unnecessary call.
> 
> Signed-off-by: Shannon Zhao 
openpic_kvm seems to suffer the same leak. Don't you want to fix it as
well?

Reviewed-by: Eric Auger 

Thanks

Eric
> ---
>  hw/intc/arm_gic_kvm.c   | 1 -
>  hw/intc/arm_gicv3_kvm.c | 1 -
>  2 files changed, 2 deletions(-)
> 
> diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
> index 6f467e6..204369d 100644
> --- a/hw/intc/arm_gic_kvm.c
> +++ b/hw/intc/arm_gic_kvm.c
> @@ -572,7 +572,6 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error 
> **errp)
>  
>  if (kvm_has_gsi_routing()) {
>  /* set up irq routing */
> -kvm_init_irq_routing(kvm_state);
>  for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
>  kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
>  }
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index 001d82b..aa4c7c5 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -813,7 +813,6 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
> **errp)
>  
>  if (kvm_has_gsi_routing()) {
>  /* set up irq routing */
> -kvm_init_irq_routing(kvm_state);
>  for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
>  kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
>  }
> 



Re: [Qemu-devel] [Qemu-block] [PATCH 0/3] ahci: fix completion race condition

2018-05-31 Thread Stefan Hajnoczi
On Thu, May 31, 2018 at 1:43 AM, John Snow  wrote:
> Commit d759c951f changed the main thread lock release/reacquisition,
> and in so doing apparently jostled loose a race condition in the AHCI
> code.
>
> Patch 2 should be sufficient to fix this, and patches 1 and 3 are just
> little trivial fixes.
>
> This might be sufficient to fix the bug as reported at
> https://bugs.launchpad.net/qemu/+bug/1769189
> but the nature of the timing changes make it difficult to confirm,
> so I am posting this patchset for the reporters to help test.
>
> John Snow (3):
>   ahci: trim signatures on raise/lower
>   ahci: fix PxCI register race
>   ahci: don't schedule unnecessary BH
>
>  hw/ide/ahci.c | 24 +++-
>  1 file changed, 11 insertions(+), 13 deletions(-)

Nice find!

Reviewed-by: Stefan Hajnoczi 



Re: [Qemu-devel] [PATCH] KVM: GIC: Fix memory leak due to calling kvm_init_irq_routing twice

2018-05-31 Thread Shannon Zhao



On 2018/5/31 15:54, Auger Eric wrote:
> Hi Shannon,
> 
> On 05/31/2018 09:16 AM, Shannon Zhao wrote:
>> kvm_irqchip_create called by kvm_init will call kvm_init_irq_routing to
>> initialize global capability variables. If we call kvm_init_irq_routing in
>> GIC realize function, previous allocated memory will leak.
>>
>> Fix this by deleting the unnecessary call.
>>
>> Signed-off-by: Shannon Zhao 
> openpic_kvm seems to suffer the same leak. Don't you want to fix it as
> well?
> 
I have a look at below patch of openpic_kvm which says on ppc it doesn't
call kvm_irqchip_create. So no such issue for it.

commit d85937e683f6ff4d68293cb24c780fb1f6820d2c
Author: Scott Wood 
Date:   Wed Jun 12 15:32:51 2013 -0500

kvm/openpic: in-kernel mpic support

> Reviewed-by: Eric Auger 
> 
Thanks.

> Thanks
> 
> Eric
>> ---
>>  hw/intc/arm_gic_kvm.c   | 1 -
>>  hw/intc/arm_gicv3_kvm.c | 1 -
>>  2 files changed, 2 deletions(-)
>>
>> diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
>> index 6f467e6..204369d 100644
>> --- a/hw/intc/arm_gic_kvm.c
>> +++ b/hw/intc/arm_gic_kvm.c
>> @@ -572,7 +572,6 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error 
>> **errp)
>>  
>>  if (kvm_has_gsi_routing()) {
>>  /* set up irq routing */
>> -kvm_init_irq_routing(kvm_state);
>>  for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
>>  kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
>>  }
>> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
>> index 001d82b..aa4c7c5 100644
>> --- a/hw/intc/arm_gicv3_kvm.c
>> +++ b/hw/intc/arm_gicv3_kvm.c
>> @@ -813,7 +813,6 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, 
>> Error **errp)
>>  
>>  if (kvm_has_gsi_routing()) {
>>  /* set up irq routing */
>> -kvm_init_irq_routing(kvm_state);
>>  for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
>>  kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
>>  }
>>
> 
> .
> 

-- 
Shannon




[Qemu-devel] [Bug 1404278] Re: tap connections not working on windows host

2018-05-31 Thread timsoft
it is working now. using the following config.
"c:\program files\qemu\qemu-system-i386.exe" -cpu qemu32 -m 3G -drive 
file="c:\\data\\images\\slack14.2_32bit_base.qcow2",format=qcow2 -cdrom 
"c:\\data\\images\\slackware-14.2-install-dvd.iso" -boot c -net 
nic,macaddr=02:00:00:11:11:14,model=i82551 -net tap,ifname=tap0 -k en-gb 
-display vnc=:2 -monitor telnet:localhost:7101,server,nowait,nodelay
which I took from a linux qemu vm of mine, and just modified the bridge to 
point to tap (it didn't like -net bridge,br=br0 (where br0 was my windows 
bridge - I guess there is no bridge helper on windows qemu - so I changed it to 
-net tap,ifname=tap0 (which was already configured as part of the bridge)
I was able to ping the lan and also the wan (internet) so it looks ok now. This 
time i did specify a different virtual net card which may have made a 
difference, but if it works, that is the main thing.

** Changed in: qemu
   Status: Incomplete => Fix Released

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1404278

Title:
  tap connections not working on windows host

Status in QEMU:
  Fix Released

Bug description:
  using latest qemu 2.2.0 64bit for windows host (installed from
  qemu-w64-setup-20141210.exe obtained from http://qemu.weilnetz.de/w64/
  ),OpenVPN 2.6.3-I601 64bit tap adapter named tap01 and calling qemu
  using the following.

  qemu-system-x86_64.exe -m 512 -net nic -net tap,ifname=tap01 -hda
  "c:\\data\\images\\test.img"

  where the image contains a slackware 14.0 64bit install.
  The tap is bridged with the real network adapter and the bridge is given an 
ip of 10.1.1.41 (which works as the ip for the windows host). The tap adapter 
(in network connections) shows connected when the qemu vm is running. inside 
the vm, the network is given an ip of 10.1.1.143 (the netmask and default 
gateway are the same for the virtual and real pc).
  fault.
  The vm cannot see the rest of the local network or visa-versa. This used to 
work in early (0.9 32bit) versions of qemu.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1404278/+subscriptions



[Qemu-devel] [Bug 1404278] Re: tap connections not working on windows host

2018-05-31 Thread timsoft
addition to previous comment. it works with qemu-w64-setup-20180519.exe
I haven't tested it with in between versions of qemu.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1404278

Title:
  tap connections not working on windows host

Status in QEMU:
  Fix Released

Bug description:
  using latest qemu 2.2.0 64bit for windows host (installed from
  qemu-w64-setup-20141210.exe obtained from http://qemu.weilnetz.de/w64/
  ),OpenVPN 2.6.3-I601 64bit tap adapter named tap01 and calling qemu
  using the following.

  qemu-system-x86_64.exe -m 512 -net nic -net tap,ifname=tap01 -hda
  "c:\\data\\images\\test.img"

  where the image contains a slackware 14.0 64bit install.
  The tap is bridged with the real network adapter and the bridge is given an 
ip of 10.1.1.41 (which works as the ip for the windows host). The tap adapter 
(in network connections) shows connected when the qemu vm is running. inside 
the vm, the network is given an ip of 10.1.1.143 (the netmask and default 
gateway are the same for the virtual and real pc).
  fault.
  The vm cannot see the rest of the local network or visa-versa. This used to 
work in early (0.9 32bit) versions of qemu.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1404278/+subscriptions



Re: [Qemu-devel] [libvirt] [PATCH 2/5] qemu: Move checks for SMM from command-line creation into validation phase

2018-05-31 Thread Martin Kletzander

On Thu, May 31, 2018 at 09:33:54AM +0200, Laszlo Ersek wrote:

adding qemu-devel, Paolo and Gerd; comments below:

On 05/30/18 23:08, Martin Kletzander wrote:

On Wed, May 30, 2018 at 11:02:59AM -0400, John Ferlan wrote:



On 05/21/2018 11:00 AM, Martin Kletzander wrote:

We are still hoping all of such checks will be moved there and this
is one small
step in that direction.

One of the things that this is improving is the error message you get
when
starting a domain with SMM and i440fx, for example.  Instead of
saying that the
QEMU binary doesn't support that option, we correctly say that it is
only
supported with q35 machine type.

Signed-off-by: Martin Kletzander 
---
 src/qemu/qemu_capabilities.c | 21 +++--
 src/qemu/qemu_capabilities.h |  4 ++--
 src/qemu/qemu_command.c  | 12 ++--
 src/qemu/qemu_domain.c   | 12 +---
 4 files changed, 28 insertions(+), 21 deletions(-)



I know it's outside the bounds of what you're doing; however,
qemuDomainDefValidateFeatures could check the capabilities for other
bits too...



Probably, but I mostly wanted to do that because SMM is not only about the
capability, but also about the machine.  Good idea for the future, though.


[...]


diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index d3beee5d8760..881d0ea46a75 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -3430,7 +3430,8 @@ qemuDomainDefGetVcpuHotplugGranularity(const
virDomainDef *def)


 static int
-qemuDomainDefValidateFeatures(const virDomainDef *def)
+qemuDomainDefValidateFeatures(const virDomainDef *def,
+  virQEMUCapsPtr qemuCaps)
 {
 size_t i;

@@ -3477,6 +3478,12 @@ qemuDomainDefValidateFeatures(const
virDomainDef *def)
 }
 break;

+    case VIR_DOMAIN_FEATURE_SMM:
+    if (def->features[i] == VIR_TRISTATE_SWITCH_ON &&


Probably should change to != _ABSENT, since qemu_command will supply
smm={on|off}



That makes sense, kind of.  For 'off' we only need to check if we can
specify
the smm= option.  The thing is that you can even specify smm=on with
non-q35
machine type, but it is unclear what that's going to mean since it doesn't
really make sense.

@Laszlo: What would you say?  Should we allow users to specify smm=on
for users?
Or even better, does it makes sense to allow specifying smm=anything for
non-q35
machine types?  If not, we'll leave it like this, that is smm=anything is
forbidden for non-q35 machine types.


Technically the "smm" machine type property is defined for both i440fx
and q35:

$ qemu-system-x86_64 -machine pc,\? 2>&1 | grep smm
pc-i440fx-2.11.smm=OnOffAuto (Enable SMM (pc & q35))

$ qemu-system-x86_64 -machine q35,\? 2>&1 | grep smm
pc-q35-2.11.smm=OnOffAuto (Enable SMM (pc & q35))

The "smm" property controls whether system management mode is emulated,
to my knowledge. That's an x86 processor operating mode, which isn't
specific to the Q35 board. What's specific to Q35 is the TSEG.

The primary reason why OVMF (built with the edk2 SMM driver stack)
requires Q35 is not that i440fx does not emulate SMM, it's that i440fx
doesn't have a large enough SMRAM area. (The legacy SMRAM areas are too
small for OVMF.)

For example, SeaBIOS too includes some SMM code (optionally, if it's
built like that), and that runs on i440fx just fine, because the legacy
SMRAM areas are large enough for SeaBIOS's purposes, AIUI.

(Now, there's at least one other reason why OVMF/SMM needs Q35: because
the SMI broadcast feature too is only implemented on Q35. But that's
rather a consequence of the above "primary reason", and not a
stand-alone reason itself -- we restricted the SMI broadcast feature to
Q35 *because* OVMF could only run on Q35. Anyhow, you need not care
about SMI broadcast because it's automatically negotiated between guest
fw and QEMU.)

Summary:

(1) For making Secure Boot actually secure in OVMF, OVMF needs to be
built with -D SMM_REQUIRE, so that it include the SMM driver stack.

(2) Booting such a firmware binary requires Q35, because only Q35 offers
TSEG, and the legacy -- non-TSEG -- SMRAM ranges are too small even for
a "basic boot" of OVMF.

(3) QEMU has to be configured with "-machine smm=on"; this is already
covered by libvirt via .

(4) QEMU has to be configured to restrict pflash writes to code that
executes in SMM, with "-global
driver=cfi.pflash01,property=secure,value=on". Libvirt covers this
already with the @secure=yes attribute in .

(5) There are two *quality of service* features related to SMM; with
both of them being Q35-only. The first feature is SMI broadcast; libvirt
need not care because it's auto-negotiated.

(6) The second QoS feature is *extended* TSEG (a paravirt feature on top
of the standard TSEG), which lets the edk2 SMM driver stack scale to
large RAM sizes and high VCPU counts. Libvirt should let the user
configure the extended TSEG size, because the necessary minimum is super
difficult to calculate (and one "maximum 

Re: [Qemu-devel] [PATCH 2/2] linux-user: Fix struct sigaltstack for openrisc

2018-05-31 Thread Laurent Vivier
Le 31/05/2018 à 06:18, Richard Henderson a écrit :
> Signed-off-by: Richard Henderson 
> ---
>  linux-user/openrisc/target_signal.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/linux-user/openrisc/target_signal.h 
> b/linux-user/openrisc/target_signal.h
> index 2a4e00b035..901097972a 100644
> --- a/linux-user/openrisc/target_signal.h
> +++ b/linux-user/openrisc/target_signal.h
> @@ -7,8 +7,8 @@
>  
>  typedef struct target_sigaltstack {
>  abi_long ss_sp;
> +abi_int ss_flags;
>  abi_ulong ss_size;
> -abi_long ss_flags;
>  } target_stack_t;
>  
>  /* sigaltstack controls  */
> 

Reviewed-by: Laurent Vivier 



Re: [Qemu-devel] [PATCH v9 5/7] monitor: remove event_clock_type

2018-05-31 Thread Stefan Hajnoczi
On Thu, May 31, 2018 at 12:11:47PM +0800, Peter Xu wrote:
> On Wed, May 30, 2018 at 05:35:52PM +0100, Stefan Hajnoczi wrote:
> > On Tue, May 29, 2018 at 01:57:53PM +0800, Peter Xu wrote:
> > > Instead, use a dynamic function to detect which clock we'll use.  The
> > > problem is that the old code will let monitor initialization depends on
> > > qtest_enabled().  After this change, we don't have such a dependency any
> > > more.
> > 
> > There is a hidden dependency:
> > 
> >   monitor_get_clock() returns the wrong value before main() has
> >   processed command-line arguments.
> 
> To be more explicit:
> 
> monitor_get_clock() returns the wrong value before accelerator is
> correctly setup (in configure_accelerator()).
> 
> Since the only thing that matters here is whether we're using the
> qtest accelerator.
> 
> > 
> > Where is the guarantee that monitor_get_clock() is never called too
> > early?
> 
> You are right, there is no guarantee except from programming POV.
> It's only used in:
> 
>   monitor_qapi_event_queue
>   monitor_qapi_event_handler
> 
> These two functions will never be called until accelerator is setup.
> 
> > 
> > At the least, monitor_get_clock() should call abort(3) if invoked too
> > early.  Even better would be an interface that cannot be used
> > incorrectly.
> 
> Maybe then I should export the accel_initialised variable in
> configure_accelerator() and then I assert with that.  But that'll
> further expand the series a bit.
> 
> Or, I can also mention above in the commit message to explain that a
> bit.

Documentation is okay, but please do it in the code, not the commit
message.  That way anyone looking at monitor_get_clock() will be aware
of the constraint.

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH] KVM: GIC: Fix memory leak due to calling kvm_init_irq_routing twice

2018-05-31 Thread Auger Eric
Hi Shannon,

On 05/31/2018 10:04 AM, Shannon Zhao wrote:
> 
> 
> On 2018/5/31 15:54, Auger Eric wrote:
>> Hi Shannon,
>>
>> On 05/31/2018 09:16 AM, Shannon Zhao wrote:
>>> kvm_irqchip_create called by kvm_init will call kvm_init_irq_routing to
>>> initialize global capability variables. If we call kvm_init_irq_routing in
>>> GIC realize function, previous allocated memory will leak.
>>>
>>> Fix this by deleting the unnecessary call.
>>>
>>> Signed-off-by: Shannon Zhao 
>> openpic_kvm seems to suffer the same leak. Don't you want to fix it as
>> well?
>>
> I have a look at below patch of openpic_kvm which says on ppc it doesn't
> call kvm_irqchip_create. So no such issue for it.
> 
> commit d85937e683f6ff4d68293cb24c780fb1f6820d2c
> Author: Scott Wood 
> Date:   Wed Jun 12 15:32:51 2013 -0500
> 
> kvm/openpic: in-kernel mpic support

Ah OK. Thanks for the pointer.

Eric
> 
>> Reviewed-by: Eric Auger 
>>
> Thanks.
> 
>> Thanks
>>
>> Eric
>>> ---
>>>  hw/intc/arm_gic_kvm.c   | 1 -
>>>  hw/intc/arm_gicv3_kvm.c | 1 -
>>>  2 files changed, 2 deletions(-)
>>>
>>> diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
>>> index 6f467e6..204369d 100644
>>> --- a/hw/intc/arm_gic_kvm.c
>>> +++ b/hw/intc/arm_gic_kvm.c
>>> @@ -572,7 +572,6 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error 
>>> **errp)
>>>  
>>>  if (kvm_has_gsi_routing()) {
>>>  /* set up irq routing */
>>> -kvm_init_irq_routing(kvm_state);
>>>  for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
>>>  kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
>>>  }
>>> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
>>> index 001d82b..aa4c7c5 100644
>>> --- a/hw/intc/arm_gicv3_kvm.c
>>> +++ b/hw/intc/arm_gicv3_kvm.c
>>> @@ -813,7 +813,6 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, 
>>> Error **errp)
>>>  
>>>  if (kvm_has_gsi_routing()) {
>>>  /* set up irq routing */
>>> -kvm_init_irq_routing(kvm_state);
>>>  for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) {
>>>  kvm_irqchip_add_irq_route(kvm_state, i, 0, i);
>>>  }
>>>
>>
>> .
>>
> 



Re: [Qemu-devel] [PATCH v9 5/7] monitor: remove event_clock_type

2018-05-31 Thread Peter Xu
On Thu, May 31, 2018 at 09:23:24AM +0100, Stefan Hajnoczi wrote:
> On Thu, May 31, 2018 at 12:11:47PM +0800, Peter Xu wrote:
> > On Wed, May 30, 2018 at 05:35:52PM +0100, Stefan Hajnoczi wrote:
> > > On Tue, May 29, 2018 at 01:57:53PM +0800, Peter Xu wrote:
> > > > Instead, use a dynamic function to detect which clock we'll use.  The
> > > > problem is that the old code will let monitor initialization depends on
> > > > qtest_enabled().  After this change, we don't have such a dependency any
> > > > more.
> > > 
> > > There is a hidden dependency:
> > > 
> > >   monitor_get_clock() returns the wrong value before main() has
> > >   processed command-line arguments.
> > 
> > To be more explicit:
> > 
> > monitor_get_clock() returns the wrong value before accelerator is
> > correctly setup (in configure_accelerator()).
> > 
> > Since the only thing that matters here is whether we're using the
> > qtest accelerator.
> > 
> > > 
> > > Where is the guarantee that monitor_get_clock() is never called too
> > > early?
> > 
> > You are right, there is no guarantee except from programming POV.
> > It's only used in:
> > 
> >   monitor_qapi_event_queue
> >   monitor_qapi_event_handler
> > 
> > These two functions will never be called until accelerator is setup.
> > 
> > > 
> > > At the least, monitor_get_clock() should call abort(3) if invoked too
> > > early.  Even better would be an interface that cannot be used
> > > incorrectly.
> > 
> > Maybe then I should export the accel_initialised variable in
> > configure_accelerator() and then I assert with that.  But that'll
> > further expand the series a bit.
> > 
> > Or, I can also mention above in the commit message to explain that a
> > bit.
> 
> Documentation is okay, but please do it in the code, not the commit
> message.  That way anyone looking at monitor_get_clock() will be aware
> of the constraint.

Sure.

-- 
Peter Xu



Re: [Qemu-devel] [PATCH 2/3] hw/arm/virt: Add another UART to the virt board

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 03:12, Jason A. Donenfeld  wrote:
> Hi Peter,
>
> What ever became of this? I still really could use a second UART in
> the standard configuration. I'm still building with this cludge --
> https://א.cc/RXI3ssWV -- in order to run the test suite on
> build.wireguard.com.

It stalled on the fact that adding the second UART breaks
UEFI images (annoyingly, UEFI iterates through UARTs in
the DTB in the opposite direction to Linux, so if you add
a second UART then it picks that one to use rather than
the first one, IIRC). So it would have to be an awkward
thing with a new machine option to request the extra UART.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v2] ui/cocoa: Suppress NSFileHandlingPanelOKButton deprecation warning

2018-05-31 Thread Peter Maydell
On 29 May 2018 at 19:23, Programmingkid  wrote:
>
>> On May 29, 2018, at 2:15 PM, Peter Maydell  wrote:
>>
>> OSX 10.13 deprecates the NSFileHandlingPanelOKButton constant, and
>> would rather you use NSModalResponseOK, which was introduced in OS 10.9.
>> Use the recommended new constant name, with a backward compatibility
>> define if we're building on an older OSX.
>>
>> Signed-off-by: Peter Maydell 
>> ---
>> v1->v2: define MAC_OS_X_VERSION_10_9 if the system headers don't;
>> fixes compilation on versions before 10.9.
>> ---
>> ui/cocoa.m | 12 +++-
>> 1 file changed, 11 insertions(+), 1 deletion(-)
>>
>> diff --git a/ui/cocoa.m b/ui/cocoa.m
>> index 110b393e4e..2991ed4f19 100644
>> --- a/ui/cocoa.m
>> +++ b/ui/cocoa.m
>> @@ -44,6 +44,9 @@
>> #ifndef MAC_OS_X_VERSION_10_6
>> #define MAC_OS_X_VERSION_10_6 1060
>> #endif
>> +#ifndef MAC_OS_X_VERSION_10_9
>> +#define MAC_OS_X_VERSION_10_9 1090
>> +#endif
>> #ifndef MAC_OS_X_VERSION_10_10
>> #define MAC_OS_X_VERSION_10_10 101000
>> #endif
>> @@ -79,6 +82,13 @@
>> #define NSWindowStyleMaskMiniaturizable NSMiniaturizableWindowMask
>> #define NSWindowStyleMaskTitled NSTitledWindowMask
>> #endif
>> +/* 10.13 deprecates NSFileHandlingPanelOKButton in favour of
>> + * NSModalResponseOK, which was introduced in 10.9. Define
>> + * it for older versions.
>> + */
>> +#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_9
>> +#define NSModalResponseOK NSFileHandlingPanelOKButton
>> +#endif
>>
>> //#define DEBUG
>>
>> @@ -1218,7 +1228,7 @@ QemuCocoaView *cocoaView;
>> [openPanel setCanChooseFiles: YES];
>> [openPanel setAllowsMultipleSelection: NO];
>> [openPanel setAllowedFileTypes: supportedImageFileTypes];
>> -if([openPanel runModal] == NSFileHandlingPanelOKButton) {
>> +if([openPanel runModal] == NSModalResponseOK) {
>> NSString * file = [[[openPanel URLs] objectAtIndex: 0] path];
>> if(file == nil) {
>> NSBeep();
>> --
>> 2.17.0
>>
>
> It works :)
>
> Reviewed-by: John Arbuckle 

Thanks; applied this version to master.

-- PMM



Re: [Qemu-devel] [PATCH 1/2] hw/arm/virt: Add a new 256MB ECAM region

2018-05-31 Thread Laszlo Ersek
On 05/31/18 08:55, Auger Eric wrote:
> Hi Laszlo,
>
> On 05/30/2018 06:11 PM, Laszlo Ersek wrote:
>> On 05/30/18 16:26, Eric Auger wrote:
>>> This patch defines a new ECAM region located after the 256GB limit.
>>>
>>> The virt machine state is augmented with a new highmem_ecam field
>>> which guards the usage of this new ECAM region instead of the legacy
>>> 16MB one. With the highmem ECAM region, up to 256 PCIe buses can be
>>> used.
>>>
>>> Signed-off-by: Eric Auger 
>>>
>>> ---
>>>
>>> RFC -> PATCH:
>>> - remove the newline at the end of acpi_dsdt_add_pci
>>> - use vms->highmem_ecam to select the memmap id
>>> ---
>>>  hw/arm/virt-acpi-build.c | 21 +
>>>  hw/arm/virt.c| 12 
>>>  include/hw/arm/virt.h|  2 ++
>>>  3 files changed, 23 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
>>> index 92ceee9..c0370b2 100644
>>> --- a/hw/arm/virt-acpi-build.c
>>> +++ b/hw/arm/virt-acpi-build.c
>>> @@ -150,16 +150,17 @@ static void acpi_dsdt_add_virtio(Aml *scope,
>>>  }
>>>
>>>  static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
>>> -  uint32_t irq, bool use_highmem)
>>> +  uint32_t irq, bool use_highmem, bool 
>>> highmem_ecam)
>>>  {
>>> +int ecam_id = highmem_ecam ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM;
>>>  Aml *method, *crs, *ifctx, *UUID, *ifctx1, *elsectx, *buf;
>>>  int i, bus_no;
>>>  hwaddr base_mmio = memmap[VIRT_PCIE_MMIO].base;
>>>  hwaddr size_mmio = memmap[VIRT_PCIE_MMIO].size;
>>>  hwaddr base_pio = memmap[VIRT_PCIE_PIO].base;
>>>  hwaddr size_pio = memmap[VIRT_PCIE_PIO].size;
>>> -hwaddr base_ecam = memmap[VIRT_PCIE_ECAM].base;
>>> -hwaddr size_ecam = memmap[VIRT_PCIE_ECAM].size;
>>> +hwaddr base_ecam = memmap[ecam_id].base;
>>> +hwaddr size_ecam = memmap[ecam_id].size;
>>>  int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;
>>>
>>>  Aml *dev = aml_device("%s", "PCI0");
>>> @@ -173,7 +174,7 @@ static void acpi_dsdt_add_pci(Aml *scope, const 
>>> MemMapEntry *memmap,
>>>  aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
>>>
>>>  /* Declare the PCI Routing Table. */
>>> -Aml *rt_pkg = aml_package(nr_pcie_buses * PCI_NUM_PINS);
>>> +Aml *rt_pkg = aml_varpackage(nr_pcie_buses * PCI_NUM_PINS);
>>>  for (bus_no = 0; bus_no < nr_pcie_buses; bus_no++) {
>>>  for (i = 0; i < PCI_NUM_PINS; i++) {
>>>  int gsi = (i + bus_no) % PCI_NUM_PINS;
>>> @@ -316,7 +317,10 @@ static void acpi_dsdt_add_pci(Aml *scope, const 
>>> MemMapEntry *memmap,
>>>  Aml *dev_res0 = aml_device("%s", "RES0");
>>>  aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02")));
>>>  crs = aml_resource_template();
>>> -aml_append(crs, aml_memory32_fixed(base_ecam, size_ecam, 
>>> AML_READ_WRITE));
>>> +aml_append(crs,
>>> +aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
>>> + AML_NON_CACHEABLE, AML_READ_WRITE, 0x, 
>>> base_ecam,
>>> + base_ecam + size_ecam - 1, 0x, size_ecam));
>>>  aml_append(dev_res0, aml_name_decl("_CRS", crs));
>>>  aml_append(dev, dev_res0);
>>>  aml_append(scope, dev);
>>> @@ -563,16 +567,17 @@ build_mcfg(GArray *table_data, BIOSLinker *linker, 
>>> VirtMachineState *vms)
>>>  {
>>>  AcpiTableMcfg *mcfg;
>>>  const MemMapEntry *memmap = vms->memmap;
>>> +int ecam_id = vms->highmem_ecam ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM;
>>>  int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]);
>>>  int mcfg_start = table_data->len;
>>>
>>>  mcfg = acpi_data_push(table_data, len);
>>> -mcfg->allocation[0].address = cpu_to_le64(memmap[VIRT_PCIE_ECAM].base);
>>> +mcfg->allocation[0].address = cpu_to_le64(memmap[ecam_id].base);
>>>
>>>  /* Only a single allocation so no need to play with segments */
>>>  mcfg->allocation[0].pci_segment = cpu_to_le16(0);
>>>  mcfg->allocation[0].start_bus_number = 0;
>>> -mcfg->allocation[0].end_bus_number = (memmap[VIRT_PCIE_ECAM].size
>>> +mcfg->allocation[0].end_bus_number = (memmap[ecam_id].size
>>>/ PCIE_MMCFG_SIZE_MIN) - 1;
>>>
>>>  build_header(linker, table_data, (void *)(table_data->data + 
>>> mcfg_start),
>>> @@ -747,7 +752,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
>>> VirtMachineState *vms)
>>>  acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
>>>  (irqmap[VIRT_MMIO] + ARM_SPI_BASE), 
>>> NUM_VIRTIO_TRANSPORTS);
>>>  acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE),
>>> -  vms->highmem);
>>> +  vms->highmem, vms->highmem_ecam);
>>>  acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO],
>>> (irqmap[VIRT_GPIO] + ARM_SPI_BASE));
>>>  acpi_dsdt_add_power_button(scope);
>>> diff --git 

Re: [Qemu-devel] [PATCH v2 0/7] ramfb: simple boot framebuffer, no legacy vga

2018-05-31 Thread Gerd Hoffmann
  Hi,

Resuming an old discussion ...

> > From
> > the guests point of view there is no difference between
> > 
> >   (a) qemu -device virtio-ramfb, and
> >   (b) qemu -device virtio-gpu-pci -device ramfb-testdev
> > 
> > On the host side the difference is that (a) is a single QemuConsole
> > which shows virtio-gpu-pci once activated and ramfb otherwise, and
> > (b) is two QemuConsoles, so you can see both virtio-gpu-pci and ramfb
> > side-by-side, for debugging purposes.
> 
> Exactly this "multiple frontends, single backend" connection is the
> problem. In UEFI, it is possible to establish a priority order between
> drivers that are all capable of binding the same controller ("handle"),
> but especially with ramfb + another (PCI) video frontend, it's the
> "handles" that are different. The "priority mechanism" would have no
> idea that the drivers cannot peacefully coexist, i.e. it's clueless
> about the (host side only) competition.

Well, virtio-vga and qxl-vga are very simliar.  They both are two-in-one
devices, with legacy vga frontend and native (qxl/virtio) frontend
sharing a single backend.  When the guest initialized the native
frontend the backend switches over from vga to native.

> This is a problem because you display something on the one device, and
> the screen still shows the other, and users complain.

In ovmf the console splitter handles this reasonable well, firmware and
bootloader simply show up on all outputs.  Got the ramfb driver to the
point where it joins the party properly.

The early boot phase (efifb active for linux) is more problematic.  Only
one of the GOPs is used, and it seems whatever GOP was registered first
gets used.  Try boot a fedora live iso with (a) "-device VGA -device
secondary-vga" and (b) "-device secondary-vga -device VGA".  efifb shows
up on the first device (probably because it also is first in pci scan
order).  fbcon shows up on the VGA device.  So for (b) the boot output
switches the display when bochs-drm.ko loads.

> To my understanding, in Linux, special cross-driver hacks exist for
> this. For example, you boot with efifb (which is not a PCI device), and
> once an actual PCI display driver is configured, it is considered
> superior, and it supplants efifb.

Linux does two things here.

First it checks ressource ranges, and if there is a conflict the
firmware framebuffer (efifb in ovmf case, but vesafb and others on !x86
platforms are handled simliar) the firmware framebuffer driver is
unregistered, only the native framebuffer driver remains active.

Second one device is picked as "primary".  Basically linux tries to
figure which device was activated by the firmware, to prefer that for
fbcon.  This is what makes linux switch over in case (b) above, because
linux prefers VGA display devices here (-device secondary-vga is
PCI_CLASS_DISPLAY_OTHER).

The logic to pick the "primary" display is arch-specific.

> On a tangent: on the host side, we already have to entirely separate I/O
> consoles: some kind of graphics + emulated keyboard, plus serial. Those
> are always displayed in parallel -- not in two GUI toolkit windows, mind
> you, but it's never the case that one permanently "hides" the other.

Well, try "View / Detach tab" in the gtk ui in case you want them side
by side in two windows.  SDL2 uses one window per console too.

> If this was the case with ramfb + something else (i.e., writing to ramfb
> wouldn't be possible to "read back" for the other firmware driver, from
> the PCI framebuffer, and one could switch back and forth between ramfb
> display and PCI display on their desktop), then it would be OK for both
> firmware drivers to bind. Point is, the two back-ends need not occupy
> double the screen real estate; it would suffice if one didn't hide the
> other permanently, and users could switch at will.

I think it makes sense for the *-ramfb devices behave simliar to the
existing *-vga devices, which are *not* user-switchable between
frontends.

Given ovmf has virtio-gpu drivers I'm not sure we need virtio-ramfb at
all.

> I could imagine an OvmfPkg-specific PCI capability that said, "all PCI
> drivers in OvmfPkg that could otherwise drive this device, ignore it --
> another (platform) driver in OvmfPkg will pick it up instead".

pci capability for ramfb could be useful (also for linux).  I'll keep it
in mind for now.

cheers,
  Gerd




Re: [Qemu-devel] [PATCH 1/2] hw/arm/virt: Add a new 256MB ECAM region

2018-05-31 Thread Auger Eric
Hi Laszlo,

On 05/31/2018 10:41 AM, Laszlo Ersek wrote:
> On 05/31/18 08:55, Auger Eric wrote:
>> Hi Laszlo,
>>
>> On 05/30/2018 06:11 PM, Laszlo Ersek wrote:
>>> On 05/30/18 16:26, Eric Auger wrote:
 This patch defines a new ECAM region located after the 256GB limit.

 The virt machine state is augmented with a new highmem_ecam field
 which guards the usage of this new ECAM region instead of the legacy
 16MB one. With the highmem ECAM region, up to 256 PCIe buses can be
 used.

 Signed-off-by: Eric Auger 

 ---

 RFC -> PATCH:
 - remove the newline at the end of acpi_dsdt_add_pci
 - use vms->highmem_ecam to select the memmap id
 ---
  hw/arm/virt-acpi-build.c | 21 +
  hw/arm/virt.c| 12 
  include/hw/arm/virt.h|  2 ++
  3 files changed, 23 insertions(+), 12 deletions(-)

 diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
 index 92ceee9..c0370b2 100644
 --- a/hw/arm/virt-acpi-build.c
 +++ b/hw/arm/virt-acpi-build.c
 @@ -150,16 +150,17 @@ static void acpi_dsdt_add_virtio(Aml *scope,
  }

  static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
 -  uint32_t irq, bool use_highmem)
 +  uint32_t irq, bool use_highmem, bool 
 highmem_ecam)
  {
 +int ecam_id = highmem_ecam ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM;
  Aml *method, *crs, *ifctx, *UUID, *ifctx1, *elsectx, *buf;
  int i, bus_no;
  hwaddr base_mmio = memmap[VIRT_PCIE_MMIO].base;
  hwaddr size_mmio = memmap[VIRT_PCIE_MMIO].size;
  hwaddr base_pio = memmap[VIRT_PCIE_PIO].base;
  hwaddr size_pio = memmap[VIRT_PCIE_PIO].size;
 -hwaddr base_ecam = memmap[VIRT_PCIE_ECAM].base;
 -hwaddr size_ecam = memmap[VIRT_PCIE_ECAM].size;
 +hwaddr base_ecam = memmap[ecam_id].base;
 +hwaddr size_ecam = memmap[ecam_id].size;
  int nr_pcie_buses = size_ecam / PCIE_MMCFG_SIZE_MIN;

  Aml *dev = aml_device("%s", "PCI0");
 @@ -173,7 +174,7 @@ static void acpi_dsdt_add_pci(Aml *scope, const 
 MemMapEntry *memmap,
  aml_append(dev, aml_name_decl("_CCA", aml_int(1)));

  /* Declare the PCI Routing Table. */
 -Aml *rt_pkg = aml_package(nr_pcie_buses * PCI_NUM_PINS);
 +Aml *rt_pkg = aml_varpackage(nr_pcie_buses * PCI_NUM_PINS);
  for (bus_no = 0; bus_no < nr_pcie_buses; bus_no++) {
  for (i = 0; i < PCI_NUM_PINS; i++) {
  int gsi = (i + bus_no) % PCI_NUM_PINS;
 @@ -316,7 +317,10 @@ static void acpi_dsdt_add_pci(Aml *scope, const 
 MemMapEntry *memmap,
  Aml *dev_res0 = aml_device("%s", "RES0");
  aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02")));
  crs = aml_resource_template();
 -aml_append(crs, aml_memory32_fixed(base_ecam, size_ecam, 
 AML_READ_WRITE));
 +aml_append(crs,
 +aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
 + AML_NON_CACHEABLE, AML_READ_WRITE, 0x, 
 base_ecam,
 + base_ecam + size_ecam - 1, 0x, size_ecam));
  aml_append(dev_res0, aml_name_decl("_CRS", crs));
  aml_append(dev, dev_res0);
  aml_append(scope, dev);
 @@ -563,16 +567,17 @@ build_mcfg(GArray *table_data, BIOSLinker *linker, 
 VirtMachineState *vms)
  {
  AcpiTableMcfg *mcfg;
  const MemMapEntry *memmap = vms->memmap;
 +int ecam_id = vms->highmem_ecam ? VIRT_PCIE_ECAM_HIGH : 
 VIRT_PCIE_ECAM;
  int len = sizeof(*mcfg) + sizeof(mcfg->allocation[0]);
  int mcfg_start = table_data->len;

  mcfg = acpi_data_push(table_data, len);
 -mcfg->allocation[0].address = 
 cpu_to_le64(memmap[VIRT_PCIE_ECAM].base);
 +mcfg->allocation[0].address = cpu_to_le64(memmap[ecam_id].base);

  /* Only a single allocation so no need to play with segments */
  mcfg->allocation[0].pci_segment = cpu_to_le16(0);
  mcfg->allocation[0].start_bus_number = 0;
 -mcfg->allocation[0].end_bus_number = (memmap[VIRT_PCIE_ECAM].size
 +mcfg->allocation[0].end_bus_number = (memmap[ecam_id].size
/ PCIE_MMCFG_SIZE_MIN) - 1;

  build_header(linker, table_data, (void *)(table_data->data + 
 mcfg_start),
 @@ -747,7 +752,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, 
 VirtMachineState *vms)
  acpi_dsdt_add_virtio(scope, &memmap[VIRT_MMIO],
  (irqmap[VIRT_MMIO] + ARM_SPI_BASE), 
 NUM_VIRTIO_TRANSPORTS);
  acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE),
 -  vms->highmem);
 +  vms->highmem, vms->highmem_ecam);

Re: [Qemu-devel] Recording I/O activity after KVM does a VMEXIT

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 11:19:13PM -0400, Arnabjyoti Kalita wrote:
> I am trying to implement a 'minimal' record-replay mechanism for KVM, which
> is similar to the one existing for TCG via -icount. I am trying to record
> I/O events only (specifically disk and network events) when KVM does a
> VMEXIT. This has led me to the function kvm_cpu_exec where I can clearly
> see the different ways of handling all of the possible VMExit cases (like
> PIO, MMIO etc.). To record network packets, I am working with the e1000
> hardware device.
> 
> Can I make sure that all of the network I/O, atleast for the e1000 device
> happens through the KVM_EXIT_MMIO case and subsequent use of the
> address_space_rw() function ? Do I also need to look at other functions as
> well ? Also for recording disk activity, can I make sure that looking out
> for the KVM_EXIT_MMIO and/or KVM_EXIT_PIO cases in the vmexit mechanism,
> will be enough ?
> 
> Let me know if there are other details that I need to take care of. I am
> using QEMU 2.11 on a x86-64 CPU and the guest runs a Linux Kernel 4.4 with
> Ubuntu 16.04.

I have CCed Pavel Dovgalyuk, the record/replay maintainer.

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2 0/7] ramfb: simple boot framebuffer, no legacy vga

2018-05-31 Thread Laszlo Ersek
On 05/31/18 10:43, Gerd Hoffmann wrote:
>   Hi,
> 
> Resuming an old discussion ...
> 
>>> From
>>> the guests point of view there is no difference between
>>>
>>>   (a) qemu -device virtio-ramfb, and
>>>   (b) qemu -device virtio-gpu-pci -device ramfb-testdev
>>>
>>> On the host side the difference is that (a) is a single QemuConsole
>>> which shows virtio-gpu-pci once activated and ramfb otherwise, and
>>> (b) is two QemuConsoles, so you can see both virtio-gpu-pci and ramfb
>>> side-by-side, for debugging purposes.
>>
>> Exactly this "multiple frontends, single backend" connection is the
>> problem. In UEFI, it is possible to establish a priority order between
>> drivers that are all capable of binding the same controller ("handle"),
>> but especially with ramfb + another (PCI) video frontend, it's the
>> "handles" that are different. The "priority mechanism" would have no
>> idea that the drivers cannot peacefully coexist, i.e. it's clueless
>> about the (host side only) competition.
> 
> Well, virtio-vga and qxl-vga are very simliar.  They both are two-in-one
> devices, with legacy vga frontend and native (qxl/virtio) frontend
> sharing a single backend.  When the guest initialized the native
> frontend the backend switches over from vga to native.

True -- the difference is however, that the firmware doesn't even try to
drive the native QXL frontend; it's clueless about it.

IOW, in the (QXL, VGA) two-in-one device, the firmware only sees VGA,
because the firmware entirely (globally) ignores the QXL frontend; it
has no driver for native QXL.

We couldn't do that for virtio-gpu (we really wanted to drive it for
aarch64's sake), so the (virtio-gpu, VGA) two-in-one device required
special hacks, to prevent double-binding.

[snip]

>> I could imagine an OvmfPkg-specific PCI capability that said, "all PCI
>> drivers in OvmfPkg that could otherwise drive this device, ignore it --
>> another (platform) driver in OvmfPkg will pick it up instead".
> 
> pci capability for ramfb could be useful (also for linux).  I'll keep it
> in mind for now.

Please do. :)

When you brought up the PCI capability last time in this thread (and I
liked it), I realized that scanning for this new (likely "vendor")
capability would require me to code up the *third* PCI caplist scanning
loop in OVMF.

(Until that point we had implemented two such scans, one in the
virtio-1.0 driver, because virtio-1.0 uses vendor capabilities
liberally, and the other one in PciHotPlugInitDxe, which looks for the
PCI resource reservation hints on bridges, for hotplug purposes.)

After I had added the 2nd such scan (in PciHotPlugInitDxe), Jordan
suggested that we should rather use some helper library to save us the
manual fiddling with the capability headers and contents.

So, when you brought up PCI capabilities the last time in this thread,
the foreseeable "third scan" got stuck in my mind, and it ultimtely
spurred me to write that helper library, originally suggested by Jordan.
It's been upstream for a week now, and both Virtio10Dxe and
PciHotPlugInitDxe have been converted to use it (commit range
4b8552d794e7..5685a243b6f8). (Thank you Ard again for the review.)

If you add the above-suggested

  "hands off for platform drivers' sake"

capability in QEMU, I think we'll be able to locate and parse it cleanly
in the OVMF PCI drivers that need to honor it.

Thanks!
Laszlo



Re: [Qemu-devel] [RFC 2/3] hw/char/nrf51_uart: Implement nRF51 SoC UART

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 01:03:37AM +0300, Julia Suvorova wrote:
> The following features are not yet implemented:
> Control with SUSPEND/START*/STOP*

This is probably worth implementing for completeness.  Just rx_enabled
and tx_enabled boolean states will be sufficient.  SUSPEND flushes tx
and stops rx, it doesn't require any extra state.

> CTS/NCTS flow control

CTS flow control is probably not necessary since we don't have bitwise
I/O and guest applications probably don't care either.

> Mapping registers to pins

This is probably not necessary since the micro:bit only uses the UART in
one pin configuration (back to the USB interface chip).

Please make sure that every register is explicitly handle in the code
and falls into these categories:

1. Implemented.
2. Unimplemented - can be observed via a trace event.  This will make
   debugging easier in the future when an application doesn't work
   with the UART.
3. Stubbed out - explicitly marked as ignored in the code.

This way we can be confident about the completeness of this device
model.

> diff --git a/hw/char/nrf51_uart.c b/hw/char/nrf51_uart.c
> new file mode 100644
> index 00..2da97aa0c4
> --- /dev/null
> +++ b/hw/char/nrf51_uart.c
> @@ -0,0 +1,232 @@
> +/*
> + * nRF51 SoC UART emulation

Please add a reference to the hardware specfication here:

  See nRF51 Series Reference Manual, "29 Universal Asynchronous
  Receiver/Transmitter" for hardware specifications:
  http://infocenter.nordicsemi.com/pdf/nRF51_RM_v3.0.pdf

> +static gboolean uart_transmit(GIOChannel *chan, GIOCondition cond, void 
> *opaque)
> +{
> +Nrf51UART *s = NRF51_UART(opaque);
> +int r;
> +
> +s->watch_tag = 0;
> +
> +r = qemu_chr_fe_write(&s->chr, (uint8_t *) &s->reg[A_TXD], 1);

This does not work on big-endian hosts:

  s->reg[A_TXD] = 'A';

little-endian memory layout: 41 00 00 00
big-endian memory layout:00 00 00 41
(uint8_t *) &s->reg[A_TXD] --^

Instead please use a uint8_t local:

  uint8_t ch = s->reg[A_TXD];

  ...

  r = qemu_chr_fe_write(&s->chr, &ch, 1);

> +if (r <= 0) {
> +s->watch_tag = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
> + uart_transmit, s);
> +if (!s->watch_tag) {
> +goto buffer_drained;

Please add a comment here:

  /* The hardware has no transmit error reporting, so silently drop the byte */

> +static void uart_receive(void *opaque, const uint8_t *buf, int size)
> +{
> +
> +   Nrf51UART *s = NRF51_UART(opaque);
> +
> +   if (s->rx_fifo_len >= UART_FIFO_LENGTH) {

Indentation is off.  Please use 4 spaces.

> +return;
> +}
> +
> +s->rx_fifo[(s->rx_fifo_pos + s->rx_fifo_len) % UART_FIFO_LENGTH] = *buf;
> +s->rx_fifo_len++;
> +
> +s->reg[A_RXDRDY] = 1;
> +nrf51_uart_update_irq(s);
> +}
> +
> +static int uart_can_receive(void *opaque)
> +{
> +Nrf51UART *s = NRF51_UART(opaque);
> +
> +return (s->rx_fifo_len < sizeof(s->rx_fifo));

This is very subtle: this function returns the number of bytes that can
be read.  This expression looks like a boolean return value but actually
relies on the implicit int cast (false -> 0 bytes, true -> 1 byte).

Please write it so it doesn't look like a boolean return value:

  if (s->rx_fifo_len >= sizeof(s->rx_fifo)) {
  return 0;
  }

  return 1 /* byte */;

Alternatively, you could handle more than 1 byte:

  return sizeof(s->rx_fifo) - s->rx_fifo_len;

but now uart_receive() needs to handle up to sizeof(s->rx_fifo) bytes of
data in a single call.

> +}
> +
> +static void nrf51_uart_realize(DeviceState *dev, Error **errp)
> +{
> +Nrf51UART *s = NRF51_UART(dev);
> +
> +qemu_chr_fe_set_handlers(&s->chr, uart_can_receive, uart_receive,
> + NULL, NULL, s, NULL, true);
> +}
> +
> +static void nrf51_uart_init(Object *obj)
> +{
> +Nrf51UART *s = NRF51_UART(obj);
> +SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
> +
> +memory_region_init_io(&s->mmio, obj, &uart_ops, s,
> +  "nrf51_soc.uart", 0x1000);

s/0x1000/UART_SIZE/

> +typedef struct Nrf51UART {
> +SysBusDevice parent_obj;
> +
> +MemoryRegion mmio;
> +CharBackend chr;
> +qemu_irq irq;
> +guint watch_tag;
> +
> +uint8_t rx_fifo[UART_FIFO_LENGTH];
> +unsigned int rx_fifo_pos;
> +unsigned int rx_fifo_len;
> +
> +uint32_t reg[0x1000];

Where does 0x1000 come from?  I think the actual 0x1000-byte range would
be uint32_t reg[UART_SIZE / sizeof(reg[0])].


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC 3/3] tests/boot-serial-test: Add support for the microbit board

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 01:03:38AM +0300, Julia Suvorova wrote:
> New mini-kernel test for nRF51 SoC UART.
> 
> Signed-off-by: Julia Suvorova 
> ---
>  tests/boot-serial-test.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c
> index 4d6815c3e0..e6dbc8a293 100644
> --- a/tests/boot-serial-test.c
> +++ b/tests/boot-serial-test.c
> @@ -62,6 +62,16 @@ static const uint8_t kernel_aarch64[] = {
>  0xfd, 0xff, 0xff, 0x17, /* b   -12 (loop) */
>  };
>  
> +static const uint8_t kernel_nrf51[] = {
> +0x00, 0x00, 0x00, 0x00, /* Stack top address */
> +0x09, 0x00, 0x00, 0x00, /* Reset handler address */
> +0x01, 0x4b, /* ldr   r3,[pc,#4]Get base 
> */
> +0x54, 0x22, /* mov   r2,#'T' */
> +0x1a, 0x70, /* strb  r2,[r3] */
> +0x01, 0xe0, /* b loop */
> +0x1c, 0x25, 0x00, 0x40, /* 0x40002000 = UART0 base addr 
> */
> +};

Good for now, will need STARTTX in the future.

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC 0/3] nRF51 SoC: Add UART support

2018-05-31 Thread Stefan Hajnoczi
On Tue, May 29, 2018 at 11:57:47PM -0300, Philippe Mathieu-Daudé wrote:
> Hi Julia,
> 
> On 05/29/2018 07:03 PM, Julia Suvorova via Qemu-devel wrote:
> > This series adds basic support for the nRF51 SoC UART, that used in
> > BBC Micro:bit board, and QTest for it.
> > 
> > Based-on: <20180503090532.3113-1-j...@jms.id.au>
> > 
> > Julia Suvorova (3):
> >   hw/arm/nrf51_soc: Fix compilation and memory regions
> 
> Since Joel's series isn't merged, you can simply take his patches, fix
> them, and include them in your following series.  Usually you keep his
> Signed-off-by line, add a comment with your changes, then add your
> S-o-b, such:
> 
> [PATCH ...] hw/arm: Add Nordic Semiconductor nRF51 SoC
> 
> The nRF51 is a Cortex-M0 microcontroller with an on-board radio module,
> plus other common ARM SoC peripherals.
> 
> This defines a basic model of the CPU and memory, with no peripherals
> implemented at this stage.
> 
> Signed-off-by: Joel Stanley 
> [Julia: Fixed BBC Micro:bit board ROM/RAM size, added FICR defines]
> Signed-off-by: Julia Suvorova 

I think Joel will send another revision of his series.  It would make
sense for him to squash in this patch.

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v2] tap: fix memory leak on success to create a tap device

2018-05-31 Thread Jason Wang




On 2018年05月31日 15:28, wangyunjian wrote:

From: Yunjian Wang 

The memory leak on success to create a tap device. And the nfds and
nvhosts may not be the same and need to be processed separately.

Fixes: 07825977 ("tap: fix memory leak on failure to create a multiqueue tap 
device")
Fixes: 264986e2 ("tap: multiqueue support")

CC: QEMU Stable 
Signed-off-by: Yunjian Wang 
---
v2:
  -add commit log and cc qemu-stable with fixes tags
---


Applied.

Thanks


  net/tap.c | 16 ++--
  1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index de05f20..6d7710f 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -803,7 +803,8 @@ int net_init_tap(const Netdev *netdev, const char *name,
  } else if (tap->has_fds) {
  char **fds;
  char **vhost_fds;
-int nfds, nvhosts;
+int nfds = 0, nvhosts = 0;
+int ret = 0;
  
  if (tap->has_ifname || tap->has_script || tap->has_downscript ||

  tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
@@ -823,6 +824,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
  if (nfds != nvhosts) {
  error_setg(errp, "The number of fds passed does not match "
 "the number of vhostfds passed");
+ret = -1;
  goto free_fail;
  }
  }
@@ -831,6 +833,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
  fd = monitor_fd_param(cur_mon, fds[i], &err);
  if (fd == -1) {
  error_propagate(errp, err);
+ret = -1;
  goto free_fail;
  }
  
@@ -841,6 +844,7 @@ int net_init_tap(const Netdev *netdev, const char *name,

  } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
  error_setg(errp,
 "vnet_hdr not consistent across given tap fds");
+ret = -1;
  goto free_fail;
  }
  
@@ -850,21 +854,21 @@ int net_init_tap(const Netdev *netdev, const char *name,

   vnet_hdr, fd, &err);
  if (err) {
  error_propagate(errp, err);
+ret = -1;
  goto free_fail;
  }
  }
-g_free(fds);
-g_free(vhost_fds);
-return 0;
  
  free_fail:

+for (i = 0; i < nvhosts; i++) {
+g_free(vhost_fds[i]);
+}
  for (i = 0; i < nfds; i++) {
  g_free(fds[i]);
-g_free(vhost_fds[i]);
  }
  g_free(fds);
  g_free(vhost_fds);
-return -1;
+return ret;
  } else if (tap->has_helper) {
  if (tap->has_ifname || tap->has_script || tap->has_downscript ||
  tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {





Re: [Qemu-devel] [PATCH 2/3] chardev: On QIO_CHANNEL_ERR_BROKEN set errno to EPIPE

2018-05-31 Thread Marc-André Lureau
On Thu, May 31, 2018 at 9:46 AM, Sergio Lopez  wrote:
> This allows callers to identify this potentially unrecoverable error.

(missing signed-off)

Reviewed-by: Marc-André Lureau 

> ---
>  chardev/char-io.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/chardev/char-io.c b/chardev/char-io.c
> index f810524..f934eb9 100644
> --- a/chardev/char-io.c
> +++ b/chardev/char-io.c
> @@ -168,6 +168,9 @@ int io_channel_send_full(QIOChannel *ioc,
>
>  errno = EAGAIN;
>  return -1;
> +} else if (ret == QIO_CHANNEL_ERR_BROKEN) {
> +errno = EPIPE;
> +return -1;
>  } else if (ret < 0) {
>  errno = EINVAL;
>  return -1;
> --
> 1.8.3.1
>
>



-- 
Marc-André Lureau



Re: [Qemu-devel] [PATCH 3/3] hw/char/serial: Don't retry on serial_xmit if errno == EPIPE

2018-05-31 Thread Marc-André Lureau
Hi

On Thu, May 31, 2018 at 9:46 AM, Sergio Lopez  wrote:
> If writing to the frontend channel failed with EPIPE, don't set up a
> retry. EPIPE is not a recoverable error, so trying again is a waste of CPU
> cycles.
>
> If the vCPU writing to the serial device and emulator thread are pinned
> to the same pCPU, it can also compromise the stability of the Guest OS,
> as both threads will be competing for pCPU's time, with the vCPU
> actively polling the serial device and barely giving time to the
> emulator thread to make actual progress.
> ---
>  hw/char/serial.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/hw/char/serial.c b/hw/char/serial.c
> index 2c080c9..f26e86b 100644
> --- a/hw/char/serial.c
> +++ b/hw/char/serial.c
> @@ -262,6 +262,7 @@ static void serial_xmit(SerialState *s)
>  /* in loopback mode, say that we just received a char */
>  serial_receive1(s, &s->tsr, 1);
>  } else if (qemu_chr_fe_write(&s->chr, &s->tsr, 1) != 1 &&
> +   errno != EPIPE &&
> s->tsr_retry < MAX_XMIT_RETRY) {

Instead of adding explicit handling of EPIPE, shouldn't the code be
rewritten to treat -1 return && errno != EAGAIN as fatal?

>  assert(s->watch_tag == 0);
>  s->watch_tag =
> --
> 1.8.3.1
>
>



-- 
Marc-André Lureau



Re: [Qemu-devel] [PATCH 00/27] iommu: support txattrs, support TCG execution, implement TZ MPC

2018-05-31 Thread Peter Maydell
On 30 May 2018 at 17:58, Paolo Bonzini  wrote:
> On 21/05/2018 16:03, Peter Maydell wrote:
>> This patchset is a rather large one, but the first half is all
>> fairly simple plumbing. It does four things:
>>  * support IOMMUs that are aware of memory transaction attributes and
>>may generate different translations for different attributes
>>  * support TCG execution out of memory which is behind an IOMMU
>>  * implement the Arm TrustZone Memory Protection Controller
>>(which needs both the above features in the IOMMU core code)
>>  * use the MPC in the mps2-an505 board
>
> Go ahead and apply it through the ARM tree.  Thanks!

It needs a v2 (there are some patches that need fixing),
and some of the tail end of the patchset hasn't been reviewed,
and I'm not sure if we have consensus on the IOMMU API changes
completely yet. But if you're happy we're going in the right
direction I can apply 1-13 via target-arm.next (that's the
"plumb TxAttrs through various functions" patches). That will
reduce the size of the patchset and make conflicts less likely.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v2] vhost-blk: turn on pre-defined RO feature bit

2018-05-31 Thread Stefan Hajnoczi
On Tue, May 29, 2018 at 09:24:35AM +0800, Changpeng Liu wrote:
> Read only feature shouldn't be negotiable, because if the
> backend device reported Read only feature supported, QEMU
> host driver shouldn't change backend's RO attribute. While
> here, also enable the vhost-user-blk test utility to test
> RO feature.
> 
> Signed-off-by: Changpeng Liu 
> ---
>  contrib/vhost-user-blk/vhost-user-blk.c | 48 
> -
>  hw/block/vhost-user-blk.c   |  5 +---
>  include/hw/virtio/vhost-user-blk.h  |  1 -
>  3 files changed, 36 insertions(+), 18 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] [Qemu-arm] [PATCH v4 2/2] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR

2018-05-31 Thread Peter Maydell
On 30 May 2018 at 02:42, Shannon Zhao  wrote:
>
>
> On 2018/5/29 22:44, Peter Maydell wrote:
>> This is where we should have a comment explaining the bug and
>> what the migration data from the old broken QEMU looks like; something
>> like:
>>
>> /* Older versions of QEMU had a bug in the handling of state save/restore
>>  * to the KVM GICv3: they got the offset in the bitmap arrays wrong,
>>  * so that instead of the data for external interrupts 32 and up
>>  * starting at bit position 32 in the bitmap, it started at bit
>>  * position 0.
> Not right here. for_each_dist_irq_reg starts from 32 and if irq is 32
> and gic_bmp_ptr32(bmp, irq) points bit 32, while offset passed to KVM is
> 0, then it will get the GICR values for bit 32 ~ bit 63. So the data
> looks like below:
> [00...0 00..0 xx..x ...]
> So we need to move the data down by 32 bits.

Yes, you're right.

thanks
-- PMM



Re: [Qemu-devel] About cpu_physical_memory_map()

2018-05-31 Thread Peter Maydell
On 30 May 2018 at 01:24, Huaicheng Li  wrote:
> Dear QEMU/KVM developers,
>
> I was trying to map a buffer in host QEMU process to a guest user space
> application. I tried to achieve this
> by allocating a buffer in the guest application first, then map this buffer
> to QEMU process address space via
> GVA -> GPA --> HVA (GPA to HVA is done via cpu_physical_memory_map). Last,
> I wrote a host kernel driver to
> walk QEMU process's page table and change corresponding page table entries
> of HVA to the HPA of the target
> buffer.

This seems like the wrong way round to try to do this. As a rule
of thumb, you'll have an easier life if you have things behave
similarly to how they would in real hardware. So it'll be simpler
if you start with the buffer in the host QEMU process, map this
in to the guest's physical address space at some GPA, tell the
guest kernel that that's the GPA to use, and have the guest kernel
map that GPA into the guest userspace process's virtual address space.
(Think of how you would map a framebuffer, for instance.)

Changing the host page table entries for QEMU under its feet seems
like it's never going to work reliably.

(I think the specific problem you're running into is that guest memory
is both mapped into the QEMU host process and also exposed to the
guest VM. The former is controlled by the page tables for the
QEMU host process, but the latter is a different set of page tables,
which QEMU asks the kernel to configure, using KVM_SET_USER_MEMORY_REGION
ioctls.)

thanks
-- PMM



Re: [Qemu-devel] [PULL 00/17] Block layer patches

2018-05-31 Thread Peter Maydell
On 30 May 2018 at 16:56, Kevin Wolf  wrote:
>   Merge remote-tracking branch 
> 'remotes/edgar/tags/edgar/xilinx-next-2018-05-29-v1.for-upstream' into 
> staging (2018-05-29 13:01:11 +0100)
>
> are available in the git repository at:
>
>   git://repo.or.cz/qemu/kevin.git tags/for-upstream
>
> for you to fetch changes up to 3fb588a0f2c006122c34e1960a15c87ae2b927eb:
>
>   block/create: Mark blockdev-create stable (2018-05-30 13:31:18 +0200)
>
> 
> Block layer patches:
>
> - Add blockdev-create job
> - qcow2: Silence Coverity false positive
>

Applied, thanks.

-- PMM



Re: [Qemu-devel] [PATCH 23/27] hw/core/or-irq: Support more than 16 inputs to an OR gate

2018-05-31 Thread Paolo Bonzini
On 30/05/2018 19:35, Peter Maydell wrote:
> On 30 May 2018 at 17:59, Paolo Bonzini  wrote:
>> On 21/05/2018 17:02, Peter Maydell wrote:
>>> On 21 May 2018 at 15:34, Paolo Bonzini  wrote:
 Why do the levels have to be migrated at all?  It should be enough if
 the IRQ level is either migrated manually, or restored (e.g. in
 post_save callbacks) through other data that is migrated.
>>> This is standard behaviour for devices: they track their
>>> inbound irq/gpio lines, and then that becomes internal state for
>>> them that must be migrated.
>>
>> But or_irq's input are another device's outbound lines, so tracking them
>> should not be necessary.  The other device would do it for or_irq.
> 
> There's no mechanism in qemu_irq for the destination end to ask
> the source end about its current value. The information flow
> is strictly one-way.

On postload, the source must call qemu_set_irq and that will cause an
update of the or_irq, won't it?

Paolo



Re: [Qemu-devel] [libvirt] [PATCH 2/5] qemu: Move checks for SMM from command-line creation into validation phase

2018-05-31 Thread Paolo Bonzini
On 31/05/2018 10:19, Martin Kletzander wrote:
>> 
> 
> Oh, it definitely helps.  And I always enjoy when someone explains
> low level details of something like you do, it makes me feel very
> smart after I read it =)
> 
> ...something about the shoulders of giants and stuff...
> 
> I'll fix this up according to what you described, that is smm=on/off 
> will be possible to be specified whenever -machine supports smm=.

That sounds like the right thing, thanks!

Paolo



Re: [Qemu-devel] [PATCH] qom: support orphan objects in object_get_canonical_path

2018-05-31 Thread Paolo Bonzini
On 31/05/2018 05:45, Alexey Kardashevskiy wrote:
> Well, this is correct indeed for the normal case when the result is used
> for internal business but for my task (show the owner of an MR or at least
> give a clue what to grep for) it will discard a partial path.
> 
> I guess I could print a typename if object_get_canonical_path() returns
> NULL, I'll repost v4.

Very good!

Paolo



Re: [Qemu-devel] [RFC 1/3] hw/arm/nrf51_soc: Fix compilation and memory regions

2018-05-31 Thread Peter Maydell
On 29 May 2018 at 23:03, Julia Suvorova  wrote:
> nRF51 SoC implementation is intended for the BBC Micro:bit board,
> which has 256 KB flash and 16 KB RAM.
> Added FICR defines.
>
> Signed-off-by: Julia Suvorova 
> ---
>  hw/arm/nrf51_soc.c | 12 +++-
>  include/hw/arm/nrf51_soc.h |  1 +
>  2 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c
> index e59ba7079f..6fe06dcfd2 100644
> --- a/hw/arm/nrf51_soc.c
> +++ b/hw/arm/nrf51_soc.c
> @@ -26,15 +26,17 @@
>  #define IOMEM_SIZE  0x2000
>
>  #define FLASH_BASE  0x
> -#define FLASH_SIZE  (144 * 1024)
> +#define FLASH_SIZE  (256 * 1024)
> +
> +#define FICR_BASE   0x1000
> +#define FICR_SIZE   0x100
>
>  #define SRAM_BASE   0x2000
> -#define SRAM_SIZE   (6 * 1024)
> +#define SRAM_SIZE   (16 * 1024)
>
>  static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
>  {
>  NRF51State *s = NRF51_SOC(dev_soc);
> -DeviceState *nvic;
>  Error *err = NULL;
>
>  /* IO space */
> @@ -69,8 +71,8 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error 
> **errp)
>  memory_region_add_subregion(system_memory, SRAM_BASE, sram);
>
>  /* TODO: implement a cortex m0 and update this */
> -nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
> -s->kernel_filename, ARM_CPU_TYPE_NAME("cortex-m3"));
> +s->nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
> +   s->kernel_filename, ARM_CPU_TYPE_NAME("cortex-m3"));
>  }
>
>  static Property nrf51_soc_properties[] = {
> diff --git a/include/hw/arm/nrf51_soc.h b/include/hw/arm/nrf51_soc.h
> index 5431d200f8..a6bbe9f108 100644
> --- a/include/hw/arm/nrf51_soc.h
> +++ b/include/hw/arm/nrf51_soc.h
> @@ -23,6 +23,7 @@ typedef struct NRF51State {
>
>  /*< public >*/
>  char *kernel_filename;
> +DeviceState *nvic;
>
>  MemoryRegion iomem;
>  } NRF51State;

The better approach here (which I think I suggested in review
of Joel's patches) is to have the armv7m object embedded in the
NRF51State. Then you can initialize and realize it in-place,
and the board code calls armv7m_load_kernel() rather than
having to pass the kernel filename to the SoC. (Nothing then
needs to call armv7m_init() at all.)

thanks
-- PMM



Re: [Qemu-devel] [PATCH v3 02/17] qht: return existing entry when qht_insert fails

2018-05-31 Thread Alex Bennée


Emilio G. Cota  writes:

> The meaning of "existing" is now changed to "matches in hash and
> ht->cmp result". This is saner than just checking the pointer value.
>
> Suggested-by: Richard Henderson 
> Reviewed-by:  Richard Henderson 
> Signed-off-by: Emilio G. Cota 

Reviewed-by: Alex Bennée 

> ---
>  include/qemu/qht.h|  7 +--
>  accel/tcg/translate-all.c |  2 +-
>  tests/qht-bench.c |  4 ++--
>  tests/test-qht.c  |  8 +++-
>  util/qht.c| 27 +--
>  5 files changed, 32 insertions(+), 16 deletions(-)
>
> diff --git a/include/qemu/qht.h b/include/qemu/qht.h
> index 5f03a0f..1fb9116 100644
> --- a/include/qemu/qht.h
> +++ b/include/qemu/qht.h
> @@ -70,6 +70,7 @@ void qht_destroy(struct qht *ht);
>   * @ht: QHT to insert to
>   * @p: pointer to be inserted
>   * @hash: hash corresponding to @p
> + * @existing: address where the pointer to an existing entry can be copied to
>   *
>   * Attempting to insert a NULL @p is a bug.
>   * Inserting the same pointer @p with different @hash values is a bug.
> @@ -78,9 +79,11 @@ void qht_destroy(struct qht *ht);
>   * inserted into the hash table.
>   *
>   * Returns true on success.
> - * Returns false if the @p-@hash pair already exists in the hash table.
> + * Returns false if there is an existing entry in the table that is 
> equivalent
> + * (i.e. ht->cmp matches and the hash is the same) to @p-@h. If @existing
> + * is !NULL, a pointer to this existing entry is copied to it.
>   */
> -bool qht_insert(struct qht *ht, void *p, uint32_t hash);
> +bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing);
>
>  /**
>   * qht_lookup_custom - Look up a pointer using a custom comparison function.
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 5b7b91d..8080eb7 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1242,7 +1242,7 @@ static void tb_link_page(TranslationBlock *tb, 
> tb_page_addr_t phys_pc,
>  /* add in the hash table */
>  h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
>   tb->trace_vcpu_dstate);
> -qht_insert(&tb_ctx.htable, tb, h);
> +qht_insert(&tb_ctx.htable, tb, h, NULL);
>
>  #ifdef CONFIG_USER_ONLY
>  if (DEBUG_TB_CHECK_GATE) {
> diff --git a/tests/qht-bench.c b/tests/qht-bench.c
> index c94ac25..f492b3a 100644
> --- a/tests/qht-bench.c
> +++ b/tests/qht-bench.c
> @@ -163,7 +163,7 @@ static void do_rw(struct thread_info *info)
>  bool written = false;
>
>  if (qht_lookup(&ht, p, hash) == NULL) {
> -written = qht_insert(&ht, p, hash);
> +written = qht_insert(&ht, p, hash, NULL);
>  }
>  if (written) {
>  stats->in++;
> @@ -322,7 +322,7 @@ static void htable_init(void)
>  r = xorshift64star(r);
>  p = &keys[r & (init_range - 1)];
>  hash = h(*p);
> -if (qht_insert(&ht, p, hash)) {
> +if (qht_insert(&ht, p, hash, NULL)) {
>  break;
>  }
>  retries++;
> diff --git a/tests/test-qht.c b/tests/test-qht.c
> index b069881..dda6a06 100644
> --- a/tests/test-qht.c
> +++ b/tests/test-qht.c
> @@ -27,11 +27,17 @@ static void insert(int a, int b)
>
>  for (i = a; i < b; i++) {
>  uint32_t hash;
> +void *existing;
> +bool inserted;
>
>  arr[i] = i;
>  hash = i;
>
> -qht_insert(&ht, &arr[i], hash);
> +inserted = qht_insert(&ht, &arr[i], hash, NULL);
> +g_assert_true(inserted);
> +inserted = qht_insert(&ht, &arr[i], hash, &existing);
> +g_assert_false(inserted);
> +g_assert_true(existing == &arr[i]);
>  }
>  }
>
> diff --git a/util/qht.c b/util/qht.c
> index 8610ce3..9d030e7 100644
> --- a/util/qht.c
> +++ b/util/qht.c
> @@ -511,9 +511,9 @@ void *qht_lookup(struct qht *ht, const void *userp, 
> uint32_t hash)
>  }
>
>  /* call with head->lock held */
> -static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
> -   struct qht_bucket *head, void *p, uint32_t 
> hash,
> -   bool *needs_resize)
> +static void *qht_insert__locked(struct qht *ht, struct qht_map *map,
> +struct qht_bucket *head, void *p, uint32_t 
> hash,
> +bool *needs_resize)
>  {
>  struct qht_bucket *b = head;
>  struct qht_bucket *prev = NULL;
> @@ -523,8 +523,9 @@ static bool qht_insert__locked(struct qht *ht, struct 
> qht_map *map,
>  do {
>  for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
>  if (b->pointers[i]) {
> -if (unlikely(b->pointers[i] == p)) {
> -return false;
> +if (unlikely(b->hashes[i] == hash &&
> + ht->cmp(b->pointers[i], p))) {
> +return 

Re: [Qemu-devel] [Qemu-block] Some question about savem/qcow2 incremental snapshot

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 06:07:19PM +0200, Kevin Wolf wrote:
> Am 30.05.2018 um 16:44 hat Stefan Hajnoczi geschrieben:
> > On Mon, May 14, 2018 at 02:48:47PM +0100, Stefan Hajnoczi wrote:
> > > On Fri, May 11, 2018 at 07:25:31PM +0200, Kevin Wolf wrote:
> > > > Am 10.05.2018 um 10:26 hat Stefan Hajnoczi geschrieben:
> > > > > On Wed, May 09, 2018 at 07:54:31PM +0200, Max Reitz wrote:
> > > > > > On 2018-05-09 12:16, Stefan Hajnoczi wrote:
> > > > > > > On Tue, May 08, 2018 at 05:03:09PM +0200, Kevin Wolf wrote:
> > > > > > >> Am 08.05.2018 um 16:41 hat Eric Blake geschrieben:
> > > > > > >>> On 12/25/2017 01:33 AM, He Junyan wrote:
> > > > > > >> I think it makes sense to invest some effort into such 
> > > > > > >> interfaces, but
> > > > > > >> be prepared for a long journey.
> > > > > > > 
> > > > > > > I like the suggestion but it needs to be followed up with a 
> > > > > > > concrete
> > > > > > > design that is feasible and fair for Junyan and others to 
> > > > > > > implement.
> > > > > > > Otherwise the "long journey" is really just a way of rejecting 
> > > > > > > this
> > > > > > > feature.
> > 
> > The discussion on NVDIMM via the block layer has runs its course.  It
> > would be a big project and I don't think it's fair to ask Junyan to
> > implement it.
> > 
> > My understanding is this patch series doesn't modify the qcow2 on-disk
> > file format.  Rather, it just uses existing qcow2 mechanisms and extends
> > live migration to identify the NVDIMM state state region to share the
> > clusters.
> > 
> > Since this feature does not involve qcow2 format changes and is just an
> > optimization (dirty blocks still need to be allocated), it can be
> > removed from QEMU in the future if a better alternative becomes
> > available.
> > 
> > Junyan: Can you rebase the series and send a new revision?
> > 
> > Kevin and Max: Does this sound alright?
> 
> Do patches exist? I've never seen any, so I thought this was just the
> early design stage.

Sorry for the confusion, the earlier patch series was here:

  https://lists.nongnu.org/archive/html/qemu-devel/2018-03/msg04530.html

> I suspect that while it wouldn't change the qcow2 on-disk format in a
> way that the qcow2 spec would have to be change, it does need to change
> the VMState format that is stored as a blob within the qcow2 file.
> At least, you need to store which other snapshot it is based upon so
> that you can actually resume a VM from the incremental state.
> 
> Once you modify the VMState format/the migration stream, removing it
> from QEMU again later means that you can't load your old snapshots any
> more. Doing that, even with the two-release deprecation period, would be
> quite nasty.
> 
> But you're right, depending on how the feature is implemented, it might
> not be a thing that affects qcow2 much, but one that the migration
> maintainers need to have a look at. I kind of suspect that it would
> actually touch both parts to a degree that it would need approval from
> both sides.

VMState wire format changes are minimal.  The only issue is that the
previous snapshot's nvdimm vmstate can start at an arbitrary offset in
the qcow2 cluster.  We can find a solution to the misalignment problem
(I think Junyan's patch series adds padding).

The approach references existing clusters in the previous snapshot's
vmstate area and only allocates new clusters for dirty NVDIMM regions.
In the non-qcow2 case we fall back to writing the entire NVDIMM
contents.

So instead of:

  write(qcow2_bs, all_vmstate_data); /* duplicates nvdimm contents :( */

do:

  write(bs, vmstate_data_upto_nvdimm);
  if (is_qcow2(bs)) {
  snapshot_clone_vmstate_range(bs, previous_snapshot,
   offset_to_nvdimm_vmstate);
  overwrite_nvdimm_dirty_blocks(bs, nvdimm);
  } else {
  write(bs, nvdimm_vmstate_data);
  }
  write(bs, vmstate_data_after_nvdimm);

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 23/27] hw/core/or-irq: Support more than 16 inputs to an OR gate

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 11:21, Paolo Bonzini  wrote:
> On 30/05/2018 19:35, Peter Maydell wrote:
>> On 30 May 2018 at 17:59, Paolo Bonzini  wrote:
>>> On 21/05/2018 17:02, Peter Maydell wrote:
 On 21 May 2018 at 15:34, Paolo Bonzini  wrote:
> Why do the levels have to be migrated at all?  It should be enough if
> the IRQ level is either migrated manually, or restored (e.g. in
> post_save callbacks) through other data that is migrated.
 This is standard behaviour for devices: they track their
 inbound irq/gpio lines, and then that becomes internal state for
 them that must be migrated.
>>>
>>> But or_irq's input are another device's outbound lines, so tracking them
>>> should not be necessary.  The other device would do it for or_irq.
>>
>> There's no mechanism in qemu_irq for the destination end to ask
>> the source end about its current value. The information flow
>> is strictly one-way.
>
> On postload, the source must call qemu_set_irq and that will cause an
> update of the or_irq, won't it?

No, calling qemu_set_irq in postload is a bug. (You don't know
which order the state of the source and destination devices is
restored, so asserting a signal in postload would have different
effects depending on whether the destination had already had
its state restored or not.) The system design relies on each
device keeping track of (and migrating) the state of the input
lines it cares about.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v4 04/12] migration: avoid concurrent invoke channel_close by different threads

2018-05-31 Thread Dr. David Alan Gilbert
* 858585 jemmy (jemmy858...@gmail.com) wrote:
> On Wed, May 30, 2018 at 10:45 PM, Dr. David Alan Gilbert
>  wrote:
> > * Lidong Chen (jemmy858...@gmail.com) wrote:
> >> From: Lidong Chen 
> >>
> >> The channel_close maybe invoked by different threads. For example, source
> >> qemu invokes qemu_fclose in main thread, migration thread and return path
> >> thread. Destination qemu invokes qemu_fclose in main thread, listen thread
> >> and COLO incoming thread.
> >>
> >> Add a mutex in QEMUFile struct to avoid concurrent invoke channel_close.
> >>
> >> Signed-off-by: Lidong Chen 
> >> ---
> >>  migration/qemu-file.c | 5 +
> >>  1 file changed, 5 insertions(+)
> >>
> >> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> >> index 977b9ae..87d0f05 100644
> >> --- a/migration/qemu-file.c
> >> +++ b/migration/qemu-file.c
> >> @@ -52,6 +52,7 @@ struct QEMUFile {
> >>  unsigned int iovcnt;
> >>
> >>  int last_error;
> >> +QemuMutex lock;
> >
> > That could do with a comment saying what you're protecting
> >
> >>  };
> >>
> >>  /*
> >> @@ -96,6 +97,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps 
> >> *ops)
> >>
> >>  f = g_new0(QEMUFile, 1);
> >>
> >> +qemu_mutex_init(&f->lock);
> >>  f->opaque = opaque;
> >>  f->ops = ops;
> >>  return f;
> >> @@ -328,7 +330,9 @@ int qemu_fclose(QEMUFile *f)
> >>  ret = qemu_file_get_error(f);
> >>
> >>  if (f->ops->close) {
> >> +qemu_mutex_lock(&f->lock);
> >>  int ret2 = f->ops->close(f->opaque);
> >> +qemu_mutex_unlock(&f->lock);
> >
> > OK, and at least for the RDMA code, if it calls
> > close a 2nd time, rioc->rdma is checked so it wont actually free stuff a
> > 2nd time.
> >
> >>  if (ret >= 0) {
> >>  ret = ret2;
> >>  }
> >> @@ -339,6 +343,7 @@ int qemu_fclose(QEMUFile *f)
> >>  if (f->last_error) {
> >>  ret = f->last_error;
> >>  }
> >> +qemu_mutex_destroy(&f->lock);
> >>  g_free(f);
> >
> > Hmm but that's not safe; if two things really do call qemu_fclose()
> > on the same structure they race here and can end up destroying the lock
> > twice, or doing f->lock  after the 1st one has already g_free(f).
> 
> >
> >
> > So lets go back a step.
> > I think:
> >   a) There should always be a separate QEMUFile* for
> >  to_src_file and from_src_file - I don't see where you open
> >  the 2nd one; I don't see your implementation of
> >  f->ops->get_return_path.
> 
> yes, current qemu version use a separate QEMUFile* for to_src_file and
> from_src_file.
> and the two QEMUFile point to one QIOChannelRDMA.
> 
> the f->ops->get_return_path is implemented by channel_output_ops or
> channel_input_ops.

Ah OK, yes that makes sense.

> >   b) I *think* that while the different threads might all call
> >  fclose(), I think there should only ever be one qemu_fclose
> >  call for each direction on the QEMUFile.
> >
> > But now we have two problems:
> >   If (a) is true then f->lock  is separate on each one so
> >doesn't really protect if the two directions are closed
> >at once. (Assuming (b) is true)
> 
> yes, you are right.  so I should add a QemuMutex in QIOChannel structure, not
> QEMUFile structure. and qemu_mutex_destroy the QemuMutex in
> qio_channel_finalize.

OK, that sounds better.

Dave

> Thank you.
> 
> >
> >   If (a) is false and we actually share a single QEMUFile then
> >  that race at the end happens.
> >
> > Dave
> >
> >
> >>  trace_qemu_file_fclose();
> >>  return ret;
> >> --
> >> 1.8.3.1
> >>
> > --
> > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH 1/4] test: Move reusable code from tpm-crb-swtpm-test.c to tpm-util.c

2018-05-31 Thread Marc-André Lureau
On Wed, May 30, 2018 at 9:35 PM, Stefan Berger
 wrote:
> Move code we can reuse from tpm-crb-swtpm-test.c into tpm-util.c
> and prefix functions with 'tpm_util_'.
>
> Remove some unnecessary #include's.
>
> Signed-off-by: Stefan Berger 

Reviewed-by: Marc-André Lureau 

> ---
>  tests/tpm-crb-swtpm-test.c | 100 
> +++--
>  tests/tpm-util.c   |  89 
>  tests/tpm-util.h   |  10 +
>  3 files changed, 104 insertions(+), 95 deletions(-)
>
> diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c
> index c2bde0cbaa..2a22b032ca 100644
> --- a/tests/tpm-crb-swtpm-test.c
> +++ b/tests/tpm-crb-swtpm-test.c
> @@ -15,12 +15,8 @@
>  #include "qemu/osdep.h"
>  #include 
>
> -#include "hw/acpi/tpm.h"
> -#include "io/channel-socket.h"
>  #include "libqtest.h"
>  #include "tpm-util.h"
> -#include "sysemu/tpm.h"
> -#include "qapi/qmp/qdict.h"
>
>  typedef struct TestState {
>  char *src_tpm_path;
> @@ -28,93 +24,6 @@ typedef struct TestState {
>  char *uri;
>  } TestState;
>
> -bool got_stop;
> -
> -static void migrate(QTestState *who, const char *uri)
> -{
> -QDict *rsp;
> -gchar *cmd;
> -
> -cmd = g_strdup_printf("{ 'execute': 'migrate',"
> -  "'arguments': { 'uri': '%s' } }",
> -  uri);
> -rsp = qtest_qmp(who, cmd);
> -g_free(cmd);
> -g_assert(qdict_haskey(rsp, "return"));
> -qobject_unref(rsp);
> -}
> -
> -/*
> - * Events can get in the way of responses we are actually waiting for.
> - */
> -static QDict *wait_command(QTestState *who, const char *command)
> -{
> -const char *event_string;
> -QDict *response;
> -
> -response = qtest_qmp(who, command);
> -
> -while (qdict_haskey(response, "event")) {
> -/* OK, it was an event */
> -event_string = qdict_get_str(response, "event");
> -if (!strcmp(event_string, "STOP")) {
> -got_stop = true;
> -}
> -qobject_unref(response);
> -response = qtest_qmp_receive(who);
> -}
> -return response;
> -}
> -
> -static void wait_for_migration_complete(QTestState *who)
> -{
> -while (true) {
> -QDict *rsp, *rsp_return;
> -bool completed;
> -const char *status;
> -
> -rsp = wait_command(who, "{ 'execute': 'query-migrate' }");
> -rsp_return = qdict_get_qdict(rsp, "return");
> -status = qdict_get_str(rsp_return, "status");
> -completed = strcmp(status, "completed") == 0;
> -g_assert_cmpstr(status, !=,  "failed");
> -qobject_unref(rsp);
> -if (completed) {
> -return;
> -}
> -usleep(1000);
> -}
> -}
> -
> -static void migration_start_qemu(QTestState **src_qemu, QTestState 
> **dst_qemu,
> - SocketAddress *src_tpm_addr,
> - SocketAddress *dst_tpm_addr,
> - const char *miguri)
> -{
> -char *src_qemu_args, *dst_qemu_args;
> -
> -src_qemu_args = g_strdup_printf(
> -"-chardev socket,id=chr,path=%s "
> -"-tpmdev emulator,id=dev,chardev=chr "
> -"-device tpm-crb,tpmdev=dev ",
> -src_tpm_addr->u.q_unix.path);
> -
> -*src_qemu = qtest_init(src_qemu_args);
> -
> -dst_qemu_args = g_strdup_printf(
> -"-chardev socket,id=chr,path=%s "
> -"-tpmdev emulator,id=dev,chardev=chr "
> -"-device tpm-crb,tpmdev=dev "
> -"-incoming %s",
> -dst_tpm_addr->u.q_unix.path,
> -miguri);
> -
> -*dst_qemu = qtest_init(dst_qemu_args);
> -
> -free(src_qemu_args);
> -free(dst_qemu_args);
> -}
> -
>  static void tpm_crb_swtpm_test(const void *data)
>  {
>  char *args = NULL;
> @@ -183,8 +92,9 @@ static void tpm_crb_swtpm_migration_test(const void *data)
>  goto err_src_tpm_kill;
>  }
>
> -migration_start_qemu(&src_qemu, &dst_qemu, src_tpm_addr, dst_tpm_addr,
> - ts->uri);
> +tpm_util_migration_start_qemu(&src_qemu, &dst_qemu,
> +  src_tpm_addr, dst_tpm_addr,
> +  ts->uri);
>
>  tpm_util_startup(src_qemu, tpm_util_crb_transfer);
>  tpm_util_pcrextend(src_qemu, tpm_util_crb_transfer);
> @@ -197,8 +107,8 @@ static void tpm_crb_swtpm_migration_test(const void *data)
>  tpm_util_pcrread(src_qemu, tpm_util_crb_transfer, tpm_pcrread_resp,
>   sizeof(tpm_pcrread_resp));
>
> -migrate(src_qemu, ts->uri);
> -wait_for_migration_complete(src_qemu);
> +tpm_util_migrate(src_qemu, ts->uri);
> +tpm_util_wait_for_migration_complete(src_qemu);
>
>  tpm_util_pcrread(dst_qemu, tpm_util_crb_transfer, tpm_pcrread_resp,
>   sizeof(tpm_pcrread_resp));
> diff --git a/tests/tpm-util.c b/tests/tpm-util.c
> index c9b3947c1c..e6e3b922fa 100644
> --- a/tests/tpm-util.c
> +++ b/tests/tpm-util

Re: [Qemu-devel] [PATCH v4 1/5] Add functional/acceptance tests infrastructure

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 02:41:52PM -0400, Cleber Rosa wrote:
> + * Donwload (and cache) remote data files, such as firmware and kernel

s/Donwload/Download/

Aside from that:

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 2/4] test: Move common TPM test functions to tpm-tests.c

2018-05-31 Thread Marc-André Lureau
On Wed, May 30, 2018 at 9:35 PM, Stefan Berger
 wrote:
> Move common TPM test functions from tpm-crb-swtpm-test.c to tpm-tests.c
> so that for example test cases with the TPM TIS interface can use the
> same code. Prefix all funcions with 'tpm_test_'.
>
> Signed-off-by: Stefan Berger 

Reviewed-by: Marc-André Lureau 

> ---
>  tests/Makefile.include |   2 +-
>  tests/tpm-crb-swtpm-test.c |  99 ++--
>  tests/tpm-tests.c  | 124 
> +
>  tests/tpm-tests.h  |  24 +
>  4 files changed, 153 insertions(+), 96 deletions(-)
>  create mode 100644 tests/tpm-tests.c
>  create mode 100644 tests/tpm-tests.h
>
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index b499ba1813..1597d09bd8 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -723,7 +723,7 @@ tests/test-io-task$(EXESUF): tests/test-io-task.o 
> $(test-io-obj-y)
>  tests/test-io-channel-socket$(EXESUF): tests/test-io-channel-socket.o \
>  tests/io-channel-helpers.o tests/socket-helpers.o $(test-io-obj-y)
>  tests/tpm-crb-swtpm-test$(EXESUF): tests/tpm-crb-swtpm-test.o 
> tests/tpm-emu.o \
> -   tests/tpm-util.o $(test-io-obj-y)
> +   tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y)
>  tests/tpm-crb-test$(EXESUF): tests/tpm-crb-test.o tests/tpm-emu.o 
> $(test-io-obj-y)
>  tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-emu.o 
> $(test-io-obj-y)
>  tests/test-io-channel-file$(EXESUF): tests/test-io-channel-file.o \
> diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c
> index 2a22b032ca..4ac568 100644
> --- a/tests/tpm-crb-swtpm-test.c
> +++ b/tests/tpm-crb-swtpm-test.c
> @@ -16,7 +16,7 @@
>  #include 
>
>  #include "libqtest.h"
> -#include "tpm-util.h"
> +#include "tpm-tests.h"
>
>  typedef struct TestState {
>  char *src_tpm_path;
> @@ -26,108 +26,17 @@ typedef struct TestState {
>
>  static void tpm_crb_swtpm_test(const void *data)
>  {
> -char *args = NULL;
> -QTestState *s;
> -SocketAddress *addr = NULL;
> -gboolean succ;
> -GPid swtpm_pid;
> -GError *error = NULL;
>  const TestState *ts = data;
>
> -succ = tpm_util_swtpm_start(ts->src_tpm_path, &swtpm_pid, &addr, &error);
> -/* succ may be false if swtpm is not available */
> -if (!succ) {
> -return;
> -}
> -
> -args = g_strdup_printf(
> -"-chardev socket,id=chr,path=%s "
> -"-tpmdev emulator,id=dev,chardev=chr "
> -"-device tpm-crb,tpmdev=dev",
> -addr->u.q_unix.path);
> -
> -s = qtest_start(args);
> -g_free(args);
> -
> -tpm_util_startup(s, tpm_util_crb_transfer);
> -tpm_util_pcrextend(s, tpm_util_crb_transfer);
> -
> -unsigned char tpm_pcrread_resp[] =
> -"\x80\x01\x00\x00\x00\x3e\x00\x00\x00\x00\x00\x00\x00\x16\x00\x00"
> -"\x00\x01\x00\x0b\x03\x00\x04\x00\x00\x00\x00\x01\x00\x20\xf6\x85"
> -"\x98\xe5\x86\x8d\xe6\x8b\x97\x29\x99\x60\xf2\x71\x7d\x17\x67\x89"
> -"\xa4\x2f\x9a\xae\xa8\xc7\xb7\xaa\x79\xa8\x62\x56\xc1\xde";
> -tpm_util_pcrread(s, tpm_util_crb_transfer, tpm_pcrread_resp,
> - sizeof(tpm_pcrread_resp));
> -
> -qtest_end();
> -tpm_util_swtpm_kill(swtpm_pid);
> -
> -if (addr) {
> -g_unlink(addr->u.q_unix.path);
> -qapi_free_SocketAddress(addr);
> -}
> +tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_crb_transfer);
>  }
>
>  static void tpm_crb_swtpm_migration_test(const void *data)
>  {
>  const TestState *ts = data;
> -gboolean succ;
> -GPid src_tpm_pid, dst_tpm_pid;
> -SocketAddress *src_tpm_addr = NULL, *dst_tpm_addr = NULL;
> -GError *error = NULL;
> -QTestState *src_qemu, *dst_qemu;
> -
> -succ = tpm_util_swtpm_start(ts->src_tpm_path, &src_tpm_pid,
> -&src_tpm_addr, &error);
> -/* succ may be false if swtpm is not available */
> -if (!succ) {
> -return;
> -}
> -
> -succ = tpm_util_swtpm_start(ts->dst_tpm_path, &dst_tpm_pid,
> -&dst_tpm_addr, &error);
> -/* succ may be false if swtpm is not available */
> -if (!succ) {
> -goto err_src_tpm_kill;
> -}
> -
> -tpm_util_migration_start_qemu(&src_qemu, &dst_qemu,
> -  src_tpm_addr, dst_tpm_addr,
> -  ts->uri);
> -
> -tpm_util_startup(src_qemu, tpm_util_crb_transfer);
> -tpm_util_pcrextend(src_qemu, tpm_util_crb_transfer);
> -
> -unsigned char tpm_pcrread_resp[] =
> -"\x80\x01\x00\x00\x00\x3e\x00\x00\x00\x00\x00\x00\x00\x16\x00\x00"
> -"\x00\x01\x00\x0b\x03\x00\x04\x00\x00\x00\x00\x01\x00\x20\xf6\x85"
> -"\x98\xe5\x86\x8d\xe6\x8b\x97\x29\x99\x60\xf2\x71\x7d\x17\x67\x89"
> -"\xa4\x2f\x9a\xae\xa8\xc7\xb7\xaa\x79\xa8\x62\x56\xc1\xde";
> -tpm_util_pcrread(src_qemu, tpm_util_crb_transfer, tpm_pcrread_resp,
> -   

Re: [Qemu-devel] [PATCH v4 5/5] Acceptance tests: add Linux kernel boot and console checking test

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 02:41:56PM -0400, Cleber Rosa wrote:
> This test boots a Linux kernel, and checks that the given command
> line was effective in two ways:
> 
>  * It makes the kernel use the set "console device" as a console
>  * The kernel records the command line as expected in the console
> 
> Given that way too many error conditions may occur, and detecting the
> kernel boot progress status may not be trivial, this test relies on a
> timeout to handle unexpected situations.  Also, it's *not* tagged as a
> quick test for obvious reasons.
> 
> It may be useful, while interactively running/debugging this test, or
> tests similar to this one, to show some of the logging channels.
> Example:
> 
>  $ avocado --show=QMP,console run boot_linux_console.py
> 
> Signed-off-by: Cleber Rosa 
> ---
>  tests/acceptance/boot_linux_console.py | 47 ++
>  1 file changed, 47 insertions(+)
>  create mode 100644 tests/acceptance/boot_linux_console.py

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 3/4] test: Pass TPM interface model to functions creating command line

2018-05-31 Thread Marc-André Lureau
On Wed, May 30, 2018 at 9:35 PM, Stefan Berger
 wrote:
> Pass the TPM interface model, such as 'tpm-crb', through to the functions
> that create the command line for QEMU.
>
> Signed-off-by: Stefan Berger 

Reviewed-by: Marc-André Lureau 

> ---
>  tests/tpm-crb-swtpm-test.c |  4 ++--
>  tests/tpm-tests.c  | 13 -
>  tests/tpm-tests.h  |  6 --
>  tests/tpm-util.c   | 11 ++-
>  tests/tpm-util.h   |  3 ++-
>  5 files changed, 22 insertions(+), 15 deletions(-)
>
> diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c
> index 4ac568..8c0a55f3ca 100644
> --- a/tests/tpm-crb-swtpm-test.c
> +++ b/tests/tpm-crb-swtpm-test.c
> @@ -28,7 +28,7 @@ static void tpm_crb_swtpm_test(const void *data)
>  {
>  const TestState *ts = data;
>
> -tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_crb_transfer);
> +tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_crb_transfer, "tpm-crb");
>  }
>
>  static void tpm_crb_swtpm_migration_test(const void *data)
> @@ -36,7 +36,7 @@ static void tpm_crb_swtpm_migration_test(const void *data)
>  const TestState *ts = data;
>
>  tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, 
> ts->uri,
> -  tpm_util_crb_transfer);
> +  tpm_util_crb_transfer, "tpm-crb");
>  }
>
>  int main(int argc, char **argv)
> diff --git a/tests/tpm-tests.c b/tests/tpm-tests.c
> index adf2c618c8..10c6592aac 100644
> --- a/tests/tpm-tests.c
> +++ b/tests/tpm-tests.c
> @@ -18,7 +18,8 @@
>  #include "libqtest.h"
>  #include "tpm-tests.h"
>
> -void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx)
> +void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx,
> + const char *ifmodel)
>  {
>  char *args = NULL;
>  QTestState *s;
> @@ -36,8 +37,8 @@ void tpm_test_swtpm_test(const char *src_tpm_path, tx_func 
> *tx)
>  args = g_strdup_printf(
>  "-chardev socket,id=chr,path=%s "
>  "-tpmdev emulator,id=dev,chardev=chr "
> -"-device tpm-crb,tpmdev=dev",
> -addr->u.q_unix.path);
> +"-device %s,tpmdev=dev",
> +addr->u.q_unix.path, ifmodel);
>
>  s = qtest_start(args);
>  g_free(args);
> @@ -64,7 +65,8 @@ void tpm_test_swtpm_test(const char *src_tpm_path, tx_func 
> *tx)
>
>  void tpm_test_swtpm_migration_test(const char *src_tpm_path,
> const char *dst_tpm_path,
> -   const char *uri, tx_func *tx)
> +   const char *uri, tx_func *tx,
> +   const char *ifmodel)
>  {
>  gboolean succ;
>  GPid src_tpm_pid, dst_tpm_pid;
> @@ -87,7 +89,8 @@ void tpm_test_swtpm_migration_test(const char *src_tpm_path,
>  }
>
>  tpm_util_migration_start_qemu(&src_qemu, &dst_qemu,
> -  src_tpm_addr, dst_tpm_addr, uri);
> +  src_tpm_addr, dst_tpm_addr, uri,
> +  ifmodel);
>
>  tpm_util_startup(src_qemu, tx);
>  tpm_util_pcrextend(src_qemu, tx);
> diff --git a/tests/tpm-tests.h b/tests/tpm-tests.h
> index 377f184c77..b97688fe75 100644
> --- a/tests/tpm-tests.h
> +++ b/tests/tpm-tests.h
> @@ -15,10 +15,12 @@
>
>  #include "tpm-util.h"
>
> -void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx);
> +void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx,
> + const char *ifmodel);
>
>  void tpm_test_swtpm_migration_test(const char *src_tpm_path,
> const char *dst_tpm_path,
> -   const char *uri, tx_func *tx);
> +   const char *uri, tx_func *tx,
> +   const char *ifmodel);
>
>  #endif /* TESTS_TPM_TESTS_H */
> diff --git a/tests/tpm-util.c b/tests/tpm-util.c
> index e6e3b922fa..e1ac4d1bd5 100644
> --- a/tests/tpm-util.c
> +++ b/tests/tpm-util.c
> @@ -248,25 +248,26 @@ void tpm_util_migration_start_qemu(QTestState 
> **src_qemu,
> QTestState **dst_qemu,
> SocketAddress *src_tpm_addr,
> SocketAddress *dst_tpm_addr,
> -   const char *miguri)
> +   const char *miguri,
> +   const char *ifmodel)
>  {
>  char *src_qemu_args, *dst_qemu_args;
>
>  src_qemu_args = g_strdup_printf(
>  "-chardev socket,id=chr,path=%s "
>  "-tpmdev emulator,id=dev,chardev=chr "
> -"-device tpm-crb,tpmdev=dev ",
> -src_tpm_addr->u.q_unix.path);
> +"-device %s,tpmdev=dev ",
> +src_tpm_addr->u.q_unix.path, ifmodel);
>
>  *src_qemu = qtest_init(src_qemu_args);
>
>  dst_qemu_args = g_strdup_printf(
>  "-chardev socket,id=chr,path=%s "
>

Re: [Qemu-devel] [PATCH v4 10/12] migration: create a dedicated thread to release rdma resource

2018-05-31 Thread Dr. David Alan Gilbert
* 858585 jemmy (jemmy858...@gmail.com) wrote:
> On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
>  wrote:
> > * Lidong Chen (jemmy858...@gmail.com) wrote:
> >> ibv_dereg_mr wait for a long time for big memory size virtual server.
> >>
> >> The test result is:
> >>   10GB  326ms
> >>   20GB  699ms
> >>   30GB  1021ms
> >>   40GB  1387ms
> >>   50GB  1712ms
> >>   60GB  2034ms
> >>   70GB  2457ms
> >>   80GB  2807ms
> >>   90GB  3107ms
> >>   100GB 3474ms
> >>   110GB 3735ms
> >>   120GB 4064ms
> >>   130GB 4567ms
> >>   140GB 4886ms
> >>
> >> this will cause the guest os hang for a while when migration finished.
> >> So create a dedicated thread to release rdma resource.
> >>
> >> Signed-off-by: Lidong Chen 
> >> ---
> >>  migration/rdma.c | 21 +
> >>  1 file changed, 17 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/migration/rdma.c b/migration/rdma.c
> >> index dfa4f77..1b9e261 100644
> >> --- a/migration/rdma.c
> >> +++ b/migration/rdma.c
> >> @@ -2979,12 +2979,12 @@ static void 
> >> qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
> >>  }
> >>  }
> >>
> >> -static int qio_channel_rdma_close(QIOChannel *ioc,
> >> -  Error **errp)
> >> +static void *qio_channel_rdma_close_thread(void *arg)
> >>  {
> >> -QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> >> +QIOChannelRDMA *rioc = arg;
> >>  RDMAContext *rdmain, *rdmaout;
> >> -trace_qemu_rdma_close();
> >> +
> >> +rcu_register_thread();
> >>
> >>  rdmain = rioc->rdmain;
> >>  if (rdmain) {
> >> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
> >>  g_free(rdmain);
> >>  g_free(rdmaout);
> >>
> >> +rcu_unregister_thread();
> >> +return NULL;
> >> +}
> >> +
> >> +static int qio_channel_rdma_close(QIOChannel *ioc,
> >> +  Error **errp)
> >> +{
> >> +QemuThread t;
> >> +QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> >> +trace_qemu_rdma_close();
> >> +
> >> +qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
> >> +   rioc, QEMU_THREAD_DETACHED);
> >
> > I don't think this can be this simple; consider the lock in patch 4;
> > now that lock means qui_channel_rdma_close() can't be called in
> > parallel; but with this change it means:
> >
> >
> >  f->lock
> >qemu_thread_create  (1)
> > !f->lock
> >  f->lock
> >qemu_thread_create
> > !f->lock
> >
> > so we don't really protect the thing you were trying to lock
> 
> yes, I should not use rioc as the thread arg.
> 
> static int qio_channel_rdma_close(QIOChannel *ioc,
>   Error **errp)
> {
> QemuThread t;
> RDMAContext *rdma[2];
> QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
> 
> trace_qemu_rdma_close();
> if (rioc->rdmain || rioc->rdmaout) {
> rdma[0] =  rioc->rdmain;
> rdma[1] =  rioc->rdmaout;
> qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>rdma, QEMU_THREAD_DETACHED);
> rioc->rdmain = NULL;
> rioc->rdmaout = NULL;

Is it safe to close both directions at once?
For example, if you get the close from the return path thread, might the
main thread be still using it's QEMUFile in the opposite direction;
it'll call close a little bit later?

Dave

> }
> return 0;
> }
> 
> >
> > Dave
> >
> >>  return 0;
> >>  }
> >>
> >> --
> >> 1.8.3.1
> >>
> > --
> > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH 4/4] test: Add swtpm migration test for the TPM TIS interface

2018-05-31 Thread Marc-André Lureau
Hi

On Wed, May 30, 2018 at 9:35 PM, Stefan Berger
 wrote:
> Add a test case for testing swtpm migration with the TPM TIS
> interface.
>
> Signed-off-by: Stefan Berger 

Reviewed-by: Marc-André Lureau 

> ---
>  tests/Makefile.include |  3 +++
>  tests/tpm-tis-swtpm-test.c | 66 
> ++
>  tests/tpm-util.c   | 48 +
>  tests/tpm-util.h   |  3 +++
>  4 files changed, 120 insertions(+)
>  create mode 100644 tests/tpm-tis-swtpm-test.c
>
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index 1597d09bd8..8a28c49d86 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -299,6 +299,7 @@ check-qtest-x86_64-$(CONFIG_VHOST_USER_NET_TEST_x86_64) 
> += tests/vhost-user-test
>  endif
>  check-qtest-i386-$(CONFIG_TPM) += tests/tpm-crb-swtpm-test$(EXESUF)
>  check-qtest-i386-$(CONFIG_TPM) += tests/tpm-crb-test$(EXESUF)
> +check-qtest-i386-$(CONFIG_TPM) += tests/tpm-tis-swtpm-test$(EXESUF)
>  check-qtest-i386-$(CONFIG_TPM) += tests/tpm-tis-test$(EXESUF)
>  check-qtest-i386-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF)
>  check-qtest-i386-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF)
> @@ -725,6 +726,8 @@ tests/test-io-channel-socket$(EXESUF): 
> tests/test-io-channel-socket.o \
>  tests/tpm-crb-swtpm-test$(EXESUF): tests/tpm-crb-swtpm-test.o 
> tests/tpm-emu.o \
> tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y)
>  tests/tpm-crb-test$(EXESUF): tests/tpm-crb-test.o tests/tpm-emu.o 
> $(test-io-obj-y)
> +tests/tpm-tis-swtpm-test$(EXESUF): tests/tpm-tis-swtpm-test.o 
> tests/tpm-emu.o \
> +   tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y)
>  tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-emu.o 
> $(test-io-obj-y)
>  tests/test-io-channel-file$(EXESUF): tests/test-io-channel-file.o \
>  tests/io-channel-helpers.o $(test-io-obj-y)
> diff --git a/tests/tpm-tis-swtpm-test.c b/tests/tpm-tis-swtpm-test.c
> new file mode 100644
> index 00..7dcd1d3912
> --- /dev/null
> +++ b/tests/tpm-tis-swtpm-test.c
> @@ -0,0 +1,66 @@
> +/*
> + * QTest testcase for TPM TIS talking to external swtpm and swtpm migration
> + *
> + * Copyright (c) 2018 IBM Corporation
> + *  with parts borrowed from migration-test.c that is:
> + * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates

Probably not relevant anymore.

> + *
> + * Authors:
> + *   Stefan Berger 
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include 
> +
> +#include "libqtest.h"
> +#include "tpm-tests.h"
> +
> +typedef struct TestState {
> +char *src_tpm_path;
> +char *dst_tpm_path;
> +char *uri;
> +} TestState;
> +
> +static void tpm_tis_swtpm_test(const void *data)
> +{
> +const TestState *ts = data;
> +
> +tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_tis_transfer, "tpm-tis");
> +}
> +
> +static void tpm_tis_swtpm_migration_test(const void *data)
> +{
> +const TestState *ts = data;
> +
> +tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, 
> ts->uri,
> +  tpm_util_tis_transfer, "tpm-tis");
> +}
> +
> +int main(int argc, char **argv)
> +{
> +int ret;
> +TestState ts = { 0 };
> +
> +ts.src_tpm_path = g_dir_make_tmp("qemu-tpm-tis-swtpm-test.XX", NULL);
> +ts.dst_tpm_path = g_dir_make_tmp("qemu-tpm-tis-swtpm-test.XX", NULL);
> +ts.uri = g_strdup_printf("unix:%s/migsocket", ts.src_tpm_path);
> +
> +module_call_init(MODULE_INIT_QOM);
> +g_test_init(&argc, &argv, NULL);
> +
> +qtest_add_data_func("/tpm/tis-swtpm/test", &ts, tpm_tis_swtpm_test);
> +qtest_add_data_func("/tpm/tis-swtpm-migration/test", &ts,
> +tpm_tis_swtpm_migration_test);
> +ret = g_test_run();
> +
> +g_rmdir(ts.dst_tpm_path);
> +g_free(ts.dst_tpm_path);
> +g_rmdir(ts.src_tpm_path);
> +g_free(ts.src_tpm_path);
> +g_free(ts.uri);
> +
> +return ret;
> +}
> diff --git a/tests/tpm-util.c b/tests/tpm-util.c
> index e1ac4d1bd5..672cedf905 100644
> --- a/tests/tpm-util.c
> +++ b/tests/tpm-util.c
> @@ -19,6 +19,9 @@
>  #include "tpm-util.h"
>  #include "qapi/qmp/qdict.h"
>
> +#define TIS_REG(LOCTY, REG) \
> +(TPM_TIS_ADDR_BASE + ((LOCTY) << 12) + REG)
> +
>  static bool got_stop;
>
>  void tpm_util_crb_transfer(QTestState *s,
> @@ -52,6 +55,51 @@ void tpm_util_crb_transfer(QTestState *s,
>  qtest_memread(s, raddr, rsp, rsp_size);
>  }
>
> +void tpm_util_tis_transfer(QTestState *s,
> +   const unsigned char *req, size_t req_size,
> +   unsigned char *rsp, size_t rsp_size)
> +{
> +uint32_t sts;
> +uint16_t bcount;
> +size_t i;
> +
> +/* request use of locality 0 */
> +qtest_writeb(s, TIS_REG(0, TPM_TIS_REG_ACCESS), 
> TPM_TIS_ACCESS_REQUEST_USE);
> +qtest_writel

Re: [Qemu-devel] [PATCH v4 4/5] scripts/qemu.py: introduce set_console() method

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 02:41:55PM -0400, Cleber Rosa wrote:
> The set_console() method is intended to ease higher level use cases
> that require a console device.
> 
> The amount of intelligence is limited on purpose, requiring either the
> device type explicitly, or the existence of a machine (pattern)
> definition.
> 
> Because of the console device type selection criteria (by machine
> type), users should also be able to define that.  It'll then be used
> for both '-machine' and for the console device type selection.
> 
> Users of the set_console() method will certainly be interested in
> accessing the console device, and for that a console_socket property
> has been added.
> 
> Signed-off-by: Cleber Rosa 
> ---
>  scripts/qemu.py | 97 -
>  1 file changed, 96 insertions(+), 1 deletion(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v5 1/2] arm_gicv3_kvm: kvm_dist_get/put_priority: skip the registers banked by GICR_IPRIORITYR

2018-05-31 Thread Auger Eric
Hi,

On 05/31/2018 05:15 AM, Shannon Zhao wrote:
> While for_each_dist_irq_reg loop starts from GIC_INTERNAL, it forgot to
> offset the date array and index. This will overlap the GICR registers
> value and leave the last GIC_INTERNAL irq's registers out of update.
> 
> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
> Cc: qemu-sta...@nongnu.org
> Reviewed-by: Peter Maydell 
Reviewed-by: Eric Auger 

Thanks

Eric
> Signed-off-by: Shannon Zhao 
> ---
>  hw/intc/arm_gicv3_kvm.c | 18 --
>  1 file changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index 3536795..147e691 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -135,7 +135,14 @@ static void kvm_dist_get_priority(GICv3State *s, 
> uint32_t offset, uint8_t *bmp)
>  uint32_t reg, *field;
>  int irq;
>  
> -field = (uint32_t *)bmp;
> +/* For the KVM GICv3, affinity routing is always enabled, and the first 8
> + * GICD_IPRIORITYR registers are always RAZ/WI. The corresponding
> + * functionality is replaced by GICR_IPRIORITYR. It doesn't need to
> + * sync them. So it needs to skip the field of GIC_INTERNAL irqs in bmp 
> and
> + * offset.
> + */
> +field = (uint32_t *)(bmp + GIC_INTERNAL);
> +offset += (GIC_INTERNAL * 8) / 8;
>  for_each_dist_irq_reg(irq, s->num_irq, 8) {
>  kvm_gicd_access(s, offset, ®, false);
>  *field = reg;
> @@ -149,7 +156,14 @@ static void kvm_dist_put_priority(GICv3State *s, 
> uint32_t offset, uint8_t *bmp)
>  uint32_t reg, *field;
>  int irq;
>  
> -field = (uint32_t *)bmp;
> +/* For the KVM GICv3, affinity routing is always enabled, and the first 8
> + * GICD_IPRIORITYR registers are always RAZ/WI. The corresponding
> + * functionality is replaced by GICR_IPRIORITYR. It doesn't need to
> + * sync them. So it needs to skip the field of GIC_INTERNAL irqs in bmp 
> and
> + * offset.
> + */
> +field = (uint32_t *)(bmp + GIC_INTERNAL);
> +offset += (GIC_INTERNAL * 8) / 8;
>  for_each_dist_irq_reg(irq, s->num_irq, 8) {
>  reg = *field;
>  kvm_gicd_access(s, offset, ®, true);
> 



Re: [Qemu-devel] [PATCH 3/3] hw/char/serial: Don't retry on serial_xmit if errno == EPIPE

2018-05-31 Thread Sergio Lopez
On Thu, May 31, 2018 at 11:52:01AM +0200, Marc-André Lureau wrote:
> On Thu, May 31, 2018 at 9:46 AM, Sergio Lopez  wrote:
> > If writing to the frontend channel failed with EPIPE, don't set up a
> > retry. EPIPE is not a recoverable error, so trying again is a waste of CPU
> > cycles.
> >
> > If the vCPU writing to the serial device and emulator thread are pinned
> > to the same pCPU, it can also compromise the stability of the Guest OS,
> > as both threads will be competing for pCPU's time, with the vCPU
> > actively polling the serial device and barely giving time to the
> > emulator thread to make actual progress.
> > ---
> >  hw/char/serial.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/hw/char/serial.c b/hw/char/serial.c
> > index 2c080c9..f26e86b 100644
> > --- a/hw/char/serial.c
> > +++ b/hw/char/serial.c
> > @@ -262,6 +262,7 @@ static void serial_xmit(SerialState *s)
> >  /* in loopback mode, say that we just received a char */
> >  serial_receive1(s, &s->tsr, 1);
> >  } else if (qemu_chr_fe_write(&s->chr, &s->tsr, 1) != 1 &&
> > +   errno != EPIPE &&
> > s->tsr_retry < MAX_XMIT_RETRY) {
> 
> Instead of adding explicit handling of EPIPE, shouldn't the code be
> rewritten to treat -1 return && errno != EAGAIN as fatal?

What kind of action should it trigger if treating errno != EAGAIN as fatal?

If you meant something like calling 'abort()', I think that, while this
could be considered as the "correct" behavior, it's a bit unpractical
for the users. The most common situation for this to happen is
restarting virtlogd while having the console/serial redirected to it.
The VM will stop writing to the serial device, but otherwise the Guest
OS can probably keep working without any issues.

Sergio.



Re: [Qemu-devel] [PATCH RFC] qemu-error: introduce error_report_once

2018-05-31 Thread Stefan Hajnoczi
On Tue, May 15, 2018 at 05:13:56PM +0800, Peter Xu wrote:
> I stole the printk_once() macro.
> 
> I always wanted to be able to print some error directly if there is a
> buffer to dump, however we can't use error_report() really quite often
> when there can be any DDOS attack.  To avoid that, we can introduce a
> print-once function for it.

I like the idea.  It solves the problem of guest-triggerable error
messages that we should know about for troubleshooting but can't due to
DoS.

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 2/3] chardev: On QIO_CHANNEL_ERR_BROKEN set errno to EPIPE

2018-05-31 Thread Sergio Lopez
On Thu, May 31, 2018 at 11:48:25AM +0200, Marc-André Lureau wrote:
> On Thu, May 31, 2018 at 9:46 AM, Sergio Lopez  wrote:
> > This allows callers to identify this potentially unrecoverable error.
> 
> (missing signed-off)

Ouch, sorry, will add it in v2.

Sergio.



Re: [Qemu-devel] [PATCH 3/3] hw/char/serial: Don't retry on serial_xmit if errno == EPIPE

2018-05-31 Thread Marc-André Lureau
Hi

On Thu, May 31, 2018 at 1:03 PM, Sergio Lopez  wrote:
> On Thu, May 31, 2018 at 11:52:01AM +0200, Marc-André Lureau wrote:
>> On Thu, May 31, 2018 at 9:46 AM, Sergio Lopez  wrote:
>> > If writing to the frontend channel failed with EPIPE, don't set up a
>> > retry. EPIPE is not a recoverable error, so trying again is a waste of CPU
>> > cycles.
>> >
>> > If the vCPU writing to the serial device and emulator thread are pinned
>> > to the same pCPU, it can also compromise the stability of the Guest OS,
>> > as both threads will be competing for pCPU's time, with the vCPU
>> > actively polling the serial device and barely giving time to the
>> > emulator thread to make actual progress.
>> > ---
>> >  hw/char/serial.c | 1 +
>> >  1 file changed, 1 insertion(+)
>> >
>> > diff --git a/hw/char/serial.c b/hw/char/serial.c
>> > index 2c080c9..f26e86b 100644
>> > --- a/hw/char/serial.c
>> > +++ b/hw/char/serial.c
>> > @@ -262,6 +262,7 @@ static void serial_xmit(SerialState *s)
>> >  /* in loopback mode, say that we just received a char */
>> >  serial_receive1(s, &s->tsr, 1);
>> >  } else if (qemu_chr_fe_write(&s->chr, &s->tsr, 1) != 1 &&
>> > +   errno != EPIPE &&
>> > s->tsr_retry < MAX_XMIT_RETRY) {
>>
>> Instead of adding explicit handling of EPIPE, shouldn't the code be
>> rewritten to treat -1 return && errno != EAGAIN as fatal?
>
> What kind of action should it trigger if treating errno != EAGAIN as fatal?

"fatal" was perhaps the wrong word. I mean that we don't go into the
retry loop and treat the chardev as disconnected (without explicit
handling of EPIPE).

>
> If you meant something like calling 'abort()', I think that, while this
> could be considered as the "correct" behavior, it's a bit unpractical
> for the users. The most common situation for this to happen is
> restarting virtlogd while having the console/serial redirected to it.
> The VM will stop writing to the serial device, but otherwise the Guest
> OS can probably keep working without any issues.
>
> Sergio.



-- 
Marc-André Lureau



Re: [Qemu-devel] [Qemu-ppc] [PATCH] target/ppc: Allow privileged access to SPR_PCR

2018-05-31 Thread Greg Kurz
On Thu, 31 May 2018 09:38:10 +0200
Cédric Le Goater  wrote:

> On 05/30/2018 04:42 PM, Joel Stanley wrote:
> > The powerpc Linux kernel[1] and skiboot firmware[2] recently gained changes
> > that cause the Processor Compatibility Register (PCR) SPR to be cleared.
> > 
> > These changes cause Linux to fail to boot on the Qemu powernv machine
> > with an error:
> > 
> >  Trying to write privileged spr 338 (0x152) at 30017f0c
> > 
> > With this patch Qemu makes this register available as a hypervisor
> > privileged register.
> > 
> > Note that bits set in this register disable features of the processor.
> > Currently the only register state that is supported is when the register
> > is zeroed (enable all features). This is sufficient for guests to
> > once again boot.
> > 
> > [1] https://lkml.kernel.org/r/20180518013742.24095-1-mi...@neuling.org
> > [2] https://patchwork.ozlabs.org/patch/915932/
> > 
> > Signed-off-by: Joel Stanley 
> > ---
> >  target/ppc/helper.h |  1 +
> >  target/ppc/misc_helper.c| 10 ++
> >  target/ppc/translate_init.inc.c |  9 +++--
> >  3 files changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> > index 19453c68138a..d751f0e21909 100644
> > --- a/target/ppc/helper.h
> > +++ b/target/ppc/helper.h
> > @@ -17,6 +17,7 @@ DEF_HELPER_2(pminsn, void, env, i32)
> >  DEF_HELPER_1(rfid, void, env)
> >  DEF_HELPER_1(hrfid, void, env)
> >  DEF_HELPER_2(store_lpcr, void, env, tl)
> > +DEF_HELPER_2(store_pcr, void, env, tl)
> >  #endif
> >  DEF_HELPER_1(check_tlb_flush_local, void, env)
> >  DEF_HELPER_1(check_tlb_flush_global, void, env)
> > diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
> > index 8c8cba5cc6f1..40c39d08ad14 100644
> > --- a/target/ppc/misc_helper.c
> > +++ b/target/ppc/misc_helper.c
> > @@ -20,6 +20,7 @@
> >  #include "cpu.h"
> >  #include "exec/exec-all.h"
> >  #include "exec/helper-proto.h"
> > +#include "qemu/error-report.h"
> >  
> >  #include "helper_regs.h"
> >  
> > @@ -186,6 +187,15 @@ void ppc_store_msr(CPUPPCState *env, target_ulong 
> > value)
> >  hreg_store_msr(env, value, 0);
> >  }
> >  
> > +void helper_store_pcr(CPUPPCState *env, target_ulong value)
> > +{
> > +if (value != 0) {
> > +error_report("Unimplemented PCR value 0x"TARGET_FMT_lx, value);
> > +return;
> > +}
> > +env->spr[SPR_PCR] = value;  
> 
> shouldn't we use pcc->pcr_mask ? and check pcc->pcr_supported also ? 
> 

pcc->pcr_mask and ppc->pcr_supported only make sense for pseries machine
types (ie, when the spapr machine code call ppc_*_compat() functions).

The case here is different: we're running a fully emulated pnv machine,
ie, PCR can only be set by mtspr() called within the pnv guest. But TCG
doesn't implement the compatibility mode logic, ie, the CPU always run
in "raw" mode, ie, we only support PCR == 0, actually.

So, this patch looks good for me. I'm just not sure about what is
causing the build break with patchew though...

> C.
> 
> > +}
> > +
> >  /* This code is lifted from MacOnLinux. It is called whenever
> >   * THRM1,2 or 3 is read an fixes up the values in such a way
> >   * that will make MacOS not hang. These registers exist on some
> > diff --git a/target/ppc/translate_init.inc.c 
> > b/target/ppc/translate_init.inc.c
> > index ab782cb32aaa..bebe6ec5333c 100644
> > --- a/target/ppc/translate_init.inc.c
> > +++ b/target/ppc/translate_init.inc.c
> > @@ -456,6 +456,10 @@ static void spr_write_hid0_601(DisasContext *ctx, int 
> > sprn, int gprn)
> >  /* Must stop the translation as endianness may have changed */
> >  gen_stop_exception(ctx);
> >  }
> > +static void spr_write_pcr(DisasContext *ctx, int sprn, int gprn)
> > +{
> > +gen_helper_store_pcr(cpu_env, cpu_gpr[gprn]);
> > +}
> >  #endif
> >  
> >  /* Unified bats */
> > @@ -7957,11 +7961,12 @@ static void gen_spr_power6_common(CPUPPCState *env)
> >  #endif
> >  /*
> >   * Register PCR to report POWERPC_EXCP_PRIV_REG instead of
> > - * POWERPC_EXCP_INVAL_SPR.
> > + * POWERPC_EXCP_INVAL_SPR in userspace. Permit hypervisor access.
> >   */
> > -spr_register(env, SPR_PCR, "PCR",
> > +spr_register_hv(env, SPR_PCR, "PCR",
> >   SPR_NOACCESS, SPR_NOACCESS,
> >   SPR_NOACCESS, SPR_NOACCESS,
> > + &spr_read_generic, &spr_write_pcr,
> >   0x);
> >  }
> >  
> >   
> 
> 




Re: [Qemu-devel] [PATCH v4 10/12] migration: create a dedicated thread to release rdma resource

2018-05-31 Thread 858585 jemmy
On Thu, May 31, 2018 at 6:55 PM, Dr. David Alan Gilbert
 wrote:
> * 858585 jemmy (jemmy858...@gmail.com) wrote:
>> On Thu, May 31, 2018 at 12:50 AM, Dr. David Alan Gilbert
>>  wrote:
>> > * Lidong Chen (jemmy858...@gmail.com) wrote:
>> >> ibv_dereg_mr wait for a long time for big memory size virtual server.
>> >>
>> >> The test result is:
>> >>   10GB  326ms
>> >>   20GB  699ms
>> >>   30GB  1021ms
>> >>   40GB  1387ms
>> >>   50GB  1712ms
>> >>   60GB  2034ms
>> >>   70GB  2457ms
>> >>   80GB  2807ms
>> >>   90GB  3107ms
>> >>   100GB 3474ms
>> >>   110GB 3735ms
>> >>   120GB 4064ms
>> >>   130GB 4567ms
>> >>   140GB 4886ms
>> >>
>> >> this will cause the guest os hang for a while when migration finished.
>> >> So create a dedicated thread to release rdma resource.
>> >>
>> >> Signed-off-by: Lidong Chen 
>> >> ---
>> >>  migration/rdma.c | 21 +
>> >>  1 file changed, 17 insertions(+), 4 deletions(-)
>> >>
>> >> diff --git a/migration/rdma.c b/migration/rdma.c
>> >> index dfa4f77..1b9e261 100644
>> >> --- a/migration/rdma.c
>> >> +++ b/migration/rdma.c
>> >> @@ -2979,12 +2979,12 @@ static void 
>> >> qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
>> >>  }
>> >>  }
>> >>
>> >> -static int qio_channel_rdma_close(QIOChannel *ioc,
>> >> -  Error **errp)
>> >> +static void *qio_channel_rdma_close_thread(void *arg)
>> >>  {
>> >> -QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> >> +QIOChannelRDMA *rioc = arg;
>> >>  RDMAContext *rdmain, *rdmaout;
>> >> -trace_qemu_rdma_close();
>> >> +
>> >> +rcu_register_thread();
>> >>
>> >>  rdmain = rioc->rdmain;
>> >>  if (rdmain) {
>> >> @@ -3009,6 +3009,19 @@ static int qio_channel_rdma_close(QIOChannel *ioc,
>> >>  g_free(rdmain);
>> >>  g_free(rdmaout);
>> >>
>> >> +rcu_unregister_thread();
>> >> +return NULL;
>> >> +}
>> >> +
>> >> +static int qio_channel_rdma_close(QIOChannel *ioc,
>> >> +  Error **errp)
>> >> +{
>> >> +QemuThread t;
>> >> +QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>> >> +trace_qemu_rdma_close();
>> >> +
>> >> +qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>> >> +   rioc, QEMU_THREAD_DETACHED);
>> >
>> > I don't think this can be this simple; consider the lock in patch 4;
>> > now that lock means qui_channel_rdma_close() can't be called in
>> > parallel; but with this change it means:
>> >
>> >
>> >  f->lock
>> >qemu_thread_create  (1)
>> > !f->lock
>> >  f->lock
>> >qemu_thread_create
>> > !f->lock
>> >
>> > so we don't really protect the thing you were trying to lock
>>
>> yes, I should not use rioc as the thread arg.
>>
>> static int qio_channel_rdma_close(QIOChannel *ioc,
>>   Error **errp)
>> {
>> QemuThread t;
>> RDMAContext *rdma[2];
>> QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
>>
>> trace_qemu_rdma_close();
>> if (rioc->rdmain || rioc->rdmaout) {
>> rdma[0] =  rioc->rdmain;
>> rdma[1] =  rioc->rdmaout;
>> qemu_thread_create(&t, "rdma cleanup", qio_channel_rdma_close_thread,
>>rdma, QEMU_THREAD_DETACHED);
>> rioc->rdmain = NULL;
>> rioc->rdmaout = NULL;
>
> Is it safe to close both directions at once?
> For example, if you get the close from the return path thread, might the
> main thread be still using it's QEMUFile in the opposite direction;
> it'll call close a little bit later?

I use rcu to protect this.  qio_channel_rdma_close_thread call synchronize_rcu,
it will wait until all other thread not access rdmain and rdmaout.

And if the return patch close the qemu file, the migration thread qemu
file will be set error soon
because the QIOChannel is closed. QIOChannelSocket also work this way.



>
> Dave
>
>> }
>> return 0;
>> }
>>
>> >
>> > Dave
>> >
>> >>  return 0;
>> >>  }
>> >>
>> >> --
>> >> 1.8.3.1
>> >>
>> > --
>> > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH v5 2/2] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR

2018-05-31 Thread Auger Eric
Hi,

On 05/31/2018 05:15 AM, Shannon Zhao wrote:
> While we skip the GIC_INTERNAL irqs, we don't change the register offset
> accordingly. This will overlap the GICR registers value and leave the
> last GIC_INTERNAL irq's registers out of update.
> 
> Fix this by skipping the registers banked by GICR.
> 
> Also for migration compatibility if the migration source (old version
> qemu) doesn't send gicd_no_migration_shift_bug = 1 to destination, then
> we shift the data of PPI to get the right data for SPI.
> 
> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Shannon Zhao 

> ---
>  hw/intc/arm_gicv3_common.c | 79 
> ++
>  hw/intc/arm_gicv3_kvm.c| 38 ++
>  include/hw/intc/arm_gicv3_common.h |  1 +
>  3 files changed, 118 insertions(+)
> 
> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
> index 7b54d52..68211a2 100644
> --- a/hw/intc/arm_gicv3_common.c
> +++ b/hw/intc/arm_gicv3_common.c
> @@ -27,6 +27,7 @@
>  #include "hw/intc/arm_gicv3_common.h"
>  #include "gicv3_internal.h"
>  #include "hw/arm/linux-boot-if.h"
> +#include "sysemu/kvm.h"
>  
>  static int gicv3_pre_save(void *opaque)
>  {
> @@ -141,6 +142,79 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>  }
>  };
>  
> +static int gicv3_gicd_no_migration_shift_bug_pre_load(void *opaque)
> +{
> +GICv3State *cs = opaque;
> +
> +   /*
> +* The gicd_no_migration_shift_bug flag is used for migration compatibilty
nit: compatibility
> +* for old version QEMU which may have the GICD bmp shift bug under KVM 
> mode.
> +* Strictly, what we want to know is whether the migration source is using
> +* KVM. Since we don't have any way to determine that, we look at whether 
> the
> +* destination is using KVM; this is close enough because for the older 
> QEMU
> +* versions with this bug KVM -> TCG migration didn't work anyway. If the
> +* source is a newer QEMU without this bug it will transmit the migration
> +* subsection which sets the flag to true; otherwise it will remain set to
> +* the value we select here.
> +*/
> +if (kvm_enabled()) {
> +cs->gicd_no_migration_shift_bug = false;
> +}
> +
> +return 0;
> +}
> +
> +static int gicv3_gicd_no_migration_shift_bug_post_load(void *opaque,
> +   int version_id)
> +{
> +GICv3State *cs = opaque;
> +
> +if (cs->gicd_no_migration_shift_bug) {
> +return 0;
> +}
> +
> +/* Older versions of QEMU had a bug in the handling of state save/restore
> + * to the KVM GICv3: they got the offset in the bitmap arrays wrong,
> + * so that instead of the data for external interrupts 32 and up
> + * starting at bit position 32 in the bitmap, it started at bit
> + * position 64. If we're receiving data from a QEMU with that bug,
> + * we must move the data down into the right place.
> + */
> +memmove(cs->group, (uint8_t *)cs->group + GIC_INTERNAL / 8,
> +sizeof(cs->group) - GIC_INTERNAL / 8);
> +memmove(cs->grpmod, (uint8_t *)cs->grpmod + GIC_INTERNAL / 8,
> +sizeof(cs->grpmod) - GIC_INTERNAL / 8);
> +memmove(cs->enabled, (uint8_t *)cs->enabled + GIC_INTERNAL / 8,
> +sizeof(cs->enabled) - GIC_INTERNAL / 8);
> +memmove(cs->pending, (uint8_t *)cs->pending + GIC_INTERNAL / 8,
> +sizeof(cs->pending) - GIC_INTERNAL / 8);
> +memmove(cs->active, (uint8_t *)cs->active + GIC_INTERNAL / 8,
> +sizeof(cs->active) - GIC_INTERNAL / 8);
> +memmove(cs->edge_trigger, (uint8_t *)cs->edge_trigger + GIC_INTERNAL / 8,
> +sizeof(cs->edge_trigger) - GIC_INTERNAL / 8);
> +
> +/*
> + * While this new version QEMU doesn't have this kind of bug as we fix 
> it,
> + * so it's need to set the flag to true to indicate that and it's 
> neccessary
nit: it needs, necessary
> + * for next migration to work from this new version QEMU.
> + */
> +cs->gicd_no_migration_shift_bug = true;
> +
> +return 0;
> +}
> +
> +const VMStateDescription vmstate_gicv3_gicd_no_migration_shift_bug = {
> +.name = "arm_gicv3/gicd_no_migration_shift_bug",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.pre_load = gicv3_gicd_no_migration_shift_bug_pre_load,
> +.post_load = gicv3_gicd_no_migration_shift_bug_post_load,
> +.fields = (VMStateField[]) {
> +VMSTATE_BOOL(gicd_no_migration_shift_bug, GICv3State),
> +VMSTATE_END_OF_LIST()
> +}
> +};
> +
>  static const VMStateDescription vmstate_gicv3 = {
>  .name = "arm_gicv3",
>  .version_id = 1,
> @@ -165,6 +239,10 @@ static const VMStateDescription vmstate_gicv3 = {
>  VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
>   vmstate_gicv3_cpu, 
> GICv3CPUState),
>  VMSTATE_END_OF_LIST()
> +},
> +

Re: [Qemu-devel] [PATCH] main-loop: drop spin_counter

2018-05-31 Thread Paolo Bonzini
On 30/05/2018 21:42, Stefan Hajnoczi wrote:
> Commit d759c951f3287fad04210a52f2dc93f94cf58c7f ("replay: push
> replay_mutex_lock up the call tree") removed the !timeout lock
> optimization in the main loop.
> 
> The idea of the optimization was to avoid ping-pongs between threads by
> keeping the Big QEMU Lock held across non-blocking (!timeout) main loop
> iterations.
> 
> A warning is printed when the main loop spins without releasing BQL for
> long periods of time.  These warnings were supposed to aid debugging but
> in practice they just alarm users.  They are considered noise because
> the cause of spinning is not shown and is hard to find.
> 
> Now that the lock optimization has been removed, there is no danger of
> hogging the BQL.  Drop the spin counter and the infamous warning.
> 
> Signed-off-by: Stefan Hajnoczi 

Very good idea, at last! :)

Paolo

> ---
>  util/main-loop.c | 25 -
>  tests/qemu-iotests/common.filter |  1 -
>  2 files changed, 26 deletions(-)
> 
> diff --git a/util/main-loop.c b/util/main-loop.c
> index 992f9b0f34..affe0403c5 100644
> --- a/util/main-loop.c
> +++ b/util/main-loop.c
> @@ -222,36 +222,11 @@ static int os_host_main_loop_wait(int64_t timeout)
>  {
>  GMainContext *context = g_main_context_default();
>  int ret;
> -static int spin_counter;
>  
>  g_main_context_acquire(context);
>  
>  glib_pollfds_fill(&timeout);
>  
> -/* If the I/O thread is very busy or we are incorrectly busy waiting in
> - * the I/O thread, this can lead to starvation of the BQL such that the
> - * VCPU threads never run.  To make sure we can detect the later case,
> - * print a message to the screen.  If we run into this condition, create
> - * a fake timeout in order to give the VCPU threads a chance to run.
> - */
> -if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
> -static bool notified;
> -
> -if (!notified && !qtest_enabled() && !qtest_driver()) {
> -warn_report("I/O thread spun for %d iterations",
> -MAX_MAIN_LOOP_SPIN);
> -notified = true;
> -}
> -
> -timeout = SCALE_MS;
> -}
> -
> -
> -if (timeout) {
> -spin_counter = 0;
> -} else {
> -spin_counter++;
> -}
>  qemu_mutex_unlock_iothread();
>  replay_mutex_unlock();
>  
> diff --git a/tests/qemu-iotests/common.filter 
> b/tests/qemu-iotests/common.filter
> index f08ee55046..2031e353a5 100644
> --- a/tests/qemu-iotests/common.filter
> +++ b/tests/qemu-iotests/common.filter
> @@ -77,7 +77,6 @@ _filter_qemu()
>  {
>  sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
>  -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
> --e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \
>  -e $'s#\r##' # QEMU monitor uses \r\n line endings
>  }
>  
> 




Re: [Qemu-devel] [qemu-s390x] [PATCH v4 00/14] MemoryDevice: use multi stage hotplug handlers

2018-05-31 Thread Igor Mammedov
On Wed, 30 May 2018 16:03:29 +0200
Paolo Bonzini  wrote:

> On 25/05/2018 14:43, David Hildenbrand wrote:
> > On 17.05.2018 10:15, David Hildenbrand wrote:  
> >> We can have devices that need certain other resources that are e.g.
> >> system resources managed by the machine. We need a clean way to assign
> >> these resources (without violating layers as brought up by Igor).
> >>
> >> One example is virtio-mem/virtio-pmem. Both device types need to be
> >> assigned some region in guest physical address space. This device memory
> >> belongs to the machine and is managed by it. However, virito devices are
> >> hotplugged using the hotplug handler their proxy device implements. So we
> >> could trigger e.g. a PCI hotplug handler for virtio-pci or a CSS/CCW
> >> hotplug handler for virtio-ccw. But definetly not the machine.
> >>
> >> Now, we can route other devices through the machine hotplug handler, to
> >> properly assign/unassign resources - like a portion in guest physical
> >> address space.
> >>
> >> v3 -> v4:
> >> - Removed the s390x bits, will send that out separately (was just a proof
> >>   that it works just fine with s390x)
> >> - Fixed a typo and reworded a comment
> >>
> >> v2 -> v3:
> >> - Added "memory-device: introduce separate config option"
> >> - Dropped "parent_bus" check from hotplug handler lookup functions
> >> - "Handly" -> "Handle" in patch description.
> >>
> >> v1 -> v2:
> >> - Use multi stage hotplug handler instead of resource handler
> >> - MemoryDevices only compiled if necessary (CONFIG_MEM_HOTPLUG)
> >> - Prepare PC/SPAPR machines properly for multi stage hotplug handlers
> >> - Route SPAPR unplug code via the hotunplug handler
> >> - Directly include s390x support. But there are no usable memory devices
> >>   yet (well, only my virtio-mem prototype)
> >> - Included "memory-device: drop assert related to align and start of 
> >> address
> >>   space"
> >>
> >> David Hildenbrand (13):
> >>   memory-device: drop assert related to align and start of address space
> >>   memory-device: introduce separate config option
> >>   pc: prepare for multi stage hotplug handlers
> >>   pc: route all memory devices through the machine hotplug handler
> >>   spapr: prepare for multi stage hotplug handlers
> >>   spapr: route all memory devices through the machine hotplug handler
> >>   spapr: handle pc-dimm unplug via hotplug handler chain
> >>   spapr: handle cpu core unplug via hotplug handler chain
> >>   memory-device: new functions to handle plug/unplug
> >>   pc-dimm: implement new memory device functions
> >>   memory-device: factor out pre-plug into hotplug handler
> >>   memory-device: factor out unplug into hotplug handler
> >>   memory-device: factor out plug into hotplug handler
> >>
> >> Igor Mammedov (1):
> >>   qdev: let machine hotplug handler to override bus hotplug handler
> >>
> >>  default-configs/i386-softmmu.mak   |   3 +-
> >>  default-configs/ppc64-softmmu.mak  |   3 +-
> >>  default-configs/x86_64-softmmu.mak |   3 +-
> >>  hw/Makefile.objs   |   2 +-
> >>  hw/core/qdev.c |   6 +-
> >>  hw/i386/pc.c   | 102 ++---
> >>  hw/mem/Makefile.objs   |   4 +-
> >>  hw/mem/memory-device.c | 129 
> >> +++--
> >>  hw/mem/pc-dimm.c   |  48 ++
> >>  hw/mem/trace-events|   4 +-
> >>  hw/ppc/spapr.c | 129 
> >> +++--
> >>  include/hw/mem/memory-device.h |  21 --
> >>  include/hw/mem/pc-dimm.h   |   3 +-
> >>  include/hw/qdev-core.h |  11 
> >>  qapi/misc.json |   2 +-
> >>  15 files changed, 330 insertions(+), 140 deletions(-)
> >>  
> > 
> > As there was no negative feedback so far, I will go ahead and assume
> > that this approach is the right thing to do.  
> 
> Ok, I'll queue this.
I think it's a bit premature.
Series would need a respin and it should also include
for completness at leas one actual user (virtio-mem) to see
how new interfaces/wrappers would be used and if they actually needed.

> Paolo
> 
> 




Re: [Qemu-devel] [PATCH 2/3] hw/arm/virt: Add another UART to the virt board

2018-05-31 Thread Jason A. Donenfeld
On Thu, May 31, 2018 at 10:32 AM, Peter Maydell
 wrote:
> It stalled on the fact that adding the second UART breaks
> UEFI images (annoyingly, UEFI iterates through UARTs in
> the DTB in the opposite direction to Linux, so if you add
> a second UART then it picks that one to use rather than
> the first one, IIRC). So it would have to be an awkward
> thing with a new machine option to request the extra UART.

Can you just add them in the opposite order for UEFI then?



Re: [Qemu-devel] [PATCH 23/27] hw/core/or-irq: Support more than 16 inputs to an OR gate

2018-05-31 Thread Paolo Bonzini
On 31/05/2018 12:50, Peter Maydell wrote:
> On 31 May 2018 at 11:21, Paolo Bonzini  wrote:
>> On 30/05/2018 19:35, Peter Maydell wrote:
>>> On 30 May 2018 at 17:59, Paolo Bonzini  wrote:
 On 21/05/2018 17:02, Peter Maydell wrote:
> On 21 May 2018 at 15:34, Paolo Bonzini  wrote:
>> Why do the levels have to be migrated at all?  It should be enough if
>> the IRQ level is either migrated manually, or restored (e.g. in
>> post_save callbacks) through other data that is migrated.
> This is standard behaviour for devices: they track their
> inbound irq/gpio lines, and then that becomes internal state for
> them that must be migrated.

 But or_irq's input are another device's outbound lines, so tracking them
 should not be necessary.  The other device would do it for or_irq.
>>>
>>> There's no mechanism in qemu_irq for the destination end to ask
>>> the source end about its current value. The information flow
>>> is strictly one-way.
>>
>> On postload, the source must call qemu_set_irq and that will cause an
>> update of the or_irq, won't it?
> 
> No, calling qemu_set_irq in postload is a bug. (You don't know
> which order the state of the source and destination devices is
> restored, so asserting a signal in postload would have different
> effects depending on whether the destination had already had
> its state restored or not.)

Hmm, I suppose the x86 world is a bit more "hierarchical" and you cannot
create a qemu_irq loop - and creating the sink always before the source
ensures that migration works fine with post_load.  I'm pretty sure that
there are devices that do that, and an interesting case (for the sake of
this thread) is the IOMMU, which prompted the introduction of
MigrationPriority.  Thanks for the lesson! :)

Maybe we should add your text to docs/devel/migration.rst, under "Device
ordering", because it explicitly mentions interrupt controllers.

Paolo



Re: [Qemu-devel] [qemu-s390x] [PATCH v4 00/14] MemoryDevice: use multi stage hotplug handlers

2018-05-31 Thread Paolo Bonzini
On 31/05/2018 13:47, Igor Mammedov wrote:
> On Wed, 30 May 2018 16:03:29 +0200
> Paolo Bonzini  wrote:
> 
>> On 25/05/2018 14:43, David Hildenbrand wrote:
>>> On 17.05.2018 10:15, David Hildenbrand wrote:  
 We can have devices that need certain other resources that are e.g.
 system resources managed by the machine. We need a clean way to assign
 these resources (without violating layers as brought up by Igor).

 One example is virtio-mem/virtio-pmem. Both device types need to be
 assigned some region in guest physical address space. This device memory
 belongs to the machine and is managed by it. However, virito devices are
 hotplugged using the hotplug handler their proxy device implements. So we
 could trigger e.g. a PCI hotplug handler for virtio-pci or a CSS/CCW
 hotplug handler for virtio-ccw. But definetly not the machine.

 Now, we can route other devices through the machine hotplug handler, to
 properly assign/unassign resources - like a portion in guest physical
 address space.

 v3 -> v4:
 - Removed the s390x bits, will send that out separately (was just a proof
   that it works just fine with s390x)
 - Fixed a typo and reworded a comment

 v2 -> v3:
 - Added "memory-device: introduce separate config option"
 - Dropped "parent_bus" check from hotplug handler lookup functions
 - "Handly" -> "Handle" in patch description.

 v1 -> v2:
 - Use multi stage hotplug handler instead of resource handler
 - MemoryDevices only compiled if necessary (CONFIG_MEM_HOTPLUG)
 - Prepare PC/SPAPR machines properly for multi stage hotplug handlers
 - Route SPAPR unplug code via the hotunplug handler
 - Directly include s390x support. But there are no usable memory devices
   yet (well, only my virtio-mem prototype)
 - Included "memory-device: drop assert related to align and start of 
 address
   space"

 David Hildenbrand (13):
   memory-device: drop assert related to align and start of address space
   memory-device: introduce separate config option
   pc: prepare for multi stage hotplug handlers
   pc: route all memory devices through the machine hotplug handler
   spapr: prepare for multi stage hotplug handlers
   spapr: route all memory devices through the machine hotplug handler
   spapr: handle pc-dimm unplug via hotplug handler chain
   spapr: handle cpu core unplug via hotplug handler chain
   memory-device: new functions to handle plug/unplug
   pc-dimm: implement new memory device functions
   memory-device: factor out pre-plug into hotplug handler
   memory-device: factor out unplug into hotplug handler
   memory-device: factor out plug into hotplug handler

 Igor Mammedov (1):
   qdev: let machine hotplug handler to override bus hotplug handler

  default-configs/i386-softmmu.mak   |   3 +-
  default-configs/ppc64-softmmu.mak  |   3 +-
  default-configs/x86_64-softmmu.mak |   3 +-
  hw/Makefile.objs   |   2 +-
  hw/core/qdev.c |   6 +-
  hw/i386/pc.c   | 102 ++---
  hw/mem/Makefile.objs   |   4 +-
  hw/mem/memory-device.c | 129 
 +++--
  hw/mem/pc-dimm.c   |  48 ++
  hw/mem/trace-events|   4 +-
  hw/ppc/spapr.c | 129 
 +++--
  include/hw/mem/memory-device.h |  21 --
  include/hw/mem/pc-dimm.h   |   3 +-
  include/hw/qdev-core.h |  11 
  qapi/misc.json |   2 +-
  15 files changed, 330 insertions(+), 140 deletions(-)
  
>>>
>>> As there was no negative feedback so far, I will go ahead and assume
>>> that this approach is the right thing to do.  
>>
>> Ok, I'll queue this.
> I think it's a bit premature.
> Series would need a respin and it should also include
> for completness at leas one actual user (virtio-mem) to see
> how new interfaces/wrappers would be used and if they actually needed.

Yeah, I noticed that a respin was needed after sending this.  Thanks,

Paolo



[Qemu-devel] [Bug 1404278] Re: tap connections not working on windows host

2018-05-31 Thread Thomas Huth
ok, thanks for checking! So I'm closing this ticket now...

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1404278

Title:
  tap connections not working on windows host

Status in QEMU:
  Fix Released

Bug description:
  using latest qemu 2.2.0 64bit for windows host (installed from
  qemu-w64-setup-20141210.exe obtained from http://qemu.weilnetz.de/w64/
  ),OpenVPN 2.6.3-I601 64bit tap adapter named tap01 and calling qemu
  using the following.

  qemu-system-x86_64.exe -m 512 -net nic -net tap,ifname=tap01 -hda
  "c:\\data\\images\\test.img"

  where the image contains a slackware 14.0 64bit install.
  The tap is bridged with the real network adapter and the bridge is given an 
ip of 10.1.1.41 (which works as the ip for the windows host). The tap adapter 
(in network connections) shows connected when the qemu vm is running. inside 
the vm, the network is given an ip of 10.1.1.143 (the netmask and default 
gateway are the same for the virtual and real pc).
  fault.
  The vm cannot see the rest of the local network or visa-versa. This used to 
work in early (0.9 32bit) versions of qemu.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1404278/+subscriptions



[Qemu-devel] [Bug 1404278] Re: tap connections not working on windows host

2018-05-31 Thread Thomas Huth
... ah, well, never mind, just saw that you already closed it :-)

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1404278

Title:
  tap connections not working on windows host

Status in QEMU:
  Fix Released

Bug description:
  using latest qemu 2.2.0 64bit for windows host (installed from
  qemu-w64-setup-20141210.exe obtained from http://qemu.weilnetz.de/w64/
  ),OpenVPN 2.6.3-I601 64bit tap adapter named tap01 and calling qemu
  using the following.

  qemu-system-x86_64.exe -m 512 -net nic -net tap,ifname=tap01 -hda
  "c:\\data\\images\\test.img"

  where the image contains a slackware 14.0 64bit install.
  The tap is bridged with the real network adapter and the bridge is given an 
ip of 10.1.1.41 (which works as the ip for the windows host). The tap adapter 
(in network connections) shows connected when the qemu vm is running. inside 
the vm, the network is given an ip of 10.1.1.143 (the netmask and default 
gateway are the same for the virtual and real pc).
  fault.
  The vm cannot see the rest of the local network or visa-versa. This used to 
work in early (0.9 32bit) versions of qemu.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1404278/+subscriptions



Re: [Qemu-devel] [Qemu-ppc] [PATCH] target/ppc: Allow privileged access to SPR_PCR

2018-05-31 Thread Joel Stanley
On 31 May 2018 at 20:57, Greg Kurz  wrote:
> On Thu, 31 May 2018 09:38:10 +0200
> Cédric Le Goater  wrote:
>
>> On 05/30/2018 04:42 PM, Joel Stanley wrote:
>> > The powerpc Linux kernel[1] and skiboot firmware[2] recently gained changes
>> > that cause the Processor Compatibility Register (PCR) SPR to be cleared.
>> >
>> > These changes cause Linux to fail to boot on the Qemu powernv machine
>> > with an error:
>> >
>> >  Trying to write privileged spr 338 (0x152) at 30017f0c
>> >
>> > With this patch Qemu makes this register available as a hypervisor
>> > privileged register.
>> >
>> > Note that bits set in this register disable features of the processor.
>> > Currently the only register state that is supported is when the register
>> > is zeroed (enable all features). This is sufficient for guests to
>> > once again boot.
>> >
>> > [1] https://lkml.kernel.org/r/20180518013742.24095-1-mi...@neuling.org
>> > [2] https://patchwork.ozlabs.org/patch/915932/
>> >
>> > Signed-off-by: Joel Stanley 
>> > ---
>> >  target/ppc/helper.h |  1 +
>> >  target/ppc/misc_helper.c| 10 ++
>> >  target/ppc/translate_init.inc.c |  9 +++--
>> >  3 files changed, 18 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
>> > index 19453c68138a..d751f0e21909 100644
>> > --- a/target/ppc/helper.h
>> > +++ b/target/ppc/helper.h
>> > @@ -17,6 +17,7 @@ DEF_HELPER_2(pminsn, void, env, i32)
>> >  DEF_HELPER_1(rfid, void, env)
>> >  DEF_HELPER_1(hrfid, void, env)
>> >  DEF_HELPER_2(store_lpcr, void, env, tl)
>> > +DEF_HELPER_2(store_pcr, void, env, tl)
>> >  #endif
>> >  DEF_HELPER_1(check_tlb_flush_local, void, env)
>> >  DEF_HELPER_1(check_tlb_flush_global, void, env)
>> > diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
>> > index 8c8cba5cc6f1..40c39d08ad14 100644
>> > --- a/target/ppc/misc_helper.c
>> > +++ b/target/ppc/misc_helper.c
>> > @@ -20,6 +20,7 @@
>> >  #include "cpu.h"
>> >  #include "exec/exec-all.h"
>> >  #include "exec/helper-proto.h"
>> > +#include "qemu/error-report.h"
>> >
>> >  #include "helper_regs.h"
>> >
>> > @@ -186,6 +187,15 @@ void ppc_store_msr(CPUPPCState *env, target_ulong 
>> > value)
>> >  hreg_store_msr(env, value, 0);
>> >  }
>> >
>> > +void helper_store_pcr(CPUPPCState *env, target_ulong value)
>> > +{
>> > +if (value != 0) {
>> > +error_report("Unimplemented PCR value 0x"TARGET_FMT_lx, value);
>> > +return;
>> > +}
>> > +env->spr[SPR_PCR] = value;
>>
>> shouldn't we use pcc->pcr_mask ? and check pcc->pcr_supported also ?
>>
>
> pcc->pcr_mask and ppc->pcr_supported only make sense for pseries machine
> types (ie, when the spapr machine code call ppc_*_compat() functions).
>
> The case here is different: we're running a fully emulated pnv machine,
> ie, PCR can only be set by mtspr() called within the pnv guest. But TCG
> doesn't implement the compatibility mode logic, ie, the CPU always run
> in "raw" mode, ie, we only support PCR == 0, actually.

Okay, thanks for clarifying. Cedric suggested offline that I could
change "Unimplemented..." to "Invalid...". Are there any other changes
you would like?

> So, this patch looks good for me. I'm just not sure about what is
> causing the build break with patchew though...

I can't reproduce the failure here either.

Cheers,

Joel



Re: [Qemu-devel] [PATCH 2/3] hw/arm/virt: Add another UART to the virt board

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 12:48, Jason A. Donenfeld  wrote:
> On Thu, May 31, 2018 at 10:32 AM, Peter Maydell
>  wrote:
>> It stalled on the fact that adding the second UART breaks
>> UEFI images (annoyingly, UEFI iterates through UARTs in
>> the DTB in the opposite direction to Linux, so if you add
>> a second UART then it picks that one to use rather than
>> the first one, IIRC). So it would have to be an awkward
>> thing with a new machine option to request the extra UART.
>
> Can you just add them in the opposite order for UEFI then?

No, because that would break Linux guests. We don't
know what guest software we're running (and indeed
UEFI typically proceeds to boot Linux).

thanks
-- PMM



Re: [Qemu-devel] [PATCH 23/27] hw/core/or-irq: Support more than 16 inputs to an OR gate

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 12:50, Paolo Bonzini  wrote:
> On 31/05/2018 12:50, Peter Maydell wrote:
>> No, calling qemu_set_irq in postload is a bug. (You don't know
>> which order the state of the source and destination devices is
>> restored, so asserting a signal in postload would have different
>> effects depending on whether the destination had already had
>> its state restored or not.)
>
> Hmm, I suppose the x86 world is a bit more "hierarchical" and you cannot
> create a qemu_irq loop - and creating the sink always before the source
> ensures that migration works fine with post_load.  I'm pretty sure that
> there are devices that do that

If there are then they are broken, because that's not how
qemu_irqs work... (Similarly, you can't assert an irq
in a reset method, because of ordering problems.)

>, and an interesting case (for the sake of
> this thread) is the IOMMU, which prompted the introduction of
> MigrationPriority.  Thanks for the lesson! :)

This sounds like a workaround for a bug in the device implementation.
Devices shouldn't care about which order they're restored in.

thanks
-- PMM



Re: [Qemu-devel] [Qemu-ppc] [PATCH] target/ppc: Allow privileged access to SPR_PCR

2018-05-31 Thread Greg Kurz
On Thu, 31 May 2018 21:28:08 +0930
Joel Stanley  wrote:

> On 31 May 2018 at 20:57, Greg Kurz  wrote:
> > On Thu, 31 May 2018 09:38:10 +0200
> > Cédric Le Goater  wrote:
> >  
> >> On 05/30/2018 04:42 PM, Joel Stanley wrote:  
> >> > The powerpc Linux kernel[1] and skiboot firmware[2] recently gained 
> >> > changes
> >> > that cause the Processor Compatibility Register (PCR) SPR to be cleared.
> >> >
> >> > These changes cause Linux to fail to boot on the Qemu powernv machine
> >> > with an error:
> >> >
> >> >  Trying to write privileged spr 338 (0x152) at 30017f0c
> >> >
> >> > With this patch Qemu makes this register available as a hypervisor
> >> > privileged register.
> >> >
> >> > Note that bits set in this register disable features of the processor.
> >> > Currently the only register state that is supported is when the register
> >> > is zeroed (enable all features). This is sufficient for guests to
> >> > once again boot.
> >> >
> >> > [1] https://lkml.kernel.org/r/20180518013742.24095-1-mi...@neuling.org
> >> > [2] https://patchwork.ozlabs.org/patch/915932/
> >> >
> >> > Signed-off-by: Joel Stanley 
> >> > ---
> >> >  target/ppc/helper.h |  1 +
> >> >  target/ppc/misc_helper.c| 10 ++
> >> >  target/ppc/translate_init.inc.c |  9 +++--
> >> >  3 files changed, 18 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> >> > index 19453c68138a..d751f0e21909 100644
> >> > --- a/target/ppc/helper.h
> >> > +++ b/target/ppc/helper.h
> >> > @@ -17,6 +17,7 @@ DEF_HELPER_2(pminsn, void, env, i32)
> >> >  DEF_HELPER_1(rfid, void, env)
> >> >  DEF_HELPER_1(hrfid, void, env)
> >> >  DEF_HELPER_2(store_lpcr, void, env, tl)
> >> > +DEF_HELPER_2(store_pcr, void, env, tl)
> >> >  #endif
> >> >  DEF_HELPER_1(check_tlb_flush_local, void, env)
> >> >  DEF_HELPER_1(check_tlb_flush_global, void, env)
> >> > diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
> >> > index 8c8cba5cc6f1..40c39d08ad14 100644
> >> > --- a/target/ppc/misc_helper.c
> >> > +++ b/target/ppc/misc_helper.c
> >> > @@ -20,6 +20,7 @@
> >> >  #include "cpu.h"
> >> >  #include "exec/exec-all.h"
> >> >  #include "exec/helper-proto.h"
> >> > +#include "qemu/error-report.h"
> >> >
> >> >  #include "helper_regs.h"
> >> >
> >> > @@ -186,6 +187,15 @@ void ppc_store_msr(CPUPPCState *env, target_ulong 
> >> > value)
> >> >  hreg_store_msr(env, value, 0);
> >> >  }
> >> >
> >> > +void helper_store_pcr(CPUPPCState *env, target_ulong value)
> >> > +{
> >> > +if (value != 0) {
> >> > +error_report("Unimplemented PCR value 0x"TARGET_FMT_lx, value);
> >> > +return;
> >> > +}
> >> > +env->spr[SPR_PCR] = value;  
> >>
> >> shouldn't we use pcc->pcr_mask ? and check pcc->pcr_supported also ?
> >>  
> >
> > pcc->pcr_mask and ppc->pcr_supported only make sense for pseries machine
> > types (ie, when the spapr machine code call ppc_*_compat() functions).
> >
> > The case here is different: we're running a fully emulated pnv machine,
> > ie, PCR can only be set by mtspr() called within the pnv guest. But TCG
> > doesn't implement the compatibility mode logic, ie, the CPU always run
> > in "raw" mode, ie, we only support PCR == 0, actually.  
> 
> Okay, thanks for clarifying. Cedric suggested offline that I could
> change "Unimplemented..." to "Invalid...". Are there any other changes
> you would like?
> 

No that's fine with me.

> > So, this patch looks good for me. I'm just not sure about what is
> > causing the build break with patchew though...  
> 
> I can't reproduce the failure here either.
> 

Same here... :-\

> Cheers,
> 
> Joel
> 

Cheers,

--
Greg



Re: [Qemu-devel] [PATCH 0/3] ahci: fix completion race condition

2018-05-31 Thread Bruce Rogers
>>> On 5/30/2018 at 6:43 PM, John Snow  wrote:
> Commit d759c951f changed the main thread lock release/reacquisition,
> and in so doing apparently jostled loose a race condition in the AHCI
> code.
> 
> Patch 2 should be sufficient to fix this, and patches 1 and 3 are just
> little trivial fixes.
> 
> This might be sufficient to fix the bug as reported at
> https://bugs.launchpad.net/qemu/+bug/1769189 
> but the nature of the timing changes make it difficult to confirm,
> so I am posting this patchset for the reporters to help test.
> 
> John Snow (3):
>   ahci: trim signatures on raise/lower
>   ahci: fix PxCI register race
>   ahci: don't schedule unnecessary BH
> 
>  hw/ide/ahci.c | 24 +++‑
>  1 file changed, 11 insertions(+), 13 deletions(‑)
> 
> ‑‑ 
> 2.14.3

In my case, I applied these 3 patches on top of v2.12 qemu under which
I can fairly reliably reproduce Windows10 disk delays. With these patches,
after quite a number of attempts, I no longer can reproduce the failure
case, so from my perspective it solves the issue.

Thanks!

Bruce



Re: [Qemu-devel] [Qemu-ppc] [PATCH] target/ppc: Allow privileged access to SPR_PCR

2018-05-31 Thread Joel Stanley
On 31 May 2018 at 21:43, Greg Kurz  wrote:
> On Thu, 31 May 2018 21:28:08 +0930
> Joel Stanley  wrote:
>
>> On 31 May 2018 at 20:57, Greg Kurz  wrote:
>> > On Thu, 31 May 2018 09:38:10 +0200
>> > Cédric Le Goater  wrote:
>> >
>> >> On 05/30/2018 04:42 PM, Joel Stanley wrote:
>> >> > The powerpc Linux kernel[1] and skiboot firmware[2] recently gained 
>> >> > changes
>> >> > that cause the Processor Compatibility Register (PCR) SPR to be cleared.
>> >> >
>> >> > These changes cause Linux to fail to boot on the Qemu powernv machine
>> >> > with an error:
>> >> >
>> >> >  Trying to write privileged spr 338 (0x152) at 30017f0c
>> >> >
>> >> > With this patch Qemu makes this register available as a hypervisor
>> >> > privileged register.
>> >> >
>> >> > Note that bits set in this register disable features of the processor.
>> >> > Currently the only register state that is supported is when the register
>> >> > is zeroed (enable all features). This is sufficient for guests to
>> >> > once again boot.
>> >> >
>> >> > [1] https://lkml.kernel.org/r/20180518013742.24095-1-mi...@neuling.org
>> >> > [2] https://patchwork.ozlabs.org/patch/915932/
>> >> >
>> >> > Signed-off-by: Joel Stanley 
>> >> > ---
>> >> >  target/ppc/helper.h |  1 +
>> >> >  target/ppc/misc_helper.c| 10 ++
>> >> >  target/ppc/translate_init.inc.c |  9 +++--
>> >> >  3 files changed, 18 insertions(+), 2 deletions(-)
>> >> >
>> >> > diff --git a/target/ppc/helper.h b/target/ppc/helper.h
>> >> > index 19453c68138a..d751f0e21909 100644
>> >> > --- a/target/ppc/helper.h
>> >> > +++ b/target/ppc/helper.h
>> >> > @@ -17,6 +17,7 @@ DEF_HELPER_2(pminsn, void, env, i32)
>> >> >  DEF_HELPER_1(rfid, void, env)
>> >> >  DEF_HELPER_1(hrfid, void, env)
>> >> >  DEF_HELPER_2(store_lpcr, void, env, tl)
>> >> > +DEF_HELPER_2(store_pcr, void, env, tl)
>> >> >  #endif
>> >> >  DEF_HELPER_1(check_tlb_flush_local, void, env)
>> >> >  DEF_HELPER_1(check_tlb_flush_global, void, env)
>> >> > diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
>> >> > index 8c8cba5cc6f1..40c39d08ad14 100644
>> >> > --- a/target/ppc/misc_helper.c
>> >> > +++ b/target/ppc/misc_helper.c
>> >> > @@ -20,6 +20,7 @@
>> >> >  #include "cpu.h"
>> >> >  #include "exec/exec-all.h"
>> >> >  #include "exec/helper-proto.h"
>> >> > +#include "qemu/error-report.h"
>> >> >
>> >> >  #include "helper_regs.h"
>> >> >
>> >> > @@ -186,6 +187,15 @@ void ppc_store_msr(CPUPPCState *env, target_ulong 
>> >> > value)
>> >> >  hreg_store_msr(env, value, 0);
>> >> >  }
>> >> >
>> >> > +void helper_store_pcr(CPUPPCState *env, target_ulong value)
>> >> > +{
>> >> > +if (value != 0) {
>> >> > +error_report("Unimplemented PCR value 0x"TARGET_FMT_lx, value);
>> >> > +return;
>> >> > +}
>> >> > +env->spr[SPR_PCR] = value;
>> >>
>> >> shouldn't we use pcc->pcr_mask ? and check pcc->pcr_supported also ?
>> >>
>> >
>> > pcc->pcr_mask and ppc->pcr_supported only make sense for pseries machine
>> > types (ie, when the spapr machine code call ppc_*_compat() functions).
>> >
>> > The case here is different: we're running a fully emulated pnv machine,
>> > ie, PCR can only be set by mtspr() called within the pnv guest. But TCG
>> > doesn't implement the compatibility mode logic, ie, the CPU always run
>> > in "raw" mode, ie, we only support PCR == 0, actually.
>>
>> Okay, thanks for clarifying. Cedric suggested offline that I could
>> change "Unimplemented..." to "Invalid...". Are there any other changes
>> you would like?
>>
>
> No that's fine with me.
>
>> > So, this patch looks good for me. I'm just not sure about what is
>> > causing the build break with patchew though...
>>
>> I can't reproduce the failure here either.
>>
>
> Same here... :-\

I've got it. I wasn't careful about the PPC64 guards, so when you're
building for non-ppc64 targets it blows up. v2 incoming.



Re: [Qemu-devel] [RFC 2/3] hw/char/nrf51_uart: Implement nRF51 SoC UART

2018-05-31 Thread Peter Maydell
On 29 May 2018 at 23:03, Julia Suvorova  wrote:
> Basic implementation of nRF51 SoC UART.
> Description could be found here:
> http://infocenter.nordicsemi.com/pdf/nRF51_RM_v3.0.1.pdf
>
> The following features are not yet implemented:
> Control with SUSPEND/START*/STOP*
> CTS/NCTS flow control
> Mapping registers to pins
>
> Signed-off-by: Julia Suvorova 

Hi; thanks for this patch; the UART implementation looks generally
in good shape. I have some specific review comments below.

> ---
>  hw/arm/nrf51_soc.c   |   7 ++
>  hw/char/Makefile.objs|   1 +
>  hw/char/nrf51_uart.c | 232 +++
>  include/hw/arm/nrf51_soc.h   |   1 +
>  include/hw/char/nrf51_uart.h |  54 

I recommend having "implement the UART" in one patch, and
"add the new UART to this SoC" as a separate patch.

>  5 files changed, 295 insertions(+)
>  create mode 100644 hw/char/nrf51_uart.c
>  create mode 100644 include/hw/char/nrf51_uart.h
>
> diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c
> index 6fe06dcfd2..a2ee6f3f3b 100644
> --- a/hw/arm/nrf51_soc.c
> +++ b/hw/arm/nrf51_soc.c
> @@ -21,6 +21,7 @@
>  #include "cpu.h"
>
>  #include "hw/arm/nrf51_soc.h"
> +#include "hw/char/nrf51_uart.h"
>
>  #define IOMEM_BASE  0x4000
>  #define IOMEM_SIZE  0x2000
> @@ -34,6 +35,9 @@
>  #define SRAM_BASE   0x2000
>  #define SRAM_SIZE   (16 * 1024)
>
> +#define UART_BASE   0x40002000
> +#define UART_SIZE   0x1000
> +
>  static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
>  {
>  NRF51State *s = NRF51_SOC(dev_soc);
> @@ -73,6 +77,9 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error 
> **errp)
>  /* TODO: implement a cortex m0 and update this */
>  s->nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
> s->kernel_filename, ARM_CPU_TYPE_NAME("cortex-m3"));
> +
> +s->uart = nrf51_uart_create(UART_BASE, qdev_get_gpio_in(s->nvic, 2),
> +serial_hd(0));

I would recommend having the UART struct embedded in the SoC
struct and using in-place init/realize.

Devices in an SoC object should also be being mapped into a
container memory region, rather than mapping themselves
directly into system memory.

>  }
>
>  static Property nrf51_soc_properties[] = {
> diff --git a/hw/char/Makefile.objs b/hw/char/Makefile.objs
> index 1b979100b7..1060c62a54 100644
> --- a/hw/char/Makefile.objs
> +++ b/hw/char/Makefile.objs
> @@ -1,5 +1,6 @@
>  common-obj-$(CONFIG_IPACK) += ipoctal232.o
>  common-obj-$(CONFIG_ESCC) += escc.o
> +common-obj-$(CONFIG_NRF51_SOC) += nrf51_uart.o
>  common-obj-$(CONFIG_PARALLEL) += parallel.o
>  common-obj-$(CONFIG_PARALLEL) += parallel-isa.o
>  common-obj-$(CONFIG_PL011) += pl011.o
> diff --git a/hw/char/nrf51_uart.c b/hw/char/nrf51_uart.c
> new file mode 100644
> index 00..2da97aa0c4
> --- /dev/null
> +++ b/hw/char/nrf51_uart.c
> @@ -0,0 +1,232 @@
> +/*
> + * nRF51 SoC UART emulation
> + *
> + * Copyright (c) 2018 Julia Suvorova 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 or
> + * (at your option) any later version.
> + */

I would suggest having a comment with the data sheet URL here.

> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "hw/registerfields.h"
> +#include "hw/char/nrf51_uart.h"
> +
> +REG32(STARTRX, 0x000)
> +REG32(STOPRX, 0x004)
> +REG32(STARTTX, 0x008)
> +REG32(STOPTX, 0x00C)
> +REG32(SUSPEND, 0x01C)
> +
> +REG32(CTS, 0x100)
> +REG32(NCTS, 0x104)
> +REG32(RXDRDY, 0x108)
> +REG32(TXDRDY, 0x11C)
> +REG32(ERROR, 0x124)
> +REG32(RXTO, 0x144)
> +
> +REG32(INTEN, 0x300)
> +FIELD(INTEN, CTS, 0, 1)
> +FIELD(INTEN, NCTS, 1, 1)
> +FIELD(INTEN, RXDRDY, 2, 1)
> +FIELD(INTEN, TXDRDY, 7, 1)
> +FIELD(INTEN, ERROR, 9, 1)
> +FIELD(INTEN, RXTO, 17, 1)
> +REG32(INTENSET, 0x304)
> +REG32(INTENCLR, 0x308)
> +REG32(ERRORSRC, 0x480)
> +REG32(ENABLE, 0x500)
> +REG32(PSELRTS, 0x508)
> +REG32(PSELTXD, 0x50C)
> +REG32(PSELCTS, 0x510)
> +REG32(PSELRXD, 0x514)
> +REG32(RXD, 0x518)
> +REG32(TXD, 0x51C)
> +REG32(BAUDRATE, 0x524)
> +REG32(CONFIG, 0x56C)
> +
> +static void nrf51_uart_update_irq(Nrf51UART *s)
> +{
> +unsigned int irq = 0;

Since this is just holding a boolean, you can make it 'bool' type,
and then you don't need the !! when you pass it to qemu_set_irq().

> +
> +irq = irq || (s->reg[A_RXDRDY] && (s->reg[A_INTEN] & 
> R_INTEN_RXDRDY_MASK));
> +irq = irq || (s->reg[A_TXDRDY] && (s->reg[A_INTEN] & 
> R_INTEN_TXDRDY_MASK));
> +irq = irq || (s->reg[A_ERROR]  && (s->reg[A_INTEN] & 
> R_INTEN_ERROR_MASK));
> +irq = irq || (s->reg[A_RXTO]   && (s->reg[A_INTEN] & R_INTEN_RXTO_MASK));
> +
> +qemu_set_irq(s->irq, !!irq);
> +}
> +
> +static uint64_t uart_read(void *opaque, hwaddr addr, unsigned int size)
> +{
> +Nrf51UART *s = NRF51_UART(opaque);
> +uint64_t r;
> +
> +switch

Re: [Qemu-devel] [PATCH 2/9 V5] hostmem-file: add the 'pmem' option

2018-05-31 Thread Stefan Hajnoczi
On Thu, May 10, 2018 at 10:08:51AM +0800, junyan...@gmx.com wrote:
> diff --git a/exec.c b/exec.c
> index fa33c29..dedeb4d 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -52,6 +52,9 @@
>  #include 
>  #endif
>  
> +/* RAM is backed by the persistent memory. */
> +#define RAM_PMEM   (1 << 3)

Please define this next to the other RAM_* flags.

Besides that:

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-devel] Recording I/O activity after KVM does a VMEXIT

2018-05-31 Thread Pavel Dovgalyuk
> From: Stefan Hajnoczi [mailto:stefa...@gmail.com]
> On Wed, May 30, 2018 at 11:19:13PM -0400, Arnabjyoti Kalita wrote:
> > I am trying to implement a 'minimal' record-replay mechanism for KVM, which
> > is similar to the one existing for TCG via -icount. I am trying to record
> > I/O events only (specifically disk and network events) when KVM does a
> > VMEXIT. This has led me to the function kvm_cpu_exec where I can clearly
> > see the different ways of handling all of the possible VMExit cases (like
> > PIO, MMIO etc.). To record network packets, I am working with the e1000
> > hardware device.
> >
> > Can I make sure that all of the network I/O, atleast for the e1000 device
> > happens through the KVM_EXIT_MMIO case and subsequent use of the
> > address_space_rw() function ? Do I also need to look at other functions as
> > well ? Also for recording disk activity, can I make sure that looking out
> > for the KVM_EXIT_MMIO and/or KVM_EXIT_PIO cases in the vmexit mechanism,
> > will be enough ?
> >
> > Let me know if there are other details that I need to take care of. I am
> > using QEMU 2.11 on a x86-64 CPU and the guest runs a Linux Kernel 4.4 with
> > Ubuntu 16.04.

The main icount-based record/replay advantage is that we don't record
any CPU IO. We record only VM IO (e.g., by using the network filter).

Disk devices may transfer data to CPU using DMA, therefore intercepting
only VMExit cases will not be enough.

Pavel Dovgalyuk




Re: [Qemu-devel] [PATCH 1/9 V5] memory, exec: switch file ram allocation functions to 'flags' parameters

2018-05-31 Thread Stefan Hajnoczi
On Thu, May 10, 2018 at 10:08:50AM +0800, junyan...@gmx.com wrote:
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 31eae0a..0460313 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -507,6 +507,9 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
> void *host),
> Error **errp);
>  #ifdef __linux__
> +
> +#define QEMU_RAM_SHARE  (1UL << 0)

Please expose RAMBlock flags instead of redefining this flag and call
the argument ram_flags.  Ultimately MemoryRegion doesn't store this or
care about it.  The value simply gets passed to the RAMBlock code.

> +
> +/**
> + * qemu_ram_alloc_from_file,
> + * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified back
> + *  file or device
> + *
> + * Parameters:
> + *  @size: the size in bytes of the ram block
> + *  @mr: the memory region where the ram block is
> + *  @flags: specify the properties of the ram block, which can be one
> + *  or bit-or of following values
> + *  - QEMU_RAM_SHARE: mmap the back file or device with MAP_SHARED

s/back//


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 3/9 V5] configure: add libpmem support

2018-05-31 Thread Stefan Hajnoczi
On Thu, May 10, 2018 at 10:08:52AM +0800, junyan...@gmx.com wrote:
>  ##
> +# check for libpmem
> +
> +if test "$libpmem" != "no"; then
> +  cat > $TMPC < +#include 
> +int main(void)
> +{
> +  pmem_is_pmem(0, 0);
> +  return 0;
> +}
> +EOF
> +  libpmem_libs="-lpmem"

PMDK uses pkg-config, please take advantage of it instead of hardcoding
flags.  See $pkg_config in ./configure for examples.

This will allow QEMU to detect PMDK on a wider range of systems.


signature.asc
Description: PGP signature


Re: [Qemu-devel] [Qemu-ppc] KVM internal error with kvm-pr on ppc64le

2018-05-31 Thread Loic Devulder
Hi,

After some test I find a way to workaround this using 2 graphics cards
in the VM.

Boot with this:
# /usr/bin/qemu-system-ppc64 -serial mon:stdio -m 1024 -cpu host -device 
virtio-scsi-pci,id=scsi0
-device scsi-cd,drive=cd0,bus=scsi0.0 -drive
media=cdrom,if=none,id=cd0,format=raw,file=/opt/openSUSE-Leap-42.3-NET-ppc64le-Build0130-Media.iso
-smp 1,threads=1 -enable-kvm -device nec-usb-xhci -device usb-tablet -device 
usb-kbd -device
virtio-vga -device virtio-gpu-pci -nodefaults

And do this in the SLOF shell (be careful, order is important but I don't know 
why...):
0 > dev /pci/vga
0 > s" device_type" delete-property
0 > dev /pci/display
0 > s" display" device-type
0 > boot

And now OS is booting using the 'virtio-gpu-pci' and not the 'virtio-vga', no 
crash anymore.

I also tried the same but with '-vga std' instead of '-device virtio-vga', in 
that case I need to
first add the 'device_type' property for 'virtio-gpuc-pci' and *after* remove 
it for the VGA card
(again, I don't know why). But even with this I still have a 'KVM Internal 
error', a new one with a
more weird suberror: "Suberror: -2130704892".

Here the trace:
Trying to load:  from: /pci@8002000/scsi@1/disk@100 ...   
Successfully loaded
SUSE Linux
#1 SMP Fri Jun 3KVM internal error. Suberror: -2130704892
extra data[0]: 1e1e1e1e
extra data[1]: 10004
extra data[2]: 2fff
extra data[3]: 32001000
extra data[4]: b2001033
extra data[5]: 4200
extra data[6]: 80002933
extra data[7]: c0fd0874
extra data[8]: 2fff
extra data[9]: c0030ed0
extra data[10]: 0
extra data[11]: 0
[...]
extra data[8190]: 0
extra data[8191]: 0
NIP c00264b0   LR d20b7298 CTR 000c XER 
2000 CPU#0
MSR b0009033 HID0   HF 8000 iidx 3 didx 3
TB   DECR 
GPR00 d20b6ee4 c0003a0fef80 d20c9230 d8008040
GPR04  0030 0800200bc3a1 c0003e8af1b0
GPR08 000c d8008040  d20bd818
GPR12 c0026440 cfa0 0020 d211de66
GPR16 d211e760 0002 c0003a0ffdfc d215
GPR20 c0003a5fb800 c0003a0e7888 c0003a9df2b8 
GPR24 c0003a9df280 c0003a0ff1f0 c0003a0e79b8 
GPR28 c0003a0e78a8 c0003a0ff1f0 c0003a0e7888 
CR 24228824  [ E  G  E  E  L  L  E  G  ] RES 
FPR00    
FPR04   414e564544006b63 45440034706f6f6c
FPR08 554e514553006b73 4954494e495f4345  
FPR12    
FPR16    
FPR20    
FPR24    
FPR28    
FPSCR 
 SRR0 c00264b0  SRR1 b0009033PVR 004b0201 VRSAVE 

SPRG0  SPRG1 cfa0  SPRG2 cfa0  SPRG3 

SPRG4  SPRG5   SPRG6   SPRG7 

HSRR0  HSRR1 
 CFAR 
 LPCR 0200
 SDR1    DAR d8008040  DSISR 4200

I don't know if this could be useful, but if someone else has the issue he can 
find a workaround now :)

On 05/30/2018 02:59 PM, Loic Devulder wrote:
> Hi guys!
> 
> I tried to start a Linux CD (I tried openSUSE Leap 42.3) to install my 
> ppc64le VM with kvm-pr (yes
> kvm-pr!) on a Power8 host and I have some issues...
> 
> My host system is an openSUSE Tumbleweed 20180525 with qemu 2.11 and kernel 
> 4.16.11.
> 
> I tried different version of qemu without any success but I tried with 
> openSUSE Leap 42.3 as a host
> (and qemu 2.11 and 2.12) with success. Kernel is 4.4 on Leap 42.3, and if I 
> update the kernel to a
> more recent version I have the same issue! So it seems to be a kvm problem, 
> not qemu.
> 
> I also tried without KVM and it works (but it is of course very slow!).
> 
> I tried to understand why and it seems to be related to the virtio-vga. Here 
> the different tries I
> made with openSUSE Tumbleweed 20180525 as host.
> 
> - With only virtio-vga and boot/messages on VGA console:
> /usr/bin/qemu-system-ppc64 -m 1024 -cpu host -device virtio-scsi-pci,id=scsi0 
> -device
> scsi-cd,drive=cd0,bus=scsi0.0 -drive
> media=cdrom,if=none,id=cd0,format=raw,file=/opt/openSUSE-Leap-42.3-NET-ppc64le-Build0130-Media.iso
> -smp 1,threads=1 -enable-kvm -device nec-usb-xhci -device usb-tablet -device 
> usb-kbd -serial
> stdio:mon -device virtio-vga -nodefaults
>

[Qemu-devel] [PATCH v2] target/ppc: Allow privileged access to SPR_PCR

2018-05-31 Thread Joel Stanley
The powerpc Linux kernel[1] and skiboot firmware[2] recently gained changes
that cause the Processor Compatibility Register (PCR) SPR to be cleared.

These changes cause Linux to fail to boot on the Qemu powernv machine
with an error:

 Trying to write privileged spr 338 (0x152) at 30017f0c

With this patch Qemu makes this register available as a hypervisor
privileged register.

Note that bits set in this register disable features of the processor.
Currently the only register state that is supported is when the register
is zeroed (enable all features). This is sufficient for guests to
once again boot.

[1] https://lkml.kernel.org/r/20180518013742.24095-1-mi...@neuling.org
[2] https://patchwork.ozlabs.org/patch/915932/

Signed-off-by: Joel Stanley 
---
v2:
 - Change error message to say Invalid instead of Unimplemented
 - Fix compile warning on other powerpc targets, thanks patchew
---
 target/ppc/helper.h |  1 +
 target/ppc/misc_helper.c| 10 ++
 target/ppc/translate_init.inc.c |  9 +++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 19453c68138a..d751f0e21909 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -17,6 +17,7 @@ DEF_HELPER_2(pminsn, void, env, i32)
 DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
 DEF_HELPER_2(store_lpcr, void, env, tl)
+DEF_HELPER_2(store_pcr, void, env, tl)
 #endif
 DEF_HELPER_1(check_tlb_flush_local, void, env)
 DEF_HELPER_1(check_tlb_flush_global, void, env)
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 8c8cba5cc6f1..1870f4c47f1a 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -20,6 +20,7 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
+#include "qemu/error-report.h"
 
 #include "helper_regs.h"
 
@@ -98,6 +99,15 @@ void helper_store_ptcr(CPUPPCState *env, target_ulong val)
 tlb_flush(CPU(cpu));
 }
 }
+
+void helper_store_pcr(CPUPPCState *env, target_ulong value)
+{
+if (value != 0) {
+error_report("Invalid PCR value 0x"TARGET_FMT_lx, value);
+return;
+}
+env->spr[SPR_PCR] = value;
+}
 #endif /* defined(TARGET_PPC64) */
 
 void helper_store_pidr(CPUPPCState *env, target_ulong val)
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index ab782cb32aaa..1a89017ddea8 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -424,6 +424,10 @@ static void spr_write_ptcr(DisasContext *ctx, int sprn, 
int gprn)
 gen_helper_store_ptcr(cpu_env, cpu_gpr[gprn]);
 }
 
+static void spr_write_pcr(DisasContext *ctx, int sprn, int gprn)
+{
+gen_helper_store_pcr(cpu_env, cpu_gpr[gprn]);
+}
 #endif
 #endif
 
@@ -7957,11 +7961,12 @@ static void gen_spr_power6_common(CPUPPCState *env)
 #endif
 /*
  * Register PCR to report POWERPC_EXCP_PRIV_REG instead of
- * POWERPC_EXCP_INVAL_SPR.
+ * POWERPC_EXCP_INVAL_SPR in userspace. Permit hypervisor access.
  */
-spr_register(env, SPR_PCR, "PCR",
+spr_register_hv(env, SPR_PCR, "PCR",
  SPR_NOACCESS, SPR_NOACCESS,
  SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_pcr,
  0x);
 }
 
-- 
2.17.0




[Qemu-devel] [Bug 1774412] [NEW] -icount sleep=on|off documentation is confusing

2018-05-31 Thread Andreas Gustafsson
Public bug reported:

The documentation for the -icount option in the qemu man page says:

"When the virtual cpu is sleeping, the virtual time will advance at
default speed unless sleep=on|off is specified. With sleep=on|off, the
virtual time will jump to the next timer deadline instantly whenever the
virtual cpu goes to sleep mode and will not advance if no timer is
enabled."

Taking this literally and specifying "sleep=on|off" on the command line does 
not work, so presumably the two instances of "sleep=on|off" should be either 
"sleep=on" or "sleep=off",
whichever is correct :)

Also, the synopsis line "-icount
[shift=N|auto][,rr=record|replay,rrfile=filename,rrsnapshot=snapshot"
fails to mention the sleep keyword at all.

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1774412

Title:
  -icount sleep=on|off documentation is confusing

Status in QEMU:
  New

Bug description:
  The documentation for the -icount option in the qemu man page says:

  "When the virtual cpu is sleeping, the virtual time will advance at
  default speed unless sleep=on|off is specified. With sleep=on|off, the
  virtual time will jump to the next timer deadline instantly whenever
  the virtual cpu goes to sleep mode and will not advance if no timer is
  enabled."

  Taking this literally and specifying "sleep=on|off" on the command line does 
not work, so presumably the two instances of "sleep=on|off" should be either 
"sleep=on" or "sleep=off",
  whichever is correct :)

  Also, the synopsis line "-icount
  [shift=N|auto][,rr=record|replay,rrfile=filename,rrsnapshot=snapshot"
  fails to mention the sleep keyword at all.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1774412/+subscriptions



Re: [Qemu-devel] [PATCH 0/3] ahci: fix completion race condition

2018-05-31 Thread Philippe Mathieu-Daudé
Hi Bruce,

On 05/31/2018 09:21 AM, Bruce Rogers wrote:
 On 5/30/2018 at 6:43 PM, John Snow  wrote:
>> Commit d759c951f changed the main thread lock release/reacquisition,
>> and in so doing apparently jostled loose a race condition in the AHCI
>> code.
>>
>> Patch 2 should be sufficient to fix this, and patches 1 and 3 are just
>> little trivial fixes.
>>
>> This might be sufficient to fix the bug as reported at
>> https://bugs.launchpad.net/qemu/+bug/1769189 
>> but the nature of the timing changes make it difficult to confirm,
>> so I am posting this patchset for the reporters to help test.
>>
>> John Snow (3):
>>   ahci: trim signatures on raise/lower
>>   ahci: fix PxCI register race
>>   ahci: don't schedule unnecessary BH
>>
>>  hw/ide/ahci.c | 24 +++‑
>>  1 file changed, 11 insertions(+), 13 deletions(‑)
>>
>> ‑‑ 
>> 2.14.3
> 
> In my case, I applied these 3 patches on top of v2.12 qemu under which
> I can fairly reliably reproduce Windows10 disk delays. With these patches,
> after quite a number of attempts, I no longer can reproduce the failure
> case, so from my perspective it solves the issue.

Does that implicitly mean John can use your "Tested-by: Bruce Rogers
" tag?



Re: [Qemu-devel] [PATCH 2/9 V5] hostmem-file: add the 'pmem' option

2018-05-31 Thread Stefan Hajnoczi
On Thu, May 10, 2018 at 10:08:51AM +0800, junyan...@gmx.com wrote:
> From: Junyan He 
> 
> When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it
> needs to know whether the backend storage is a real persistent memory,
> in order to decide whether special operations should be performed to
> ensure the data persistence.
> 
> This boolean option 'pmem' allows users to specify whether the backend
> storage of memory-backend-file is a real persistent memory. If
> 'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the
> corresponding memory region.

I'm still not sure if this option is necessary since pmem_is_pmem() is
available with the introduction of the libpmem dependency.  Why can't it
be used?

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 0/3] ahci: fix completion race condition

2018-05-31 Thread Bruce Rogers
>>> On 5/31/2018 at 7:06 AM, Philippe Mathieu-Daudé  wrote:
> Hi Bruce,
> 
> On 05/31/2018 09:21 AM, Bruce Rogers wrote:
> On 5/30/2018 at 6:43 PM, John Snow  wrote:
>>> Commit d759c951f changed the main thread lock release/reacquisition,
>>> and in so doing apparently jostled loose a race condition in the AHCI
>>> code.
>>>
>>> Patch 2 should be sufficient to fix this, and patches 1 and 3 are just
>>> little trivial fixes.
>>>
>>> This might be sufficient to fix the bug as reported at
>>> https://bugs.launchpad.net/qemu/+bug/1769189 
>>> but the nature of the timing changes make it difficult to confirm,
>>> so I am posting this patchset for the reporters to help test.
>>>
>>> John Snow (3):
>>>   ahci: trim signatures on raise/lower
>>>   ahci: fix PxCI register race
>>>   ahci: don't schedule unnecessary BH
>>>
>>>  hw/ide/ahci.c | 24 +++‑
>>>  1 file changed, 11 insertions(+), 13 deletions(‑)
>>>
>>> ‑‑ 
>>> 2.14.3
>> 
>> In my case, I applied these 3 patches on top of v2.12 qemu under which
>> I can fairly reliably reproduce Windows10 disk delays. With these patches,
>> after quite a number of attempts, I no longer can reproduce the failure
>> case, so from my perspective it solves the issue.
> 
> Does that implicitly mean John can use your "Tested-by: Bruce Rogers
> " tag?

Yes. I guess I should have added that.
Bruce



Re: [Qemu-devel] [PATCH V5 0/9] nvdimm: guarantee persistence of QEMU writes to persistent memory

2018-05-31 Thread Stefan Hajnoczi
David Gilbert previously suggested a memory access interface.  I guess
it would look something like this:

  typedef struct {
  void (*memset)(void *s, int c, size_t n);
  void (*memcpy)(void *dest, const void *src, size_t n);
  } MemoryOperations;

That way code doesn't need if (pmem) A else B.  It can just do
mem_ops->foo().  Have you looked into this idea?

Also, there was a discussion about leaving the code unchanged but adding
an nvdimm_flush() call at the very end of migration.  I think someone
benchmarked it but can't find the email.  Please post a link or
summarize the results, because that approach would be much less
invasive.  Thanks!

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH] main-loop: drop spin_counter

2018-05-31 Thread Stefan Hajnoczi
On Wed, May 30, 2018 at 08:42:38PM +0100, Stefan Hajnoczi wrote:
> Commit d759c951f3287fad04210a52f2dc93f94cf58c7f ("replay: push
> replay_mutex_lock up the call tree") removed the !timeout lock
> optimization in the main loop.
> 
> The idea of the optimization was to avoid ping-pongs between threads by
> keeping the Big QEMU Lock held across non-blocking (!timeout) main loop
> iterations.
> 
> A warning is printed when the main loop spins without releasing BQL for
> long periods of time.  These warnings were supposed to aid debugging but
> in practice they just alarm users.  They are considered noise because
> the cause of spinning is not shown and is hard to find.
> 
> Now that the lock optimization has been removed, there is no danger of
> hogging the BQL.  Drop the spin counter and the infamous warning.
> 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  util/main-loop.c | 25 -
>  tests/qemu-iotests/common.filter |  1 -
>  2 files changed, 26 deletions(-)

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 00/27] iommu: support txattrs, support TCG execution, implement TZ MPC

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 10:54, Peter Maydell  wrote:
> On 30 May 2018 at 17:58, Paolo Bonzini  wrote:
>> Go ahead and apply it through the ARM tree.  Thanks!
>
> It needs a v2 (there are some patches that need fixing),
> and some of the tail end of the patchset hasn't been reviewed,
> and I'm not sure if we have consensus on the IOMMU API changes
> completely yet. But if you're happy we're going in the right
> direction I can apply 1-13 via target-arm.next (that's the
> "plumb TxAttrs through various functions" patches). That will
> reduce the size of the patchset and make conflicts less likely.

I'm putting patches 1-13 (initial plumbing) and 22 (adding
VMSTATE_BOOL_SUB_ARRAY) into target-arm.next.

The rest will need a respin to fix minor nits and some
conflicts with things that are now in master.

thanks
-- PMM



Re: [Qemu-devel] [PULL 00/12] NUMA queue, 2018-05-30

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 00:05, Eduardo Habkost  wrote:
> The following changes since commit e609fa71e89c81fbe2670411be62da95dfb093e0:
>
>   Merge remote-tracking branch 
> 'remotes/edgar/tags/edgar/xilinx-next-2018-05-29-v1.for-upstream' into 
> staging (2018-05-29 13:01:11 +0100)
>
> are available in the Git repository at:
>
>   git://github.com/ehabkost/qemu.git tags/numa-next-pull-request
>
> for you to fetch changes up to c35665e1ee3d4344cd5156430ebc92310635f9bd:
>
>   tests: functional tests for QMP command set-numa-node (2018-05-30 13:19:14 
> -0300)
>
> 
> NUMA queue, 2018-05-30
>
> * New command-line option: --preconfig
>   This option allows pausing QEMU and allow the configuration
>   using QMP commands before running board initialization code.
> * New QMP set-numa-node, now made possible because of --preconfig
> * Small update on -numa error messages
>

Applied, thanks.

-- PMM



Re: [Qemu-devel] [PATCH v3] ARM: ACPI: Fix use-after-free due to memory realloc

2018-05-31 Thread Peter Maydell
On 30 May 2018 at 10:03, Auger Eric  wrote:
> Hi Shannon,
>
> On 05/30/2018 09:05 AM, Shannon Zhao wrote:
>> acpi_data_push uses g_array_set_size to resize the memory size. If there
>> is no enough contiguous memory, the address will be changed. So previous
>> pointer could not be used any more. It must update the pointer and use
>> the new one.
>>
>> Also, previous codes wrongly use le32 conversion of iort->node_offset
>> for subsequent computations that will result incorrect value if host is
>> not litlle endian. So use the non-converted one instead.
>>
>> Signed-off-by: Shannon Zhao 
> Reviewed-by: Eric Auger 



Applied to target-arm.next, thanks.

-- PMM



Re: [Qemu-devel] [PATCH v5 2/2] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 04:15, Shannon Zhao  wrote:
> While we skip the GIC_INTERNAL irqs, we don't change the register offset
> accordingly. This will overlap the GICR registers value and leave the
> last GIC_INTERNAL irq's registers out of update.
>
> Fix this by skipping the registers banked by GICR.
>
> Also for migration compatibility if the migration source (old version
> qemu) doesn't send gicd_no_migration_shift_bug = 1 to destination, then
> we shift the data of PPI to get the right data for SPI.
>
> Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
> Cc: qemu-sta...@nongnu.org
> Signed-off-by: Shannon Zhao 

> +/*
> + * While this new version QEMU doesn't have this kind of bug as we fix 
> it,
> + * so it's need to set the flag to true to indicate that and it's 
> neccessary
> + * for next migration to work from this new version QEMU.
> + */
> +cs->gicd_no_migration_shift_bug = true;

Nice catch; I would have forgotten that we needed to do this...


>
> +/* For the KVM GICv3, affinity routing is always enabled, and the
> + * GICD_IGROUPR0/GICD_IGRPMODR0/GICD_ISENABLER0/GICD_ISPENDR0/
> + * GICD_ISACTIVER0 registers are always RAZ/WI. The corresponding
> + * functionality is replaced by the GICR registers. It doesn't need to 
> sync
> + * them. So it should increase the offset to skip GIC_INTERNAL irqs.
> + * This matches the for_each_dist_irq_reg() macro which also skips the
> + * first GIC_INTERNAL irqs.
> + */
> +offset += (GIC_INTERNAL * 1) / 8;
> +if (clroffset != 0) {
> +clroffset += (1 * sizeof(uint32_t));
> +}
> +

Shouldn't we be adding the same thing to clroffset that we add to offset ?

Otherwise
Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [Qemu-devel] [PATCH v4 01/14] memory-device: drop assert related to align and start of address space

2018-05-31 Thread Igor Mammedov
On Wed, 30 May 2018 16:06:59 +0200
David Hildenbrand  wrote:

> On 30.05.2018 14:57, Igor Mammedov wrote:
> > On Tue, 29 May 2018 18:02:06 +0200
> > David Hildenbrand  wrote:
> >   
> >> On 29.05.2018 15:27, Igor Mammedov wrote:  
> >>> On Thu, 17 May 2018 10:15:14 +0200
> >>> David Hildenbrand  wrote:
> >>> 
>  The start of the address space does not have to be aligned for the
>  search. Handle this case explicitly when starting the search for a new
>  address.
> >>> That's true,
> >>> but commit message doesn't explain why address_space_start
> >>> should be allowed to be non aligned.
> >>>
> >>> At least with this assert we would notice early that
> >>> board allocating misaligned address space.
> >>> I'd keep the assert unless there is a good reason to drop it.
> >>
> >> That reason might be that I can easily crash QEMU
> >>
> >>  ./x86_64-softmmu/qemu-system-x86_64 -m 256M,maxmem=20G,slots=2 -object
> >> memory-backend-file,id=mem0,size=8192M,mem-path=/dev/zero,align=8192M
> >> -device pc-dimm,id=dimm1,memdev=mem0
> >>
> >> ERROR:hw/mem/memory-device.c:146:memory_device_get_free_addr: assertion
> >> failed: (QEMU_ALIGN_UP(address_space_start, align) == address_space_start) 
> >>  
> > it looks like a different issue.
> > As I remember user visible 'align' property was added as duct tape since
> > we can't figure out alignment for DAX device no the host,
> > so it was left upto upper layers to magically figure that out.
> > 
> > However we probably shouldn't allow arbitrary or bigger aligment
> > than max page size supported by target machine/cpu
> > (i.e. currently hardcoded address_space_start alignment),
> > as it creates unnecessary fragmentation and not counted in size
> > of hotplug region (for x86 we count in additional 1Gb per memory device).
> > 
> > How about turning that assert into error check that
> > inhibits plugging in devices with alignment values
> > larger than address_space_start alignment?  
> 
> 
> Let me explain a little bit why I don't like such restrictions (for
> which I don't see a need yet):
(*) being conservative is good here because we can always relax restrictions
in future if it's needed without breaking users, but we can't take away
something thing that's been shipped (and if we do it, it typically
introduces a bunch of compat code to keep old machines working).
Also beside of giving as some freedom of movement in the future,
restrictions also to some degree prevent user from misconfiguration)

> virtio-mem devices will later have a certain block size (e.g. 4MB). I
> want to give devices during resource assignment the possibility to
> specify their alignment requirements.
size and alignment are 2 diffrent things here, alignment in our design
is dictated by backing storage page size and for performance reasons
HVA and GPA should be aligned on the same boundary, users are free
to pick another GPA manually as far as it has the same alignment.
But for automatic placement we use backend's alignment to make placement
as compact as possible but still keeping GPA aligned with HVA.

> For said virtio-mem device, this would e.g. be 4MB. (see patch 10 and 14
> of how this call "get_align" comes into play), because the addresses of
> the memory blocks are all aligned by e.g. 4MB. This is what is
> guaranteed by the device specification.
where does this 4Mb magic comes from and why block must be aligned
on this size?
 
> E.g. for DIMMs we might want to allow to specify the section size (e.g.
> 128MB on x86), otherwise e.g. Linux is not able to add all memory. (but
> we should not hardcode this, as this is a Linux specific requirement -
> still it would be nice to specify)
true, it's guest specific and we do not have restrictions here.
The only restriction we have here is that size must be multiple of
backing storage page size (i.e. alignment) so that guest would
be able to use tail page.

> So in general, a memory device might have some alignment that is to be
> taken care of.
Do we really need introducing frontend specific alignment?
I'd try reuse backend's one and go for frontend's only in case we have to.

> I don't understand right now why an upper limit on the alignment would
> make sense at all. We can easily handle it during our search. And we
> have to handle it either way during the search, if we plug some device
> with strange sizes (e.g. 1MB DIMM).
> 
> Of course, we might end up fragmenting guest physical memory, but that
> is purely a setup issue (choosing sizes of devices + main memory
> properly). I don't see a reason to error out (and of course also not to
> assert out :) ).
Agreed about assert, but I'd still prefer error out there so that users
won't crate insane config and then complain (see below and *).

Upper alignment value is useful for proper sizing of hotplug address space,
so that user could plug #slots devices upto #maxmem specified on CLI.
It's still possible to misconfigure using manual placement, but default
one just works, user 

Re: [Qemu-devel] [PATCH] KVM: GIC: Fix memory leak due to calling kvm_init_irq_routing twice

2018-05-31 Thread Peter Maydell
On 31 May 2018 at 08:16, Shannon Zhao  wrote:
> kvm_irqchip_create called by kvm_init will call kvm_init_irq_routing to
> initialize global capability variables. If we call kvm_init_irq_routing in
> GIC realize function, previous allocated memory will leak.
>
> Fix this by deleting the unnecessary call.
>
> Signed-off-by: Shannon Zhao 



Applied to target-arm.next, thanks.

-- PMM



Re: [Qemu-devel] [RFC 2/3] hw/char/nrf51_uart: Implement nRF51 SoC UART

2018-05-31 Thread sundeep subbaraya
Hi,

On Wed, May 30, 2018 at 3:33 AM, Julia Suvorova via Qemu-devel
 wrote:
> Basic implementation of nRF51 SoC UART.
> Description could be found here:
> http://infocenter.nordicsemi.com/pdf/nRF51_RM_v3.0.1.pdf
>
> The following features are not yet implemented:
> Control with SUSPEND/START*/STOP*
> CTS/NCTS flow control
> Mapping registers to pins
>
> Signed-off-by: Julia Suvorova 
> ---
>  hw/arm/nrf51_soc.c   |   7 ++
>  hw/char/Makefile.objs|   1 +
>  hw/char/nrf51_uart.c | 232 +++
>  include/hw/arm/nrf51_soc.h   |   1 +
>  include/hw/char/nrf51_uart.h |  54 
>  5 files changed, 295 insertions(+)
>  create mode 100644 hw/char/nrf51_uart.c
>  create mode 100644 include/hw/char/nrf51_uart.h
>
> diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c
> index 6fe06dcfd2..a2ee6f3f3b 100644
> --- a/hw/arm/nrf51_soc.c
> +++ b/hw/arm/nrf51_soc.c
> @@ -21,6 +21,7 @@
>  #include "cpu.h"
>
>  #include "hw/arm/nrf51_soc.h"
> +#include "hw/char/nrf51_uart.h"
>
>  #define IOMEM_BASE  0x4000
>  #define IOMEM_SIZE  0x2000
> @@ -34,6 +35,9 @@
>  #define SRAM_BASE   0x2000
>  #define SRAM_SIZE   (16 * 1024)
>
> +#define UART_BASE   0x40002000
> +#define UART_SIZE   0x1000
> +
>  static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
>  {
>  NRF51State *s = NRF51_SOC(dev_soc);
> @@ -73,6 +77,9 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error 
> **errp)
>  /* TODO: implement a cortex m0 and update this */
>  s->nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96,
> s->kernel_filename, ARM_CPU_TYPE_NAME("cortex-m3"));
> +
> +s->uart = nrf51_uart_create(UART_BASE, qdev_get_gpio_in(s->nvic, 2),
> +serial_hd(0));
>  }
>
>  static Property nrf51_soc_properties[] = {
> diff --git a/hw/char/Makefile.objs b/hw/char/Makefile.objs
> index 1b979100b7..1060c62a54 100644
> --- a/hw/char/Makefile.objs
> +++ b/hw/char/Makefile.objs
> @@ -1,5 +1,6 @@
>  common-obj-$(CONFIG_IPACK) += ipoctal232.o
>  common-obj-$(CONFIG_ESCC) += escc.o
> +common-obj-$(CONFIG_NRF51_SOC) += nrf51_uart.o
>  common-obj-$(CONFIG_PARALLEL) += parallel.o
>  common-obj-$(CONFIG_PARALLEL) += parallel-isa.o
>  common-obj-$(CONFIG_PL011) += pl011.o
> diff --git a/hw/char/nrf51_uart.c b/hw/char/nrf51_uart.c
> new file mode 100644
> index 00..2da97aa0c4
> --- /dev/null
> +++ b/hw/char/nrf51_uart.c
> @@ -0,0 +1,232 @@
> +/*
> + * nRF51 SoC UART emulation
> + *
> + * Copyright (c) 2018 Julia Suvorova 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 or
> + * (at your option) any later version.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "hw/registerfields.h"
> +#include "hw/char/nrf51_uart.h"
> +
> +REG32(STARTRX, 0x000)
> +REG32(STOPRX, 0x004)
> +REG32(STARTTX, 0x008)
> +REG32(STOPTX, 0x00C)
> +REG32(SUSPEND, 0x01C)
> +
> +REG32(CTS, 0x100)
> +REG32(NCTS, 0x104)
> +REG32(RXDRDY, 0x108)
> +REG32(TXDRDY, 0x11C)
> +REG32(ERROR, 0x124)
> +REG32(RXTO, 0x144)
> +
> +REG32(INTEN, 0x300)
> +FIELD(INTEN, CTS, 0, 1)
> +FIELD(INTEN, NCTS, 1, 1)
> +FIELD(INTEN, RXDRDY, 2, 1)
> +FIELD(INTEN, TXDRDY, 7, 1)
> +FIELD(INTEN, ERROR, 9, 1)
> +FIELD(INTEN, RXTO, 17, 1)
> +REG32(INTENSET, 0x304)
> +REG32(INTENCLR, 0x308)
> +REG32(ERRORSRC, 0x480)
> +REG32(ENABLE, 0x500)
> +REG32(PSELRTS, 0x508)
> +REG32(PSELTXD, 0x50C)
> +REG32(PSELCTS, 0x510)
> +REG32(PSELRXD, 0x514)
> +REG32(RXD, 0x518)
> +REG32(TXD, 0x51C)
> +REG32(BAUDRATE, 0x524)
> +REG32(CONFIG, 0x56C)
> +
> +static void nrf51_uart_update_irq(Nrf51UART *s)
> +{
> +unsigned int irq = 0;
> +
> +irq = irq || (s->reg[A_RXDRDY] && (s->reg[A_INTEN] & 
> R_INTEN_RXDRDY_MASK));
> +irq = irq || (s->reg[A_TXDRDY] && (s->reg[A_INTEN] & 
> R_INTEN_TXDRDY_MASK));
> +irq = irq || (s->reg[A_ERROR]  && (s->reg[A_INTEN] & 
> R_INTEN_ERROR_MASK));
> +irq = irq || (s->reg[A_RXTO]   && (s->reg[A_INTEN] & R_INTEN_RXTO_MASK));
> +
> +qemu_set_irq(s->irq, !!irq);
> +}
> +
> +static uint64_t uart_read(void *opaque, hwaddr addr, unsigned int size)
> +{
> +Nrf51UART *s = NRF51_UART(opaque);
> +uint64_t r;
> +
> +switch (addr) {
> +case A_RXD:
> +r = s->rx_fifo[s->rx_fifo_pos];
> +if (s->rx_fifo_len > 0) {
> +s->rx_fifo_pos = (s->rx_fifo_pos + 1) % UART_FIFO_LENGTH;
> +s->rx_fifo_len--;
> +qemu_chr_fe_accept_input(&s->chr);
> +}
> +break;
> +
> +case A_INTENSET:
> +case A_INTENCLR:
> +case A_INTEN:
> +r = s->reg[A_INTEN];
> +break;
> +default:
> +r = s->reg[addr];

You can use R_* macros for registers and access regs[ ] with addr/4 as index.
It is better than using big regs[ ] array out of which most of
locations go unused.
Say, for 0x4 register you are using

Re: [Qemu-devel] [PATCH v3] qga: add mountpoint usage to GuestFilesystemInfo

2018-05-31 Thread Eric Blake

On 05/30/2018 09:19 PM, Chen Hanxiao wrote:

From: Chen Hanxiao 

This patch adds support for getting the usage of mounted
filesystem.
It's very useful when we try to monitor guest's filesystem.

Cc: Michael Roth 
Cc: Eric Blake 

Signed-off-by: Chen Hanxiao 
---



@@ -1079,7 +1083,19 @@ static GuestFilesystemInfo *build_guest_fsinfo(struct 
FsMount *mount,
  fs->type = g_strdup(mount->devtype);
  build_guest_fsinfo_for_device(devpath, fs, errp);
  
+if (statvfs(fs->mountpoint, &buf)) {

+error_setg_errno(errp, errno, "Failed to get statvfs");
+return NULL;
+}


Failing where we used to succeed is not nice.  Perhaps it would be 
better to make usage a best-effort query; and ignore statvfs() failure,



+
+used = buf.f_blocks - buf.f_bfree;
+nonroot_total = used + buf.f_bavail;
+usage = (double) used / nonroot_total;
+
+fs->usage = usage;
+
  g_free(devpath);
+
  return fs;
  }
  
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json

index 17884c7c70..68b9f60824 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -846,13 +846,14 @@
  # @name: disk name
  # @mountpoint: mount point path
  # @type: file system type string
+# @usage: file system usage, fraction between 0 and 1 (since 3.0)
  # @disk: an array of disk hardware information that the volume lies on,
  #which may be empty if the disk type is not supported
  #
  # Since: 2.2
  ##
  { 'struct': 'GuestFilesystemInfo',
-  'data': {'name': 'str', 'mountpoint': 'str', 'type': 'str',
+  'data': {'name': 'str', 'mountpoint': 'str', 'type': 'str', 'usage': 
'number',


...by instead marking this optional, as in '*usage'; and only setting 
has_usage=true when statvfs() succeeds.


Also, hitting exactly 80 columns already feels long; and if you go with 
my suggestion, you'd be lengthening it even more, so wrap the new member 
to the next line to keep length under 80.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



Re: [Qemu-devel] [RFC v2 1/4] docs: mention shared state protect for OOB

2018-05-31 Thread Eric Blake

On 05/31/2018 12:16 AM, Peter Xu wrote:

Out-Of-Band handlers need to protect shared state if there is any.
Mention it in the document.

Suggested-by: Markus Armbruster 
Signed-off-by: Peter Xu 
---
  docs/devel/qapi-code-gen.txt | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt
index b9b6eabd08..aafc15f100 100644
--- a/docs/devel/qapi-code-gen.txt
+++ b/docs/devel/qapi-code-gen.txt
@@ -680,6 +680,9 @@ OOB command handlers must satisfy the following conditions:
  - It does not invoke system calls that may block,
  - It does not access guest RAM that may block when userfaultfd is
enabled for postcopy live migration.
+- It needs to protect possilbe shared states, since as long as a


s/possilbe/possible/
s/states/state/

or even 'protect any shared state'


+  command supports Out-Of-Band it means the handler can be run in
+  parallel with the same handler running in the other thread.
  
  If in doubt, do not implement OOB execution support.
  



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



Re: [Qemu-devel] [PATCH v4 04/14] pc: prepare for multi stage hotplug handlers

2018-05-31 Thread Igor Mammedov
On Wed, 30 May 2018 16:13:32 +0200
David Hildenbrand  wrote:

> On 30.05.2018 15:08, Igor Mammedov wrote:
> > On Thu, 17 May 2018 10:15:17 +0200
> > David Hildenbrand  wrote:
> >   
> >> For multi stage hotplug handlers, we'll have to do some error handling
> >> in some hotplug functions, so let's use a local error variable (except
> >> for unplug requests).  
> > I'd split out introducing local error into separate patch
> > so patch would do a single thing.
> >   
> >> Also, add code to pass control to the final stage hotplug handler at the
> >> parent bus.  
> > But I don't agree with generic
> >  "} else if ("dev->parent_bus && dev->parent_bus->hotplug_handler) {"
> > forwarding, it's done by 3/14 for generic case and in case of
> > special device that needs bus handler called from machine one,
> > I'd suggest to do forwarding explicitly for that device only
> > like we do with acpi_dev.  
> 
> I decided to do it that way because it is generic and results in nicer
> recovery handling (e.g. in case pc_dimm plug fails, we can simply
> rollback all (for now MemoryDevice) previous plug operations).
rollback should be managed by the caller of pc_dimm plug
directly, so it's not relevant here.

> IMHO, the resulting code is easier to read.
> 
> From this handling it is clear that
> "if we reach the hotplug handler, and it is not some special device
> plugged by the machine (CPU, PC_DIMM), pass it on to the actual hotplug
> handler if any exists"
I strongly disagree with that it's easier to deal with.
You are basically duplicating already generalized code
from qdev_get_hotplug_handler() back into boards.

If a device doesn't have to be handled by machine handler,
than qdev_get_hotplug_handler() must return its bus handler
if any directly. So branch in question that your are copying
is a dead one, pls drop it.


> 
> > 
> >   
> >> Signed-off-by: David Hildenbrand 
> >> ---
> >>  hw/i386/pc.c | 39 +++
> >>  1 file changed, 31 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> >> index d768930d02..510076e156 100644
> >> --- a/hw/i386/pc.c
> >> +++ b/hw/i386/pc.c
> >> @@ -2007,19 +2007,32 @@ static void pc_cpu_pre_plug(HotplugHandler 
> >> *hotplug_dev,
> >>  static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
> >>DeviceState *dev, Error **errp)
> >>  {
> >> +Error *local_err = NULL;
> >> +
> >> +/* final stage hotplug handler */
> >>  if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> >> -pc_cpu_pre_plug(hotplug_dev, dev, errp);
> >> +pc_cpu_pre_plug(hotplug_dev, dev, &local_err);
> >> +} else if (dev->parent_bus && dev->parent_bus->hotplug_handler) {
> >> +hotplug_handler_pre_plug(dev->parent_bus->hotplug_handler, dev,
> >> + &local_err);
> >>  }
> >> +error_propagate(errp, local_err);
> >>  }
> >>  
> >>  static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
> >>DeviceState *dev, Error **errp)
> >>  {
> >> +Error *local_err = NULL;
> >> +
> >> +/* final stage hotplug handler */
> >>  if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> >> -pc_dimm_plug(hotplug_dev, dev, errp);
> >> +pc_dimm_plug(hotplug_dev, dev, &local_err);
> >>  } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> >> -pc_cpu_plug(hotplug_dev, dev, errp);
> >> +pc_cpu_plug(hotplug_dev, dev, &local_err);
> >> +} else if (dev->parent_bus && dev->parent_bus->hotplug_handler) {
> >> +hotplug_handler_plug(dev->parent_bus->hotplug_handler, dev, 
> >> &local_err);
> >>  }
> >> +error_propagate(errp, local_err);
> >>  }
> >>  
> >>  static void pc_machine_device_unplug_request_cb(HotplugHandler 
> >> *hotplug_dev,
> >> @@ -2029,7 +2042,10 @@ static void 
> >> pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
> >>  pc_dimm_unplug_request(hotplug_dev, dev, errp);
> >>  } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
> >>  pc_cpu_unplug_request_cb(hotplug_dev, dev, errp);
> >> -} else {
> >> +} else if (dev->parent_bus && dev->parent_bus->hotplug_handler) {
> >> +hotplug_handler_unplug_request(dev->parent_bus->hotplug_handler, 
> >> dev,
> >> +   errp);
> >> +} else if (!dev->parent_bus) {
> >>  error_setg(errp, "acpi: device unplug request for not supported 
> >> device"
> >> " type: %s", object_get_typename(OBJECT(dev)));
> >>  }
> >> @@ -2038,14 +2054,21 @@ static void 
> >> pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
> >>  static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
> >>  DeviceState *dev, Error **errp)
> >>  {
> >> +Error *local_err = NULL;
> >> +
> >> +/* final stage hotplug handler */
> >>

  1   2   3   4   >