Re: [Qemu-devel] [RFC] virtio: convert to use DMA api

2015-11-23 Thread Michael S. Tsirkin
On Mon, Nov 23, 2015 at 03:41:11PM +0800, Jason Wang wrote:
> Currently, all virtio devices bypass IOMMU completely. This is because
> address_space_memory is assumed and used during DMA emulation. This
> patch converts the virtio core API to use DMA API. This idea is
> 
> - introducing a new transport specific helper to query the dma address
>   space. (only pci version is implemented).
> - query and use this address space during virtio device guest memory
>   accessing
> 
> With this virtiodevices will not bypass IOMMU anymore. Little tested with
> intel_iommu=on with virtio guest DMA series posted in
> https://lkml.org/lkml/2015/10/28/64.
> 
> TODO:
> - Feature bit for this

This probably implies it should only be implemented
if device supports the modern mode.
Not sure what's the best way to handle transitional
devices.

> - Implement this for all transports

Tested with vhost, and it does not seem to work.
So more TODO:
 - make this work with vhost-user and vhost-net.

Also, it seems prudent to add
 - make the new behaviour conditional on a new property

> Signed-off-by: Jason Wang 
> ---
>  hw/block/virtio-blk.c |  2 +-
>  hw/char/virtio-serial-bus.c   |  2 +-
>  hw/scsi/virtio-scsi.c |  2 +-
>  hw/virtio/virtio-pci.c|  9 +
>  hw/virtio/virtio.c| 36 +++--
>  include/hw/virtio/virtio-access.h | 42 
> +--
>  include/hw/virtio/virtio-bus.h|  1 +
>  include/hw/virtio/virtio.h|  2 +-
>  8 files changed, 67 insertions(+), 29 deletions(-)
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index e70fccf..5499480 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -846,7 +846,7 @@ static int virtio_blk_load_device(VirtIODevice *vdev, 
> QEMUFile *f,
>  req->next = s->rq;
>  s->rq = req;
>  
> -virtqueue_map(&req->elem);
> +virtqueue_map(vdev, &req->elem);
>  }
>  
>  return 0;
> diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
> index 497b0af..39e9ed2 100644
> --- a/hw/char/virtio-serial-bus.c
> +++ b/hw/char/virtio-serial-bus.c
> @@ -705,7 +705,7 @@ static int fetch_active_ports_list(QEMUFile *f, int 
> version_id,
>  
>  qemu_get_buffer(f, (unsigned char *)&port->elem,
>  sizeof(port->elem));
> -virtqueue_map(&port->elem);
> +virtqueue_map(&s->parent_obj, &port->elem);
>  
>  /*
>   *  Port was throttled on source machine.  Let's
> diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
> index 7655401..0734d27 100644
> --- a/hw/scsi/virtio-scsi.c
> +++ b/hw/scsi/virtio-scsi.c
> @@ -206,7 +206,7 @@ static void *virtio_scsi_load_request(QEMUFile *f, 
> SCSIRequest *sreq)
>  req = virtio_scsi_init_req(s, vs->cmd_vqs[n]);
>  qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
>  
> -virtqueue_map(&req->elem);
> +virtqueue_map(&vs->parent_obj, &req->elem);
>  
>  if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
>sizeof(VirtIOSCSICmdResp) + vs->sense_size) < 
> 0) {
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index dd48562..876505d 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1211,6 +1211,14 @@ static int virtio_pci_query_nvectors(DeviceState *d)
>  return proxy->nvectors;
>  }
>  
> +static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
> +{
> +VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
> +PCIDevice *dev = &proxy->pci_dev;
> +
> +return pci_get_address_space(dev);
> +}
> +
>  static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
> struct virtio_pci_cap *cap)
>  {
> @@ -2476,6 +2484,7 @@ static void virtio_pci_bus_class_init(ObjectClass 
> *klass, void *data)
>  k->device_plugged = virtio_pci_device_plugged;
>  k->device_unplugged = virtio_pci_device_unplugged;
>  k->query_nvectors = virtio_pci_query_nvectors;
> +k->get_dma_as = virtio_pci_get_dma_as;
>  }
>  
>  static const TypeInfo virtio_pci_bus_info = {
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 1edef59..e09430d 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -21,6 +21,7 @@
>  #include "hw/virtio/virtio-bus.h"
>  #include "migration/migration.h"
>  #include "hw/virtio/virtio-access.h"
> +#include "sysemu/dma.h"
>  
>  /*
>   * The alignment to use between consumer and producer parts of vring.
> @@ -247,6 +248,7 @@ int virtio_queue_empty(VirtQueue *vq)
>  static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
> unsigned int len)
>  {
> +AddressSpace *dma_as = virtio_get_dma_as(vq->vdev);
>  unsigned int offset;
>  int i;
>  
> @@ -254,17 +256,17 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const 
>

Re: [Qemu-devel] [RFC PATCH 0/9] vhost-nvme: new qemu nvme backend using nvme target

2015-11-23 Thread Ming Lin
On Sat, 2015-11-21 at 14:11 +0100, Paolo Bonzini wrote:
> 
> On 20/11/2015 01:20, Ming Lin wrote:
> > One improvment could be to use google's NVMe vendor extension that
> > I send in another thread, aslo here:
> > https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-google-ext
> > 
> > Qemu side:
> > http://www.minggr.net/cgit/cgit.cgi/qemu/log/?h=vhost-nvme.0
> > Kernel side also here:
> > https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=vhost-nvme.0
> 
> How much do you get with vhost-nvme plus vendor extension, compared to
> 190 MB/s for QEMU?

There is still some bug. I'll update.

> 
> Note that in all likelihood, QEMU can actually do better than 190 MB/s,
> and gain more parallelism too, by moving the processing of the
> ioeventfds to a separate thread.  This is similar to
> hw/block/dataplane/virtio-blk.c.
> 
> It's actually pretty easy to do.  Even though
> hw/block/dataplane/virtio-blk.c is still using some old APIs, all memory
> access in QEMU is now thread-safe.  I have pending patches for 2.6 that
> cut that file down to a mere 200 lines of code, NVMe would probably be
> about the same.

Is there a git tree for your patches?

Did you mean some pseduo code as below?
1. need a iothread for each cq/sq?
2. need a AioContext for each cq/sq?

 hw/block/nvme.c | 32 ++--
 hw/block/nvme.h |  8 
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f27fd35..fed4827 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -28,6 +28,8 @@
 #include "sysemu/sysemu.h"
 #include "qapi/visitor.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/iothread.h"
+#include "qom/object_interfaces.h"
 
 #include "nvme.h"
 
@@ -558,9 +560,22 @@ static void nvme_init_cq_eventfd(NvmeCQueue *cq)
 uint16_t offset = (cq->cqid*2+1) * (4 << NVME_CAP_DSTRD(n->bar.cap));
 
 event_notifier_init(&cq->notifier, 0);
-event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
 memory_region_add_eventfd(&n->iomem,
 0x1000 + offset, 4, false, 0, &cq->notifier);
+
+object_initialize(&cq->internal_iothread_obj,
+  sizeof(cq->internal_iothread_obj),
+  TYPE_IOTHREAD);
+user_creatable_complete(OBJECT(&cq->internal_iothread_obj), &error_abort);
+cq->iothread = &cq->internal_iothread_obj;
+cq->ctx = iothread_get_aio_context(cq->iothread);
+//Question: Need a conf.blk for each cq/sq???
+//blk_set_aio_context(cq->conf->conf.blk, cq->ctx);
+
+aio_context_acquire(cq->ctx);
+aio_set_event_notifier(cq->ctx, &cq->notifier, true,
+   nvme_cq_notifier);
+aio_context_release(cq->ctx);
 }
 
 static void nvme_sq_notifier(EventNotifier *e)
@@ -578,9 +593,22 @@ static void nvme_init_sq_eventfd(NvmeSQueue *sq)
 uint16_t offset = sq->sqid * 2 * (4 << NVME_CAP_DSTRD(n->bar.cap));
 
 event_notifier_init(&sq->notifier, 0);
-event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
 memory_region_add_eventfd(&n->iomem,
 0x1000 + offset, 4, false, 0, &sq->notifier);
+
+object_initialize(&sq->internal_iothread_obj,
+  sizeof(sq->internal_iothread_obj),
+  TYPE_IOTHREAD);
+user_creatable_complete(OBJECT(&sq->internal_iothread_obj), &error_abort);
+sq->iothread = &sq->internal_iothread_obj;
+sq->ctx = iothread_get_aio_context(sq->iothread);
+//Question: Need a conf.blk for each cq/sq???
+//blk_set_aio_context(sq->conf->conf.blk, sq->ctx);
+
+aio_context_acquire(sq->ctx);
+aio_set_event_notifier(sq->ctx, &sq->notifier, true,
+   nvme_sq_notifier);
+aio_context_release(sq->ctx);
 }
 
 static uint16_t nvme_set_db_memory(NvmeCtrl *n, const NvmeCmd *cmd)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 608f202..171ee0b 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -667,6 +667,10 @@ typedef struct NvmeSQueue {
  * do not go over this value will not result in MMIO writes (but will
  * still write the tail pointer to the "db_addr" location above). */
 uint64_teventidx_addr;
+
+IOThread *iothread;
+IOThread internal_iothread_obj;
+AioContext *ctx;
 EventNotifier notifier;
 } NvmeSQueue;
 
@@ -690,6 +694,10 @@ typedef struct NvmeCQueue {
  * do not go over this value will not result in MMIO writes (but will
  * still write the head pointer to the "db_addr" location above). */
 uint64_teventidx_addr;
+
+IOThread *iothread;
+IOThread internal_iothread_obj;
+AioContext *ctx;
 EventNotifier notifier;
 } NvmeCQueue;
 

> 
> Paolo





Re: [Qemu-devel] [PATCH for 2.5 1/1] parallels: dirty BAT properly for continuous allocations

2015-11-23 Thread Denis V. Lunev

On 11/17/2015 08:02 PM, Denis V. Lunev wrote:

From: Vladimir Sementsov-Ogievskiy 

This patch marks part of the BAT dirty properly. There is a possibility that
multy-block allocation could have one block allocated on one BAT page and
next block on the next page. The code without the patch could not save
updated position to the file.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Denis V. Lunev 
CC: Stefan Hajnoczi 
---
  block/parallels.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/parallels.c b/block/parallels.c
index 4f79293..f689fde 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -220,7 +220,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, 
int64_t sector_num,
  s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
  s->data_end += s->tracks;
  bitmap_set(s->bat_dirty_bmap,
-   bat_entry_off(idx) / s->bat_dirty_block, 1);
+   bat_entry_off(idx + i) / s->bat_dirty_block, 1);
  }
  
  return bat2sect(s, idx) + sector_num % s->tracks;

Stefan,

how should we proceed with this? Should I send this as
a pull request or you could take this yourself?

Den



[Qemu-devel] [PATCH v2] numa: Clean up query-memdev error handling

2015-11-23 Thread Markus Armbruster
qmp_query_memdev() has two error paths:

* When object_get_objects_root() returns null.  It never does, so
  simply drop the useless error handling.

* When query_memdev() fails.  It leaks err then.  But any failure
  there is actually a programming error.  Switch it to &error_abort,
  and drop the useless error handling.

Messed up in commit 76b5d85 "qmp: add query-memdev".

Signed-off-by: Markus Armbruster 
---
 numa.c | 59 ++-
 1 file changed, 10 insertions(+), 49 deletions(-)

diff --git a/numa.c b/numa.c
index fdfe294..1710946 100644
--- a/numa.c
+++ b/numa.c
@@ -517,7 +517,6 @@ static int query_memdev(Object *obj, void *opaque)
 {
 MemdevList **list = opaque;
 MemdevList *m = NULL;
-Error *err = NULL;
 
 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 m = g_malloc0(sizeof(*m));
@@ -525,72 +524,34 @@ static int query_memdev(Object *obj, void *opaque)
 m->value = g_malloc0(sizeof(*m->value));
 
 m->value->size = object_property_get_int(obj, "size",
- &err);
-if (err) {
-goto error;
-}
-
+ &error_abort);
 m->value->merge = object_property_get_bool(obj, "merge",
-   &err);
-if (err) {
-goto error;
-}
-
+   &error_abort);
 m->value->dump = object_property_get_bool(obj, "dump",
-  &err);
-if (err) {
-goto error;
-}
-
+  &error_abort);
 m->value->prealloc = object_property_get_bool(obj,
-  "prealloc", &err);
-if (err) {
-goto error;
-}
-
+  "prealloc",
+  &error_abort);
 m->value->policy = object_property_get_enum(obj,
 "policy",
 "HostMemPolicy",
-&err);
-if (err) {
-goto error;
-}
-
+&error_abort);
 object_property_get_uint16List(obj, "host-nodes",
-   &m->value->host_nodes, &err);
-if (err) {
-goto error;
-}
+   &m->value->host_nodes,
+   &error_abort);
 
 m->next = *list;
 *list = m;
 }
 
 return 0;
-error:
-g_free(m->value);
-g_free(m);
-
-return -1;
 }
 
 MemdevList *qmp_query_memdev(Error **errp)
 {
-Object *obj;
+Object *obj = object_get_objects_root();
 MemdevList *list = NULL;
 
-obj = object_get_objects_root();
-if (obj == NULL) {
-return NULL;
-}
-
-if (object_child_foreach(obj, query_memdev, &list) != 0) {
-goto error;
-}
-
+object_child_foreach(obj, query_memdev, &list);
 return list;
-
-error:
-qapi_free_MemdevList(list);
-return NULL;
 }
-- 
2.4.3




Re: [Qemu-devel] [PATCH] vhost-user: set link down when the char device is closed

2015-11-23 Thread Jason Wang


On 11/20/2015 01:42 PM, Wen  wrote:
> To Jason Wang:
>
> I think this patch should be for qemu-2.5
>
> Thanks
> Wen Congyang

Hi:

I thought it was for vhost tree.

Michael:

Do you want to take this patch?

>
> On 11/11/2015 02:53 PM, Wen Congyang wrote:
>> Signed-off-by: Wen Congyang 
>> ---
>>  net/vhost-user.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/net/vhost-user.c b/net/vhost-user.c
>> index 0ebd7df..5071602 100644
>> --- a/net/vhost-user.c
>> +++ b/net/vhost-user.c
>> @@ -188,7 +188,7 @@ static void net_vhost_user_event(void *opaque, int event)
>>  qmp_set_link(name, true, &err);
>>  break;
>>  case CHR_EVENT_CLOSED:
>> -qmp_set_link(name, true, &err);
>> +qmp_set_link(name, false, &err);
>>  vhost_user_stop(queues, ncs);
>>  break;
>>  }
>>
>




Re: [Qemu-devel] [PATCH v8 0/5] implement vNVDIMM

2015-11-23 Thread Stefan Hajnoczi
On Thu, Nov 19, 2015 at 10:39:05AM +0800, Xiao Guangrong wrote:
> On 11/19/2015 04:44 AM, Michael S. Tsirkin wrote:
> >On Wed, Nov 18, 2015 at 05:18:17PM -0200, Eduardo Habkost wrote:
> >>On Wed, Nov 18, 2015 at 09:59:34AM +0800, Xiao Guangrong wrote:
> >sorry, I'm busy with 2.5 now, and this is clearly not 2.5 material.
> 
> I still see some pull requests were send our for 2.5 merge window today and
> yesterday ...
> 
> This patchset is the simplest version we can figure out to implement basic
> functionality for vNVDIMM and only minor change is needed for other code.
> It would be nice and really appreciate if it can go to 2.5.

Here is the release schedule:
http://qemu-project.org/Planning/2.5

QEMU is in hard freeze right now.  That means only critical bug fixes
are being merged.  No new features will be merged until the QEMU 2.6
development cycle begins.

Stefan


signature.asc
Description: PGP signature


[Qemu-devel] [PATCH v4 2/5] ssi: Move ssi.h into a separate directory

2015-11-23 Thread Alistair Francis
Move the ssi.h include file into the ssi directory.

While touching the code also fix the typdef lines as
checkpatch complains.

Signed-off-by: Alistair Francis 
Reviewed-by: Peter Crosthwaite 
---
V2:
 - Change git patch to indicate rename

 hw/arm/pxa2xx.c |  2 +-
 hw/arm/spitz.c  |  2 +-
 hw/arm/stellaris.c  |  2 +-
 hw/arm/strongarm.c  |  2 +-
 hw/arm/tosa.c   |  2 +-
 hw/arm/xilinx_zynq.c|  2 +-
 hw/arm/z2.c |  2 +-
 hw/block/m25p80.c   |  2 +-
 hw/display/ads7846.c|  2 +-
 hw/display/ssd0323.c|  2 +-
 hw/microblaze/petalogix_ml605_mmu.c |  2 +-
 hw/misc/max111x.c   |  2 +-
 hw/sd/ssi-sd.c  |  2 +-
 hw/ssi/pl022.c  |  2 +-
 hw/ssi/ssi.c|  2 +-
 hw/ssi/xilinx_spi.c |  2 +-
 hw/ssi/xilinx_spips.c   |  2 +-
 include/hw/{ => ssi}/ssi.h  | 10 ++
 18 files changed, 23 insertions(+), 21 deletions(-)
 rename include/hw/{ => ssi}/ssi.h (96%)

diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 79d22d9..54bf152 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -12,7 +12,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/char/serial.h"
 #include "hw/i2c/i2c.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "sysemu/char.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c
index 8d3cc0b..ee8f889 100644
--- a/hw/arm/spitz.c
+++ b/hw/arm/spitz.c
@@ -16,7 +16,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/pcmcia.h"
 #include "hw/i2c/i2c.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "hw/block/flash.h"
 #include "qemu/timer.h"
 #include "hw/devices.h"
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index 0114e0a..4e5cfd1 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -8,7 +8,7 @@
  */
 
 #include "hw/sysbus.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "hw/arm/arm.h"
 #include "hw/devices.h"
 #include "qemu/timer.h"
diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
index 9624ecb..4d2ba02 100644
--- a/hw/arm/strongarm.c
+++ b/hw/arm/strongarm.c
@@ -34,7 +34,7 @@
 #include "hw/arm/arm.h"
 #include "sysemu/char.h"
 #include "sysemu/sysemu.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 
 //#define DEBUG
 
diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c
index 02814d7..68ad01e 100644
--- a/hw/arm/tosa.c
+++ b/hw/arm/tosa.c
@@ -19,7 +19,7 @@
 #include "hw/pcmcia.h"
 #include "hw/boards.h"
 #include "hw/i2c/i2c.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "sysemu/block-backend.h"
 #include "hw/sysbus.h"
 #include "exec/address-spaces.h"
diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 1c1a445..11a349b 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -25,7 +25,7 @@
 #include "sysemu/block-backend.h"
 #include "hw/loader.h"
 #include "hw/misc/zynq-xadc.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "qemu/error-report.h"
 
 #define NUM_SPI_FLASHES 4
diff --git a/hw/arm/z2.c b/hw/arm/z2.c
index b44eb76..c82fe2c 100644
--- a/hw/arm/z2.c
+++ b/hw/arm/z2.c
@@ -16,7 +16,7 @@
 #include "hw/arm/arm.h"
 #include "hw/devices.h"
 #include "hw/i2c/i2c.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "hw/boards.h"
 #include "sysemu/sysemu.h"
 #include "hw/block/flash.h"
diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 7b9f97c..addd907 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -24,7 +24,7 @@
 #include "hw/hw.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/blockdev.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 
 #ifndef M25P80_ERR_DEBUG
 #define M25P80_ERR_DEBUG 0
diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c
index 3f35369..cb82317 100644
--- a/hw/display/ads7846.c
+++ b/hw/display/ads7846.c
@@ -10,7 +10,7 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "ui/console.h"
 
 typedef struct {
diff --git a/hw/display/ssd0323.c b/hw/display/ssd0323.c
index 9727007..7545da8 100644
--- a/hw/display/ssd0323.c
+++ b/hw/display/ssd0323.c
@@ -10,7 +10,7 @@
 /* The controller can support a variety of different displays, but we only
implement one.  Most of the commends relating to brightness and geometry
setup are ignored. */
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 #include "ui/console.h"
 
 //#define DEBUG_SSD0323 1
diff --git a/hw/microblaze/petalogix_ml605_mmu.c 
b/hw/microblaze/petalogix_ml605_mmu.c
index 462060f..5366cec 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -35,7 +35,7 @@
 #include "sysemu/block-backend.h"
 #include "hw/char/serial.h"
 #include "exec/address-spaces.h"
-#include "hw/ssi.h"
+#include "hw/ssi/ssi.h"
 
 #include "boot.h"
 
diff --git a/hw/misc/max111x.c b/hw/misc/max111x

[Qemu-devel] [PATCH v4 0/5] Connect the SPI devices to ZynqMP

2015-11-23 Thread Alistair Francis
Connect the SPI devices to Xilinx's ZynqMP.

I also need to make some changes to the actual SPI device to
imporove the fuctionality, but for the time being this works.

V4:
 - Rebase
 - Rename the SPI busses so that they can all be accessed from the SoC
 - Only create one SPI flash device
V3:
 - Don't reach into the SoC to get the SPI Bus
V2:
 - Connect the SPI flash in the board code
 - Update git patches to properly indicate rename
 - Add sst25wf080 as a SPI flash


Alistair Francis (5):
  m25p80.c: Add sst25wf080 SPI flash device
  ssi: Move ssi.h into a separate directory
  xilinx_spips: Seperate the state struct into a header
  xlnx-zynqmp: Connect the SPI devices
  xlnx-ep108: Connect the SPI Flash

 hw/arm/pxa2xx.c |  2 +-
 hw/arm/spitz.c  |  2 +-
 hw/arm/stellaris.c  |  2 +-
 hw/arm/strongarm.c  |  2 +-
 hw/arm/tosa.c   |  2 +-
 hw/arm/xilinx_zynq.c|  2 +-
 hw/arm/xlnx-ep108.c | 16 +
 hw/arm/xlnx-zynqmp.c| 38 
 hw/arm/z2.c |  2 +-
 hw/block/m25p80.c   |  3 +-
 hw/display/ads7846.c|  2 +-
 hw/display/ssd0323.c|  2 +-
 hw/microblaze/petalogix_ml605_mmu.c |  2 +-
 hw/misc/max111x.c   |  2 +-
 hw/sd/ssi-sd.c  |  2 +-
 hw/ssi/pl022.c  |  2 +-
 hw/ssi/ssi.c|  2 +-
 hw/ssi/xilinx_spi.c |  2 +-
 hw/ssi/xilinx_spips.c   | 48 +++--
 include/hw/arm/xlnx-zynqmp.h|  3 ++
 include/hw/{ => ssi}/ssi.h  | 10 +++---
 include/hw/ssi/xilinx_spips.h   | 72 +
 22 files changed, 157 insertions(+), 63 deletions(-)
 rename include/hw/{ => ssi}/ssi.h (96%)
 create mode 100644 include/hw/ssi/xilinx_spips.h

-- 
2.5.0




[Qemu-devel] [PATCH v4 5/5] xlnx-ep108: Connect the SPI Flash

2015-11-23 Thread Alistair Francis
Connect the sst25wf080 SPI flash to the EP108 board.

Signed-off-by: Alistair Francis 
---
V4:
 - Only add one SPI flash
V3:
 - Don't reach into the SoC
V2:
 - Use sst25wf080 instead of m25p80

 hw/arm/xlnx-ep108.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/arm/xlnx-ep108.c b/hw/arm/xlnx-ep108.c
index 2899698..4ead0df 100644
--- a/hw/arm/xlnx-ep108.c
+++ b/hw/arm/xlnx-ep108.c
@@ -33,6 +33,7 @@ static struct arm_boot_info xlnx_ep108_binfo;
 static void xlnx_ep108_init(MachineState *machine)
 {
 XlnxEP108 *s = g_new0(XlnxEP108, 1);
+int i;
 Error *err = NULL;
 
 object_initialize(&s->soc, sizeof(s->soc), TYPE_XLNX_ZYNQMP);
@@ -60,6 +61,21 @@ static void xlnx_ep108_init(MachineState *machine)
  machine->ram_size);
 memory_region_add_subregion(get_system_memory(), 0, &s->ddr_ram);
 
+for (i = 0; i < XLNX_ZYNQMP_NUM_SPIS; i++) {
+SSIBus *spi_bus;
+DeviceState *flash_dev;
+qemu_irq cs_line;
+char bus_name[6];
+
+snprintf(bus_name, 6, "spi%d", i);
+spi_bus = (SSIBus *)qdev_get_child_bus(DEVICE(&s->soc), bus_name);
+
+flash_dev = ssi_create_slave(spi_bus, "sst25wf080");
+cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0);
+
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->soc.spi[i]), 1, cs_line);
+}
+
 xlnx_ep108_binfo.ram_size = machine->ram_size;
 xlnx_ep108_binfo.kernel_filename = machine->kernel_filename;
 xlnx_ep108_binfo.kernel_cmdline = machine->kernel_cmdline;
-- 
2.5.0




[Qemu-devel] [PATCH v4 1/5] m25p80.c: Add sst25wf080 SPI flash device

2015-11-23 Thread Alistair Francis
Add the sst25wf080 SPI flash device.

Signed-off-by: Alistair Francis 
Reviewed-by: Peter Crosthwaite 
---

 hw/block/m25p80.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index efc43dd..7b9f97c 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -163,6 +163,7 @@ static const FlashPartInfo known_devices[] = {
 { INFO("sst25wf010",  0xbf2502,  0,  64 << 10,   2, ER_4K) },
 { INFO("sst25wf020",  0xbf2503,  0,  64 << 10,   4, ER_4K) },
 { INFO("sst25wf040",  0xbf2504,  0,  64 << 10,   8, ER_4K) },
+{ INFO("sst25wf080",  0xbf2505,  0,  64 << 10,  16, ER_4K) },
 
 /* ST Microelectronics -- newer production may have feature updates */
 { INFO("m25p05",  0x202010,  0,  32 << 10,   2, 0) },
-- 
2.5.0




[Qemu-devel] [PATCH v4 4/5] xlnx-zynqmp: Connect the SPI devices

2015-11-23 Thread Alistair Francis
Connect the Xilinx SPI devices to the ZynqMP model.

Signed-off-by: Alistair Francis 
---
V4:
 - Rename the SPI busses so that they can all be accessed from the SoC
 - Don't set the num-busses property
V3:
 - Expose the SPI Bus as part of the SoC device
V2:
 - Don't connect the SPI flash to the SoC

 hw/arm/xlnx-zynqmp.c | 38 ++
 include/hw/arm/xlnx-zynqmp.h |  3 +++
 2 files changed, 41 insertions(+)

diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 87553bb..6c82f83 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -56,6 +56,14 @@ static const int sdhci_intr[XLNX_ZYNQMP_NUM_SDHCI] = {
 48, 49,
 };
 
+static const uint64_t spi_addr[XLNX_ZYNQMP_NUM_SPIS] = {
+0xFF04, 0xFF05,
+};
+
+static const int spi_intr[XLNX_ZYNQMP_NUM_SPIS] = {
+19, 20,
+};
+
 typedef struct XlnxZynqMPGICRegion {
 int region_index;
 uint32_t address;
@@ -112,6 +120,12 @@ static void xlnx_zynqmp_init(Object *obj)
 qdev_set_parent_bus(DEVICE(&s->sdhci[i]),
 sysbus_get_default());
 }
+
+for (i = 0; i < XLNX_ZYNQMP_NUM_SPIS; i++) {
+object_initialize(&s->spi[i], sizeof(s->spi[i]),
+  TYPE_XILINX_SPIPS);
+qdev_set_parent_bus(DEVICE(&s->spi[i]), sysbus_get_default());
+}
 }
 
 static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
@@ -286,6 +300,30 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)
 sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci[i]), 0,
gic_spi[sdhci_intr[i]]);
 }
+
+for (i = 0; i < XLNX_ZYNQMP_NUM_SPIS; i++) {
+BusState *spi_bus;
+char bus_name[6];
+
+object_property_set_bool(OBJECT(&s->spi[i]), true, "realized", &err);
+
+sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, spi_addr[i]);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->spi[i]), 0,
+   gic_spi[spi_intr[i]]);
+
+/* Rename each SPI bus after the SPI device to allow the board
+ * to access all of the busses from the SoC.
+ */
+spi_bus = qdev_get_child_bus(DEVICE(&s->spi[i]), "spi0");
+snprintf(bus_name, 6, "spi%d", i);
+memcpy((char *) spi_bus->name, bus_name, 6 * sizeof(char));
+
+/* Add the SPI buses to the SoC child bus */
+/* FIXME: This causes the later buses to be duplicated in
+ * the SPI devices printout when running qtree.
+ */
+QLIST_INSERT_HEAD(&dev->child_bus, spi_bus, sibling);
+}
 }
 
 static Property xlnx_zynqmp_props[] = {
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index d116092..f598a43 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -25,6 +25,7 @@
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci.h"
 #include "hw/sd/sdhci.h"
+#include "hw/ssi/xilinx_spips.h"
 
 #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp"
 #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \
@@ -35,6 +36,7 @@
 #define XLNX_ZYNQMP_NUM_GEMS 4
 #define XLNX_ZYNQMP_NUM_UARTS 2
 #define XLNX_ZYNQMP_NUM_SDHCI 2
+#define XLNX_ZYNQMP_NUM_SPIS 2
 
 #define XLNX_ZYNQMP_NUM_OCM_BANKS 4
 #define XLNX_ZYNQMP_OCM_RAM_0_ADDRESS 0xFFFC
@@ -66,6 +68,7 @@ typedef struct XlnxZynqMPState {
 CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
 SysbusAHCIState sata;
 SDHCIState sdhci[XLNX_ZYNQMP_NUM_SDHCI];
+XilinxSPIPS spi[XLNX_ZYNQMP_NUM_SPIS];
 
 char *boot_cpu;
 ARMCPU *boot_cpu_ptr;
-- 
2.5.0




[Qemu-devel] [PATCH v4 3/5] xilinx_spips: Seperate the state struct into a header

2015-11-23 Thread Alistair Francis
Seperate out the XilinxSPIPS struct into a seperate header
file.

Signed-off-by: Alistair Francis 
---
V4:
 - Don't split off R_MOD_ID and hardcode R_MAX
V2:
 - Only split out required #defines
 - Prefix XLNX_SPIPS_

 hw/ssi/xilinx_spips.c | 46 +++
 include/hw/ssi/xilinx_spips.h | 72 +++
 2 files changed, 76 insertions(+), 42 deletions(-)
 create mode 100644 include/hw/ssi/xilinx_spips.h

diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
index e9471ff..2111719 100644
--- a/hw/ssi/xilinx_spips.c
+++ b/hw/ssi/xilinx_spips.c
@@ -29,6 +29,7 @@
 #include "qemu/fifo8.h"
 #include "hw/ssi/ssi.h"
 #include "qemu/bitops.h"
+#include "hw/ssi/xilinx_spips.h"
 
 #ifndef XILINX_SPIPS_ERR_DEBUG
 #define XILINX_SPIPS_ERR_DEBUG 0
@@ -103,8 +104,6 @@
 
 #define R_MOD_ID(0xFC / 4)
 
-#define R_MAX (R_MOD_ID+1)
-
 /* size of TXRX FIFOs */
 #define RXFF_A  32
 #define TXFF_A  32
@@ -135,30 +134,6 @@ typedef enum {
 } FlashCMD;
 
 typedef struct {
-SysBusDevice parent_obj;
-
-MemoryRegion iomem;
-MemoryRegion mmlqspi;
-
-qemu_irq irq;
-int irqline;
-
-uint8_t num_cs;
-uint8_t num_busses;
-
-uint8_t snoop_state;
-qemu_irq *cs_lines;
-SSIBus **spi;
-
-Fifo8 rx_fifo;
-Fifo8 tx_fifo;
-
-uint8_t num_txrx_bytes;
-
-uint32_t regs[R_MAX];
-} XilinxSPIPS;
-
-typedef struct {
 XilinxSPIPS parent_obj;
 
 uint8_t lqspi_buf[LQSPI_CACHE_SIZE];
@@ -174,19 +149,6 @@ typedef struct XilinxSPIPSClass {
 uint32_t tx_fifo_size;
 } XilinxSPIPSClass;
 
-#define TYPE_XILINX_SPIPS "xlnx.ps7-spi"
-#define TYPE_XILINX_QSPIPS "xlnx.ps7-qspi"
-
-#define XILINX_SPIPS(obj) \
- OBJECT_CHECK(XilinxSPIPS, (obj), TYPE_XILINX_SPIPS)
-#define XILINX_SPIPS_CLASS(klass) \
- OBJECT_CLASS_CHECK(XilinxSPIPSClass, (klass), TYPE_XILINX_SPIPS)
-#define XILINX_SPIPS_GET_CLASS(obj) \
- OBJECT_GET_CLASS(XilinxSPIPSClass, (obj), TYPE_XILINX_SPIPS)
-
-#define XILINX_QSPIPS(obj) \
- OBJECT_CHECK(XilinxQSPIPS, (obj), TYPE_XILINX_QSPIPS)
-
 static inline int num_effective_busses(XilinxSPIPS *s)
 {
 return (s->regs[R_LQSPI_CFG] & LQSPI_CFG_SEP_BUS &&
@@ -257,7 +219,7 @@ static void xilinx_spips_reset(DeviceState *d)
 XilinxSPIPS *s = XILINX_SPIPS(d);
 
 int i;
-for (i = 0; i < R_MAX; i++) {
+for (i = 0; i < XLNX_SPIPS_R_MAX; i++) {
 s->regs[i] = 0;
 }
 
@@ -664,7 +626,7 @@ static void xilinx_spips_realize(DeviceState *dev, Error 
**errp)
 }
 
 memory_region_init_io(&s->iomem, OBJECT(s), xsc->reg_ops, s,
-  "spi", R_MAX*4);
+  "spi", XLNX_SPIPS_R_MAX*4);
 sysbus_init_mmio(sbd, &s->iomem);
 
 s->irqline = -1;
@@ -708,7 +670,7 @@ static const VMStateDescription vmstate_xilinx_spips = {
 .fields = (VMStateField[]) {
 VMSTATE_FIFO8(tx_fifo, XilinxSPIPS),
 VMSTATE_FIFO8(rx_fifo, XilinxSPIPS),
-VMSTATE_UINT32_ARRAY(regs, XilinxSPIPS, R_MAX),
+VMSTATE_UINT32_ARRAY(regs, XilinxSPIPS, XLNX_SPIPS_R_MAX),
 VMSTATE_UINT8(snoop_state, XilinxSPIPS),
 VMSTATE_END_OF_LIST()
 }
diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h
new file mode 100644
index 000..dbb9eef
--- /dev/null
+++ b/include/hw/ssi/xilinx_spips.h
@@ -0,0 +1,72 @@
+/*
+ * Header file for the Xilinx Zynq SPI controller
+ *
+ * Copyright (C) 2015 Xilinx Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_SPIPS_H
+#define XLNX_SPIPS_H
+
+#include "hw/ssi/ssi.h"
+#include "qemu/fifo8.h"
+
+typedef struct XilinxSPIPS XilinxSPIPS;
+
+#define XLNX_SPIPS_R_MAX(0x100 / 4)
+
+struct XilinxSPIPS {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+MemoryRegion mmlqspi;
+
+qemu_irq irq;
+int irqline;
+
+uint8_t num_cs;
+uint8_t num_busses;
+
+uint8_t snoop_state;
+qemu_irq *cs_lines;
+

Re: [Qemu-devel] [PATCH v2] numa: Clean up query-memdev error handling

2015-11-23 Thread Michael S. Tsirkin
On Mon, Nov 23, 2015 at 09:35:31AM +0100, Markus Armbruster wrote:
> qmp_query_memdev() has two error paths:
> 
> * When object_get_objects_root() returns null.  It never does, so
>   simply drop the useless error handling.
> 
> * When query_memdev() fails.  It leaks err then.  But any failure
>   there is actually a programming error.  Switch it to &error_abort,
>   and drop the useless error handling.
> 
> Messed up in commit 76b5d85 "qmp: add query-memdev".
> 
> Signed-off-by: Markus Armbruster 


Post 2.5 right?

> ---
>  numa.c | 59 ++-
>  1 file changed, 10 insertions(+), 49 deletions(-)
> 
> diff --git a/numa.c b/numa.c
> index fdfe294..1710946 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -517,7 +517,6 @@ static int query_memdev(Object *obj, void *opaque)
>  {
>  MemdevList **list = opaque;
>  MemdevList *m = NULL;
> -Error *err = NULL;
>  
>  if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
>  m = g_malloc0(sizeof(*m));
> @@ -525,72 +524,34 @@ static int query_memdev(Object *obj, void *opaque)
>  m->value = g_malloc0(sizeof(*m->value));
>  
>  m->value->size = object_property_get_int(obj, "size",
> - &err);
> -if (err) {
> -goto error;
> -}
> -
> + &error_abort);
>  m->value->merge = object_property_get_bool(obj, "merge",
> -   &err);
> -if (err) {
> -goto error;
> -}
> -
> +   &error_abort);
>  m->value->dump = object_property_get_bool(obj, "dump",
> -  &err);
> -if (err) {
> -goto error;
> -}
> -
> +  &error_abort);
>  m->value->prealloc = object_property_get_bool(obj,
> -  "prealloc", &err);
> -if (err) {
> -goto error;
> -}
> -
> +  "prealloc",
> +  &error_abort);
>  m->value->policy = object_property_get_enum(obj,
>  "policy",
>  "HostMemPolicy",
> -&err);
> -if (err) {
> -goto error;
> -}
> -
> +&error_abort);
>  object_property_get_uint16List(obj, "host-nodes",
> -   &m->value->host_nodes, &err);
> -if (err) {
> -goto error;
> -}
> +   &m->value->host_nodes,
> +   &error_abort);
>  
>  m->next = *list;
>  *list = m;
>  }
>  
>  return 0;
> -error:
> -g_free(m->value);
> -g_free(m);
> -
> -return -1;
>  }
>  
>  MemdevList *qmp_query_memdev(Error **errp)
>  {
> -Object *obj;
> +Object *obj = object_get_objects_root();
>  MemdevList *list = NULL;
>  
> -obj = object_get_objects_root();
> -if (obj == NULL) {
> -return NULL;
> -}
> -
> -if (object_child_foreach(obj, query_memdev, &list) != 0) {
> -goto error;
> -}
> -
> +object_child_foreach(obj, query_memdev, &list);
>  return list;
> -
> -error:
> -qapi_free_MemdevList(list);
> -return NULL;
>  }
> -- 
> 2.4.3



Re: [Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-23 Thread Wen Congyang
On 11/20/2015 05:59 PM, Fam Zheng wrote:
> "s->bitmap" tracks done sectors, we only check bit states without using any
> iterator which HBitmap is good for. Switch to "Bitmap" which is simpler and
> more memory efficient.
> 
> Meanwhile, rename it to done_bitmap, to reflect the intention.
> 
> Signed-off-by: Fam Zheng 
> ---
>  block/backup.c | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/block/backup.c b/block/backup.c
> index 3b39119..d408f98 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -22,6 +22,7 @@
>  #include "qapi/qmp/qerror.h"
>  #include "qemu/ratelimit.h"
>  #include "sysemu/block-backend.h"
> +#include "qemu/bitmap.h"
>  
>  #define BACKUP_CLUSTER_BITS 16
>  #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
> @@ -47,7 +48,7 @@ typedef struct BackupBlockJob {
>  BlockdevOnError on_target_error;
>  CoRwlock flush_rwlock;
>  uint64_t sectors_read;
> -HBitmap *bitmap;
> +unsigned long *done_bitmap;
>  QLIST_HEAD(, CowRequest) inflight_reqs;
>  } BackupBlockJob;
>  
> @@ -113,7 +114,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState 
> *bs,
>  cow_request_begin(&cow_request, job, start, end);
>  
>  for (; start < end; start++) {
> -if (hbitmap_get(job->bitmap, start)) {
> +if (test_bit(start, job->done_bitmap)) {
>  trace_backup_do_cow_skip(job, start);
>  continue; /* already copied */
>  }
> @@ -164,7 +165,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState 
> *bs,
>  goto out;
>  }
>  
> -hbitmap_set(job->bitmap, start, 1);
> +bitmap_set(job->done_bitmap, start, 1);

You can use set_bit() here.

Thanks
Wen Congyang

>  
>  /* Publish progress, guest I/O counts as progress too.  Note that the
>   * offset field is an opaque progress value, it is not a disk offset.
> @@ -394,7 +395,7 @@ static void coroutine_fn backup_run(void *opaque)
>  start = 0;
>  end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
>  
> -job->bitmap = hbitmap_alloc(end, 0);
> +job->done_bitmap = bitmap_new(end);
>  
>  bdrv_set_enable_write_cache(target, true);
>  if (target->blk) {
> @@ -475,7 +476,7 @@ static void coroutine_fn backup_run(void *opaque)
>  /* wait until pending backup_do_cow() calls have completed */
>  qemu_co_rwlock_wrlock(&job->flush_rwlock);
>  qemu_co_rwlock_unlock(&job->flush_rwlock);
> -hbitmap_free(job->bitmap);
> +g_free(job->done_bitmap);
>  
>  if (target->blk) {
>  blk_iostatus_disable(target->blk);
> 




Re: [Qemu-devel] [PATCH] qemu-iotests: Add -nographic when starting QEMU in 119 and 120

2015-11-23 Thread Fam Zheng
On Mon, 11/23 08:33, Markus Armbruster wrote:
> Fam Zheng  writes:
> 
> > Otherwise, a window flashes on my desktop (built with SDL). Add this as
> > other cases have it.
> >
> > Signed-off-by: Fam Zheng 
> >
> > ---
> > v2: Fix 119 too. [Max]
> > ---
> >  tests/qemu-iotests/119 | 2 +-
> >  tests/qemu-iotests/120 | 2 +-
> >  2 files changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/tests/qemu-iotests/119 b/tests/qemu-iotests/119
> > index 9a11f1b..cc6ec07 100755
> > --- a/tests/qemu-iotests/119
> > +++ b/tests/qemu-iotests/119
> > @@ -49,7 +49,7 @@ echo "{'execute': 'qmp_capabilities'}
> >{'execute': 'human-monitor-command',
> > 'arguments': {'command-line': 'qemu-io drv \"read -P 0 0 64k\"'}}
> >{'execute': 'quit'}" \
> > -| $QEMU -drive id=drv,if=none,file="$TEST_IMG",driver=nbd \
> > +| $QEMU -nographic -drive id=drv,if=none,file="$TEST_IMG",driver=nbd \
> >  -qmp stdio -nodefaults \
> >  | _filter_qmp | _filter_qemu_io
> >  
> > diff --git a/tests/qemu-iotests/120 b/tests/qemu-iotests/120
> > index 9f13078..d899a3f 100755
> > --- a/tests/qemu-iotests/120
> > +++ b/tests/qemu-iotests/120
> > @@ -49,7 +49,7 @@ echo "{'execute': 'qmp_capabilities'}
> >{'execute': 'human-monitor-command',
> > 'arguments': {'command-line': 'qemu-io drv \"write -P 42 0 64k\"'}}
> >{'execute': 'quit'}" \
> > -| $QEMU -qmp stdio -nodefaults \
> > +| $QEMU -qmp stdio -nographic -nodefaults \
> >  -drive 
> > id=drv,if=none,file="$TEST_IMG",driver=raw,file.driver=$IMGFMT \
> >  | _filter_qmp | _filter_qemu_io
> >  $QEMU_IO -c 'read -P 42 0 64k' "$TEST_IMG" | _filter_qemu_io
> 
> -nographic is legacy.  Using legacy options is fine, but I wonder
> whether you want -display none here.  Unless you really want to redirect
> serial and parallel port, I suspect you do.

I was just following other cases, I'm fine with either way but I suspect it's
worth to convert them. So if there is no other problem let's be consistent and
stick to -nographic for now. Thanks for pointing out, though.

Fam



Re: [Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-23 Thread Fam Zheng
On Mon, 11/23 17:01, Wen Congyang wrote:
> On 11/20/2015 05:59 PM, Fam Zheng wrote:
> > "s->bitmap" tracks done sectors, we only check bit states without using any
> > iterator which HBitmap is good for. Switch to "Bitmap" which is simpler and
> > more memory efficient.
> > 
> > Meanwhile, rename it to done_bitmap, to reflect the intention.
> > 
> > Signed-off-by: Fam Zheng 
> > ---
> >  block/backup.c | 11 ++-
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/block/backup.c b/block/backup.c
> > index 3b39119..d408f98 100644
> > --- a/block/backup.c
> > +++ b/block/backup.c
> > @@ -22,6 +22,7 @@
> >  #include "qapi/qmp/qerror.h"
> >  #include "qemu/ratelimit.h"
> >  #include "sysemu/block-backend.h"
> > +#include "qemu/bitmap.h"
> >  
> >  #define BACKUP_CLUSTER_BITS 16
> >  #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
> > @@ -47,7 +48,7 @@ typedef struct BackupBlockJob {
> >  BlockdevOnError on_target_error;
> >  CoRwlock flush_rwlock;
> >  uint64_t sectors_read;
> > -HBitmap *bitmap;
> > +unsigned long *done_bitmap;
> >  QLIST_HEAD(, CowRequest) inflight_reqs;
> >  } BackupBlockJob;
> >  
> > @@ -113,7 +114,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState 
> > *bs,
> >  cow_request_begin(&cow_request, job, start, end);
> >  
> >  for (; start < end; start++) {
> > -if (hbitmap_get(job->bitmap, start)) {
> > +if (test_bit(start, job->done_bitmap)) {
> >  trace_backup_do_cow_skip(job, start);
> >  continue; /* already copied */
> >  }
> > @@ -164,7 +165,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState 
> > *bs,
> >  goto out;
> >  }
> >  
> > -hbitmap_set(job->bitmap, start, 1);
> > +bitmap_set(job->done_bitmap, start, 1);
> 
> You can use set_bit() here.

Why? I think bitmap_set is a better match with bitmap_new below.

Fam

> 
> Thanks
> Wen Congyang
> 
> >  
> >  /* Publish progress, guest I/O counts as progress too.  Note that 
> > the
> >   * offset field is an opaque progress value, it is not a disk 
> > offset.
> > @@ -394,7 +395,7 @@ static void coroutine_fn backup_run(void *opaque)
> >  start = 0;
> >  end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
> >  
> > -job->bitmap = hbitmap_alloc(end, 0);
> > +job->done_bitmap = bitmap_new(end);
> >  
> >  bdrv_set_enable_write_cache(target, true);
> >  if (target->blk) {
> > @@ -475,7 +476,7 @@ static void coroutine_fn backup_run(void *opaque)
> >  /* wait until pending backup_do_cow() calls have completed */
> >  qemu_co_rwlock_wrlock(&job->flush_rwlock);
> >  qemu_co_rwlock_unlock(&job->flush_rwlock);
> > -hbitmap_free(job->bitmap);
> > +g_free(job->done_bitmap);
> >  
> >  if (target->blk) {
> >  blk_iostatus_disable(target->blk);
> > 
> 



Re: [Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-23 Thread Wen Congyang
On 11/23/2015 05:19 PM, Fam Zheng wrote:
> On Mon, 11/23 17:01, Wen Congyang wrote:
>> On 11/20/2015 05:59 PM, Fam Zheng wrote:
>>> "s->bitmap" tracks done sectors, we only check bit states without using any
>>> iterator which HBitmap is good for. Switch to "Bitmap" which is simpler and
>>> more memory efficient.
>>>
>>> Meanwhile, rename it to done_bitmap, to reflect the intention.
>>>
>>> Signed-off-by: Fam Zheng 
>>> ---
>>>  block/backup.c | 11 ++-
>>>  1 file changed, 6 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/block/backup.c b/block/backup.c
>>> index 3b39119..d408f98 100644
>>> --- a/block/backup.c
>>> +++ b/block/backup.c
>>> @@ -22,6 +22,7 @@
>>>  #include "qapi/qmp/qerror.h"
>>>  #include "qemu/ratelimit.h"
>>>  #include "sysemu/block-backend.h"
>>> +#include "qemu/bitmap.h"
>>>  
>>>  #define BACKUP_CLUSTER_BITS 16
>>>  #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
>>> @@ -47,7 +48,7 @@ typedef struct BackupBlockJob {
>>>  BlockdevOnError on_target_error;
>>>  CoRwlock flush_rwlock;
>>>  uint64_t sectors_read;
>>> -HBitmap *bitmap;
>>> +unsigned long *done_bitmap;
>>>  QLIST_HEAD(, CowRequest) inflight_reqs;
>>>  } BackupBlockJob;
>>>  
>>> @@ -113,7 +114,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState 
>>> *bs,
>>>  cow_request_begin(&cow_request, job, start, end);
>>>  
>>>  for (; start < end; start++) {
>>> -if (hbitmap_get(job->bitmap, start)) {
>>> +if (test_bit(start, job->done_bitmap)) {
>>>  trace_backup_do_cow_skip(job, start);
>>>  continue; /* already copied */
>>>  }
>>> @@ -164,7 +165,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState 
>>> *bs,
>>>  goto out;
>>>  }
>>>  
>>> -hbitmap_set(job->bitmap, start, 1);
>>> +bitmap_set(job->done_bitmap, start, 1);
>>
>> You can use set_bit() here.
> 
> Why? I think bitmap_set is a better match with bitmap_new below.

set_bit() is quicker than bitmap_set() if you only set one bit.

Thanks
Wen Congyang

> 
> Fam
> 
>>
>> Thanks
>> Wen Congyang
>>
>>>  
>>>  /* Publish progress, guest I/O counts as progress too.  Note that 
>>> the
>>>   * offset field is an opaque progress value, it is not a disk 
>>> offset.
>>> @@ -394,7 +395,7 @@ static void coroutine_fn backup_run(void *opaque)
>>>  start = 0;
>>>  end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
>>>  
>>> -job->bitmap = hbitmap_alloc(end, 0);
>>> +job->done_bitmap = bitmap_new(end);
>>>  
>>>  bdrv_set_enable_write_cache(target, true);
>>>  if (target->blk) {
>>> @@ -475,7 +476,7 @@ static void coroutine_fn backup_run(void *opaque)
>>>  /* wait until pending backup_do_cow() calls have completed */
>>>  qemu_co_rwlock_wrlock(&job->flush_rwlock);
>>>  qemu_co_rwlock_unlock(&job->flush_rwlock);
>>> -hbitmap_free(job->bitmap);
>>> +g_free(job->done_bitmap);
>>>  
>>>  if (target->blk) {
>>>  blk_iostatus_disable(target->blk);
>>>
>>
> .
> 




Re: [Qemu-devel] [RFC] virtio: convert to use DMA api

2015-11-23 Thread Cornelia Huck
On Mon, 23 Nov 2015 15:41:11 +0800
Jason Wang  wrote:

> Currently, all virtio devices bypass IOMMU completely. This is because
> address_space_memory is assumed and used during DMA emulation. This
> patch converts the virtio core API to use DMA API. This idea is
> 
> - introducing a new transport specific helper to query the dma address
>   space. (only pci version is implemented).
> - query and use this address space during virtio device guest memory
>   accessing
> 
> With this virtiodevices will not bypass IOMMU anymore. Little tested with
> intel_iommu=on with virtio guest DMA series posted in
> https://lkml.org/lkml/2015/10/28/64.
> 
> TODO:
> - Feature bit for this

I'm still not convinced about that feature bit stuff. It just feels
wrong to use a mechanism that conveys negotiable device features to
configure what is basically a platform/hypervisor feature. I'd rather
see this out of the virtio layer and into the pci layer.

> - Implement this for all transports

Is it OK to just keep a fallback to today's implementation for
transports for which the iommu concept doesn't make sense and that will
always have an identity mapping?

> 
> Signed-off-by: Jason Wang 
> ---
>  hw/block/virtio-blk.c |  2 +-
>  hw/char/virtio-serial-bus.c   |  2 +-
>  hw/scsi/virtio-scsi.c |  2 +-
>  hw/virtio/virtio-pci.c|  9 +
>  hw/virtio/virtio.c| 36 +++--
>  include/hw/virtio/virtio-access.h | 42 
> +--
>  include/hw/virtio/virtio-bus.h|  1 +
>  include/hw/virtio/virtio.h|  2 +-
>  8 files changed, 67 insertions(+), 29 deletions(-)

FWIW, this doesn't seem to break for the s390-ccw-virtio machine (only
snifftested).




Re: [Qemu-devel] [RFC] virtio: convert to use DMA api

2015-11-23 Thread Michael S. Tsirkin
On Mon, Nov 23, 2015 at 10:36:45AM +0100, Cornelia Huck wrote:
> On Mon, 23 Nov 2015 15:41:11 +0800
> Jason Wang  wrote:
> 
> > Currently, all virtio devices bypass IOMMU completely. This is because
> > address_space_memory is assumed and used during DMA emulation. This
> > patch converts the virtio core API to use DMA API. This idea is
> > 
> > - introducing a new transport specific helper to query the dma address
> >   space. (only pci version is implemented).
> > - query and use this address space during virtio device guest memory
> >   accessing
> > 
> > With this virtiodevices will not bypass IOMMU anymore. Little tested with
> > intel_iommu=on with virtio guest DMA series posted in
> > https://lkml.org/lkml/2015/10/28/64.
> > 
> > TODO:
> > - Feature bit for this
> 
> I'm still not convinced about that feature bit stuff. It just feels
> wrong to use a mechanism that conveys negotiable device features to
> configure what is basically a platform/hypervisor feature. I'd rather
> see this out of the virtio layer and into the pci layer.

I agree it's not something that needs to be negotiated.

But given that we are changing device and driver behaviour in a drastic
way, it seems prudent to have a way for guest and host to discover that,
even if it's just in case.

Whether it's a feature bit or something else pci-specific,
depends on whether this makes sense for any other transport.

> > - Implement this for all transports
> 
> Is it OK to just keep a fallback to today's implementation for
> transports for which the iommu concept doesn't make sense and that will
> always have an identity mapping?

Is there a reason why iommu does not make any sense for ccw or mmio?


> > 
> > Signed-off-by: Jason Wang 
> > ---
> >  hw/block/virtio-blk.c |  2 +-
> >  hw/char/virtio-serial-bus.c   |  2 +-
> >  hw/scsi/virtio-scsi.c |  2 +-
> >  hw/virtio/virtio-pci.c|  9 +
> >  hw/virtio/virtio.c| 36 +++--
> >  include/hw/virtio/virtio-access.h | 42 
> > +--
> >  include/hw/virtio/virtio-bus.h|  1 +
> >  include/hw/virtio/virtio.h|  2 +-
> >  8 files changed, 67 insertions(+), 29 deletions(-)
> 
> FWIW, this doesn't seem to break for the s390-ccw-virtio machine (only
> snifftested).



Re: [Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-23 Thread Fam Zheng
On Mon, 11/23 17:24, Wen Congyang wrote:
> On 11/23/2015 05:19 PM, Fam Zheng wrote:
> > On Mon, 11/23 17:01, Wen Congyang wrote:
> >> On 11/20/2015 05:59 PM, Fam Zheng wrote:
> >>> "s->bitmap" tracks done sectors, we only check bit states without using 
> >>> any
> >>> iterator which HBitmap is good for. Switch to "Bitmap" which is simpler 
> >>> and
> >>> more memory efficient.
> >>>
> >>> Meanwhile, rename it to done_bitmap, to reflect the intention.
> >>>
> >>> Signed-off-by: Fam Zheng 
> >>> ---
> >>>  block/backup.c | 11 ++-
> >>>  1 file changed, 6 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/block/backup.c b/block/backup.c
> >>> index 3b39119..d408f98 100644
> >>> --- a/block/backup.c
> >>> +++ b/block/backup.c
> >>> @@ -22,6 +22,7 @@
> >>>  #include "qapi/qmp/qerror.h"
> >>>  #include "qemu/ratelimit.h"
> >>>  #include "sysemu/block-backend.h"
> >>> +#include "qemu/bitmap.h"
> >>>  
> >>>  #define BACKUP_CLUSTER_BITS 16
> >>>  #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
> >>> @@ -47,7 +48,7 @@ typedef struct BackupBlockJob {
> >>>  BlockdevOnError on_target_error;
> >>>  CoRwlock flush_rwlock;
> >>>  uint64_t sectors_read;
> >>> -HBitmap *bitmap;
> >>> +unsigned long *done_bitmap;
> >>>  QLIST_HEAD(, CowRequest) inflight_reqs;
> >>>  } BackupBlockJob;
> >>>  
> >>> @@ -113,7 +114,7 @@ static int coroutine_fn 
> >>> backup_do_cow(BlockDriverState *bs,
> >>>  cow_request_begin(&cow_request, job, start, end);
> >>>  
> >>>  for (; start < end; start++) {
> >>> -if (hbitmap_get(job->bitmap, start)) {
> >>> +if (test_bit(start, job->done_bitmap)) {
> >>>  trace_backup_do_cow_skip(job, start);
> >>>  continue; /* already copied */
> >>>  }
> >>> @@ -164,7 +165,7 @@ static int coroutine_fn 
> >>> backup_do_cow(BlockDriverState *bs,
> >>>  goto out;
> >>>  }
> >>>  
> >>> -hbitmap_set(job->bitmap, start, 1);
> >>> +bitmap_set(job->done_bitmap, start, 1);
> >>
> >> You can use set_bit() here.
> > 
> > Why? I think bitmap_set is a better match with bitmap_new below.
> 
> set_bit() is quicker than bitmap_set() if you only set one bit.
> 

How much quicker is it? This doesn't sound convincing enough for me to lose the
readability.

Fam



Re: [Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-23 Thread Wen Congyang
On 11/23/2015 05:55 PM, Fam Zheng wrote:
> On Mon, 11/23 17:24, Wen Congyang wrote:
>> On 11/23/2015 05:19 PM, Fam Zheng wrote:
>>> On Mon, 11/23 17:01, Wen Congyang wrote:
 On 11/20/2015 05:59 PM, Fam Zheng wrote:
> "s->bitmap" tracks done sectors, we only check bit states without using 
> any
> iterator which HBitmap is good for. Switch to "Bitmap" which is simpler 
> and
> more memory efficient.
>
> Meanwhile, rename it to done_bitmap, to reflect the intention.
>
> Signed-off-by: Fam Zheng 
> ---
>  block/backup.c | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/block/backup.c b/block/backup.c
> index 3b39119..d408f98 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -22,6 +22,7 @@
>  #include "qapi/qmp/qerror.h"
>  #include "qemu/ratelimit.h"
>  #include "sysemu/block-backend.h"
> +#include "qemu/bitmap.h"
>  
>  #define BACKUP_CLUSTER_BITS 16
>  #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
> @@ -47,7 +48,7 @@ typedef struct BackupBlockJob {
>  BlockdevOnError on_target_error;
>  CoRwlock flush_rwlock;
>  uint64_t sectors_read;
> -HBitmap *bitmap;
> +unsigned long *done_bitmap;
>  QLIST_HEAD(, CowRequest) inflight_reqs;
>  } BackupBlockJob;
>  
> @@ -113,7 +114,7 @@ static int coroutine_fn 
> backup_do_cow(BlockDriverState *bs,
>  cow_request_begin(&cow_request, job, start, end);
>  
>  for (; start < end; start++) {
> -if (hbitmap_get(job->bitmap, start)) {
> +if (test_bit(start, job->done_bitmap)) {
>  trace_backup_do_cow_skip(job, start);
>  continue; /* already copied */
>  }
> @@ -164,7 +165,7 @@ static int coroutine_fn 
> backup_do_cow(BlockDriverState *bs,
>  goto out;
>  }
>  
> -hbitmap_set(job->bitmap, start, 1);
> +bitmap_set(job->done_bitmap, start, 1);

 You can use set_bit() here.
>>>
>>> Why? I think bitmap_set is a better match with bitmap_new below.
>>
>> set_bit() is quicker than bitmap_set() if you only set one bit.
>>
> 
> How much quicker is it? This doesn't sound convincing enough for me to lose 
> the
> readability.

I don't test it. It is just a suggestion.

Thanks
Wen Congyang

> 
> Fam
> .
> 




Re: [Qemu-devel] [PATCH for-2.5 v2 0/4] block: replace IOV_MAX with BlockLimits.max_iov

2015-11-23 Thread Stefan Hajnoczi
On Thu, Jul 09, 2015 at 10:56:43AM +0100, Stefan Hajnoczi wrote:
> v2:
>  * Default to IOV_MAX instead of INT_MAX [Peter Lieven]
> 
> This series depends on "[PATCH] block/mirror: limit qiov to IOV_MAX elements".
> 
> IOV_MAX has been hardcoded in several places since preadv()/pwritev()/etc
> refuse to take more iovecs per call.  That's actually an implementation detail
> of raw-posix and we shouldn't spread that across the codebase.
> 
> This series adds BlockLimits.max_iov (similar to BlockLimits.max_transfer_len
> and other fields).  Current IOV_MAX users are converted to blk_get_max_iov().
> 
> By the way, the IOV_MAX vs BlockLimits.max_iov naming is slightly confusing 
> but
> follows the BlockLimits field naming convention.  Once IOV_MAX is banished no
> one will be bothered by the reverse naming anymore.
> 
> Suggested-by: Kevin Wolf 
> 
> Stefan Hajnoczi (4):
>   block: add BlockLimits.max_iov field
>   block-backend: add blk_get_max_iov()
>   block: replace IOV_MAX with BlockLimits.max_iov
>   block/mirror: replace IOV_MAX with blk_get_max_iov()
> 
>  block/block-backend.c  | 5 +
>  block/io.c | 8 +++-
>  block/mirror.c | 6 --
>  hw/block/virtio-blk.c  | 2 +-
>  include/block/block_int.h  | 3 +++
>  include/sysemu/block-backend.h | 1 +
>  6 files changed, 21 insertions(+), 4 deletions(-)
> 
> -- 
> 2.4.3
> 

Thanks, applied to my block-next tree:
https://github.com/stefanha/qemu/commits/block-next

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH for-2.5 v2 0/4] block: replace IOV_MAX with BlockLimits.max_iov

2015-11-23 Thread Stefan Hajnoczi
On Wed, Nov 18, 2015 at 03:51:29PM +0100, Kevin Wolf wrote:
> Am 09.07.2015 um 11:56 hat Stefan Hajnoczi geschrieben:
> > v2:
> >  * Default to IOV_MAX instead of INT_MAX [Peter Lieven]
> > 
> > This series depends on "[PATCH] block/mirror: limit qiov to IOV_MAX 
> > elements".
> > 
> > IOV_MAX has been hardcoded in several places since preadv()/pwritev()/etc
> > refuse to take more iovecs per call.  That's actually an implementation 
> > detail
> > of raw-posix and we shouldn't spread that across the codebase.
> > 
> > This series adds BlockLimits.max_iov (similar to 
> > BlockLimits.max_transfer_len
> > and other fields).  Current IOV_MAX users are converted to 
> > blk_get_max_iov().
> > 
> > By the way, the IOV_MAX vs BlockLimits.max_iov naming is slightly confusing 
> > but
> > follows the BlockLimits field naming convention.  Once IOV_MAX is banished 
> > no
> > one will be bothered by the reverse naming anymore.
> > 
> > Suggested-by: Kevin Wolf 
> 
> I was just looking for patches marked as for-2.5 and found this one...
> Is there a reason it wasn't applied or did we just forget about it?

This series was forgotten.  I have applied it for QEMU 2.6 since it is
not a bug fix.

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] PING: [PATCH] backends/hostmem: Ignore ENOSYS while setting MPOL_DEFAULT

2015-11-23 Thread Paolo Bonzini
On 23/11/2015 07:41, Pavel Fedin wrote:
>  Hello! No news for a long time, we are at RC stage. Could we get this in?

Yes, queued for -rc2.

Paolo

> Kind regards,
> Pavel Fedin
> Expert Engineer
> Samsung Electronics Research center Russia
> 
>> -Original Message-
>> From: qemu-devel-bounces+p.fedin=samsung@nongnu.org [mailto:qemu-devel-
>> bounces+p.fedin=samsung@nongnu.org] On Behalf Of Eduardo Habkost
>> Sent: Tuesday, October 27, 2015 8:32 PM
>> To: Pavel Fedin
>> Cc: 'Paolo Bonzini'; qemu-devel@nongnu.org
>> Subject: Re: [Qemu-devel] [PATCH] backends/hostmem: Ignore ENOSYS while 
>> setting MPOL_DEFAULT
>>
>> On Tue, Oct 27, 2015 at 03:51:31PM +0300, Pavel Fedin wrote:
>>> Currently hostmem backend fails if CONFIG_NUMA is enabled for the qemu
>>> (default), but NUMA is not supported by the kernel. This makes it
>>> impossible to use ivshmem in such configurations.
>>>
>>> This patch fixes the problem by ignoring ENOSYS error if policy is set to
>>> MPOL_DEFAULT. This way the code behaves in the same way as if CONFIG_NUMA
>>> was not defined. qemu will still fail if the user specifies some other
>>> policy, so that the user knows it.
>>>
>>> Signed-off-by: Pavel Fedin 
>>
>> Reviewed-by: Eduardo Habkost 
>>
>> Thanks. Applied to numa tree, with the following indentation fix:
>>
>> diff --git a/backends/hostmem.c b/backends/hostmem.c
>> index 94a4ac0..1b4eb45 100644
>> --- a/backends/hostmem.c
>> +++ b/backends/hostmem.c
>> @@ -315,7 +315,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
>> Error **errp)
>>maxnode ? backend->host_nodes : NULL, maxnode + 1, 
>> flags)) {
>>  if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
>>  error_setg_errno(errp, errno,
>> - "cannot bind memory to host NUMA nodes");
>> + "cannot bind memory to host NUMA nodes");
>>  return;
>>  }
>>  }
>>
>>> ---
>>>  backends/hostmem.c | 6 --
>>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/backends/hostmem.c b/backends/hostmem.c
>>> index 41ba2af..94a4ac0 100644
>>> --- a/backends/hostmem.c
>>> +++ b/backends/hostmem.c
>>> @@ -313,9 +313,11 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
>>> Error **errp)
>>>  assert(maxnode <= MAX_NODES);
>>>  if (mbind(ptr, sz, backend->policy,
>>>maxnode ? backend->host_nodes : NULL, maxnode + 1, 
>>> flags)) {
>>> -error_setg_errno(errp, errno,
>>> +if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
>>> +error_setg_errno(errp, errno,
>>>   "cannot bind memory to host NUMA nodes");
>>> -return;
>>> +return;
>>> +}
>>>  }
>>>  #endif
>>>  /* Preallocate memory after the NUMA policy has been instantiated.
>>> --
>>> 1.9.5.msysgit.0
>>>
>>>
>>
>> --
>> Eduardo
> 
> 
> 



Re: [Qemu-devel] [PATCH] block/qapi: Plug memory leak on query-block error path

2015-11-23 Thread Kevin Wolf
Am 20.11.2015 um 13:53 hat Markus Armbruster geschrieben:
> Spotted by Coverity.
> 
> Signed-off-by: Markus Armbruster 

Reviewed-by: Kevin Wolf 



Re: [Qemu-devel] PING: [PATCH] backends/hostmem: Ignore ENOSYS while setting MPOL_DEFAULT

2015-11-23 Thread Paolo Bonzini


On 23/11/2015 11:09, Paolo Bonzini wrote:
> On 23/11/2015 07:41, Pavel Fedin wrote:
>>  Hello! No news for a long time, we are at RC stage. Could we get this in?
> 
> Yes, queued for -rc2.

... doh, Eduardo applied it to the NUMA tree already.  I missed that
backends/hostmem* is under NUMA and not memory.

Paolo

> Paolo
> 
>> Kind regards,
>> Pavel Fedin
>> Expert Engineer
>> Samsung Electronics Research center Russia
>>
>>> -Original Message-
>>> From: qemu-devel-bounces+p.fedin=samsung@nongnu.org [mailto:qemu-devel-
>>> bounces+p.fedin=samsung@nongnu.org] On Behalf Of Eduardo Habkost
>>> Sent: Tuesday, October 27, 2015 8:32 PM
>>> To: Pavel Fedin
>>> Cc: 'Paolo Bonzini'; qemu-devel@nongnu.org
>>> Subject: Re: [Qemu-devel] [PATCH] backends/hostmem: Ignore ENOSYS while 
>>> setting MPOL_DEFAULT
>>>
>>> On Tue, Oct 27, 2015 at 03:51:31PM +0300, Pavel Fedin wrote:
 Currently hostmem backend fails if CONFIG_NUMA is enabled for the qemu
 (default), but NUMA is not supported by the kernel. This makes it
 impossible to use ivshmem in such configurations.

 This patch fixes the problem by ignoring ENOSYS error if policy is set to
 MPOL_DEFAULT. This way the code behaves in the same way as if CONFIG_NUMA
 was not defined. qemu will still fail if the user specifies some other
 policy, so that the user knows it.

 Signed-off-by: Pavel Fedin 
>>>
>>> Reviewed-by: Eduardo Habkost 
>>>
>>> Thanks. Applied to numa tree, with the following indentation fix:
>>>
>>> diff --git a/backends/hostmem.c b/backends/hostmem.c
>>> index 94a4ac0..1b4eb45 100644
>>> --- a/backends/hostmem.c
>>> +++ b/backends/hostmem.c
>>> @@ -315,7 +315,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
>>> Error **errp)
>>>maxnode ? backend->host_nodes : NULL, maxnode + 1, 
>>> flags)) {
>>>  if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
>>>  error_setg_errno(errp, errno,
>>> - "cannot bind memory to host NUMA nodes");
>>> + "cannot bind memory to host NUMA nodes");
>>>  return;
>>>  }
>>>  }
>>>
 ---
  backends/hostmem.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

 diff --git a/backends/hostmem.c b/backends/hostmem.c
 index 41ba2af..94a4ac0 100644
 --- a/backends/hostmem.c
 +++ b/backends/hostmem.c
 @@ -313,9 +313,11 @@ host_memory_backend_memory_complete(UserCreatable 
 *uc, Error **errp)
  assert(maxnode <= MAX_NODES);
  if (mbind(ptr, sz, backend->policy,
maxnode ? backend->host_nodes : NULL, maxnode + 1, 
 flags)) {
 -error_setg_errno(errp, errno,
 +if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
 +error_setg_errno(errp, errno,
   "cannot bind memory to host NUMA nodes");
 -return;
 +return;
 +}
  }
  #endif
  /* Preallocate memory after the NUMA policy has been instantiated.
 --
 1.9.5.msysgit.0


>>>
>>> --
>>> Eduardo
>>
>>
>>
> 
> 



Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Markus Armbruster
Marc-André Lureau  writes:

> Hi
>
> - Original Message -
>> >> Hash ivshmem been used in anger?  If yes, how?
>> 
>> Still the question to answer.
>
> I don't expect users to read this ML everyday (anybody
> actually). Personally, I have no clue how widespread ivshmem usage is.
>
>> Besides the usual PCI properties, we have:
>> 
>> ivshmem.chardev=str (ID of a chardev to use as a backend)
>> ivshmem.size=str
>> ivshmem.vectors=uint32
>> ivshmem.ioeventfd=bool (on/off)
>> ivshmem.msi=bool (on/off)
>> ivshmem.shm=str
>> ivshmem.role=str
>> ivshmem.memdev=link
>> ivshmem.use64=uint32
>> 
>> Exactly one of chardev, shm, memdev must be given.  qemu-doc.info claims
>> you can omit all three, but that's a bug.
>
> interesting, I guess the doc must be updated

One-liner.

>> vectors, ioeventfd and msi make sense only with chardev.  The code
>> appears to silently ignore them unless chardev is given.  Except it
>> still rejects ioeventfd=on,msi=off.  I wouldn't bet on nonsensical
>> combinations to actually work.
>
> I haven't tried all combinations, to me it's not a bug if an argument
> is silently ignored, but we are missing a warning.

In general, configuration that doesn't make sense should be flat-out
rejected.

A warning is appropriate when we feel rejecting would break backward
compatibility.

Non-sensical option combinations can signify badly designed interfaces.
I believe this is the case here.  More on that below.

>> qemu-doc documents role only with chardev.  The code doesn't care.
>
> yeah, role is only really useful with a server. Another missing warning.

I think it makes sense only when we can migrate the shared memory
contents out-of-band.  Vaguely similar to block devices: either you
migrate them along (block migration), or you have to ensure they have
identical contents on the target in some other way.

Is the latter possible only with a server?

>> size makes sense only with shm and chardev.  If you specify it with
>> memdev, it's ignored with a warning.
>
> Ah! it's probably fine then?

For a definition of "fine"

>> With shm, we first try to create the shared memory object with the
>> specified size, and if that fails, we try to open it.  The specified
>> size must not be larger than the shared memory object then.
>> 
>> With chardev, we receive the shared memory object from the server.
>> Until we get it, BAR isn't mapped.  If the specified size is larger, the
>> BAR remains unmapped.
>
> yep
>
>> use64 sets PCI_BASE_ADDRESS_SPACE_MEMORY.  Not documented in qemu-doc.
>
> I don't think all properties are documented, are they? Gerd added this
> in c08ba66f with pretty good commit message that we could adapt to add
> in documentation.

Yes.

>> This is a byzantine mess even for QEMU standards.
>> 
>> Questions:
>> 
>> * Is it okay to have an unmapped BAR?
>
> I can't say much, though I remember I did some tests and it was ok.

Did you try chardev=...,size=S, where S is larger than what the server
provides?

A guest that fails there probably works for sufficiently small S only
when it loses the race with the server.

But my question is actually whether it's okay for a PCI device to
advertize a BAR, and then not map anything there.

Should realize wait for the server providing the shared memory?

>> * We actually have two distinct device kinds:
>> 
>>   - The basic device: shm with parameter size, or memdev
>> 
>>   - The doorbell-capable device: chardev with parameters size, vectors,
>> ioeventfd=on, msi
>> 
>>   Common parameters: use64, role, although the latter is only documented
>>   for the doorbell-capable device.
>> 
>>   Why is this a single device model?
>
> No idea, but I agree it would make sense to have two different devices.
>
>>   It's not even obvious to me how the guest is supposed to figure out
>>   which kind it has.
>
> I don't think it can easily: I can imagine sending a doorbell to
> yourself, but that wouldn't be really reliable either.

Ugh!

>> * shm appears to be the same as memdev, just less flexible.  Why does it
>>   exist?
>
> It was there before.

Not only is memdev more flexible, it also provides the clean split
between frontend and backend we generally want.

>> * Are we *sure* this is ready to become ABI?  I doubt it's ABI yet,
>>   because before Marc-André's massive rework, it was even worse (*much*
>>   worse), to a degree that makes me doubt anybody could use it
>>   seriously.
>
> It's supposed to be the same ABI as before, with the memdev addition.

Well, it's *crap*.  We shouldn't extend and known crap so it can get
used more widely, we should deprecate it in favour of something less
crappy.

Here's my attempt:

1. Split ivshmem into ivshmem-plain and ivshmem-doorbell.

   Each one gets its own PCI device ID, so that guests can trivially see
   whether the device got a doorbell.

   Both have property use64.

   ivshmem-plain additionally has property memdev.

   ivshmem-doorbell additionally has chardev,

Re: [Qemu-devel] [PATCH for 2.5 1/1] parallels: dirty BAT properly for continuous allocations

2015-11-23 Thread Stefan Hajnoczi
On Tue, Nov 17, 2015 at 08:02:58PM +0300, Denis V. Lunev wrote:
> From: Vladimir Sementsov-Ogievskiy 
> 
> This patch marks part of the BAT dirty properly. There is a possibility that
> multy-block allocation could have one block allocated on one BAT page and
> next block on the next page. The code without the patch could not save
> updated position to the file.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Signed-off-by: Denis V. Lunev 
> CC: Stefan Hajnoczi 
> ---
>  block/parallels.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Thanks, applied to my block tree:
https://github.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature


[Qemu-devel] [PATCH] linux-user,sh4: fix signal retcode address

2015-11-23 Thread Laurent Vivier
To return from a signal, setup_frame() puts an instruction to
be executed in the stack. This sequence calls the syscall sigreturn().

The address of the instruction must be set in the PR register
to be executed.

This patch fixes this: the current code sets the register to the address
of the instruction in the host address space (which can be 64bit whereas
PR is only 32bit), but the virtual CPU can't access this address space,
so we put in PR the address of the instruction in the guest address space.

This patch also removes an useless variable (ret) in the modified functions.

Signed-off-by: Laurent Vivier 
---
 linux-user/signal.c | 16 ++--
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 55e5405..5e8f6d8 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -3215,7 +3215,6 @@ static void setup_frame(int sig, struct target_sigaction 
*ka,
 struct target_sigframe *frame;
 abi_ulong frame_addr;
 int i;
-int err = 0;
 
 frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame));
 if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
@@ -3233,15 +3232,14 @@ static void setup_frame(int sig, struct 
target_sigaction *ka,
 regs->pr = (unsigned long) ka->sa_restorer;
 } else {
 /* Generate return code (system call to sigreturn) */
+abi_ulong retcode_addr = frame_addr +
+ offsetof(struct target_sigframe, retcode);
 __put_user(MOVW(2), &frame->retcode[0]);
 __put_user(TRAP_NOARG, &frame->retcode[1]);
 __put_user((TARGET_NR_sigreturn), &frame->retcode[2]);
-regs->pr = (unsigned long) frame->retcode;
+regs->pr = (unsigned long) retcode_addr;
 }
 
-if (err)
-goto give_sigsegv;
-
 /* Set up registers for signal handler */
 regs->gregs[15] = frame_addr;
 regs->gregs[4] = sig; /* Arg for signal handler */
@@ -3264,7 +3262,6 @@ static void setup_rt_frame(int sig, struct 
target_sigaction *ka,
 struct target_rt_sigframe *frame;
 abi_ulong frame_addr;
 int i;
-int err = 0;
 
 frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame));
 if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
@@ -3293,15 +3290,14 @@ static void setup_rt_frame(int sig, struct 
target_sigaction *ka,
 regs->pr = (unsigned long) ka->sa_restorer;
 } else {
 /* Generate return code (system call to sigreturn) */
+abi_ulong retcode_addr = frame_addr +
+ offsetof(struct target_rt_sigframe, retcode);
 __put_user(MOVW(2), &frame->retcode[0]);
 __put_user(TRAP_NOARG, &frame->retcode[1]);
 __put_user((TARGET_NR_rt_sigreturn), &frame->retcode[2]);
-regs->pr = (unsigned long) frame->retcode;
+regs->pr = (unsigned long) retcode_addr;
 }
 
-if (err)
-goto give_sigsegv;
-
 /* Set up registers for signal handler */
 regs->gregs[15] = frame_addr;
 regs->gregs[4] = sig; /* Arg for signal handler */
-- 
2.4.3




Re: [Qemu-devel] [PATCH] linux-user, sh4: fix signal retcode address

2015-11-23 Thread John Paul Adrian Glaubitz
On 11/23/2015 11:38 AM, Laurent Vivier wrote:
> Signed-off-by: Laurent Vivier 

Tested-by: John Paul Adrian Glaubitz 

This patch fixes crashes of qemu-sh4* on amd64 for me [1].
I also haven't seen any regressions ever since.

Cheers,
Adrian

> [1] https://bugs.launchpad.net/ubuntu/+source/qemu-linaro/+bug/1254824

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaub...@debian.org
`. `'   Freie Universitaet Berlin - glaub...@physik.fu-berlin.de
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913



[Qemu-devel] [PATCH for-2.5] vhost-user: clarify start and enable

2015-11-23 Thread Michael S. Tsirkin
It seems that we currently have some duplication between
started and enabled states.

The actual reason is that enable is not documented correctly:
what it does is connecting ring to the backend.

This is important for MQ, because a Linux guest expects TX
packets to be completed even if it disables some queues
temporarily.

Cc: Yuanhan Liu 
Cc: Victor Kaplansky 
Signed-off-by: Michael S. Tsirkin 
---
 docs/specs/vhost-user.txt | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
index 7b9cd6d..0312d40 100644
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@@ -148,13 +148,23 @@ a feature bit was dedicated for this purpose:
 
 Starting and stopping rings
 --
-Client must only process each ring when it is both started and enabled.
+Client must only process each ring when it is started.
+
+Client must only pass data between the ring and the
+backend, when the ring is enabled.
+
+If ring is started but disabled, client must process the
+ring without talking to the backend.
+
+For example, for a networking device, in the disabled state
+client must not supply any new RX packets, but must process
+and discard any TX packets.
 
 If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated, the ring is 
initialized
 in an enabled state.
 
 If VHOST_USER_F_PROTOCOL_FEATURES has been negotiated, the ring is initialized
-in a disabled state. Client must not process it until ring is enabled by
+in a disabled state. Client must not pass data to/from the backend until ring 
is enabled by
 VHOST_USER_SET_VRING_ENABLE with parameter 1, or after it has been disabled by
 VHOST_USER_SET_VRING_ENABLE with parameter 0.
 
@@ -166,7 +176,7 @@ descriptor is readable) on the descriptor specified by
 VHOST_USER_SET_VRING_KICK, and stop ring upon receiving
 VHOST_USER_GET_VRING_BASE.
 
-While processing the rings (when they are started and enabled), client must
+While processing the rings (whether they are enabled or not), client must
 support changing some configuration aspects on the fly.
 
 Multiple queue support
@@ -309,11 +319,11 @@ Message types
   Id: 4
   Master payload: N/A
 
-  This is no longer used. Used to be sent to request stopping
+  This is no longer used. Used to be sent to request disabling
   all rings, but some clients interpreted it to also discard
   connection state (this interpretation would lead to bugs).
   It is recommended that clients either ignore this message,
-  or use it to stop all rings.
+  or use it to disable all rings.
 
  * VHOST_USER_SET_MEM_TABLE
 
-- 
MST



Re: [Qemu-devel] [PATCH v2] numa: Clean up query-memdev error handling

2015-11-23 Thread Markus Armbruster
"Michael S. Tsirkin"  writes:

> On Mon, Nov 23, 2015 at 09:35:31AM +0100, Markus Armbruster wrote:
>> qmp_query_memdev() has two error paths:
>> 
>> * When object_get_objects_root() returns null.  It never does, so
>>   simply drop the useless error handling.
>> 
>> * When query_memdev() fails.  It leaks err then.  But any failure
>>   there is actually a programming error.  Switch it to &error_abort,
>>   and drop the useless error handling.
>> 
>> Messed up in commit 76b5d85 "qmp: add query-memdev".
>> 
>> Signed-off-by: Markus Armbruster 
>
>
> Post 2.5 right?

I don't mind.  I meant v1 for 2.5, but we've since convinced ourselves
that errors can't happen, and the memory leak is only latent.



Re: [Qemu-devel] [PATCH v5 00/10] sPAPR CPU hotplug

2015-11-23 Thread Peter Krempa
On Fri, Nov 20, 2015 at 18:24:29 +0530, Bharata B Rao wrote:
> This patchset adds CPU hotplug support for sPAPR PowerPC guests using
> device_add and device_del commands
> 
> (qemu) device_add POWER8-powerpc64-cpu,id=cpu0

Is there a reason why this uses 'device_add' rather than the 'cpu_add'
command? Libvirt uses two separate approaches already. Due to legacy
reasons we support the HMP 'cpu_set' command, and lately we added
support for QMP 'cpu-add'. Using device_add here will introduce a
different approach and will require yet another compatibility layer in
libvirt to support this.

Peter



signature.asc
Description: Digital signature


Re: [Qemu-devel] [PATCH for-2.5] target-arm: Don't mask out bits [47:40] in LPAE descriptors for v8

2015-11-23 Thread Edgar E. Iglesias
On Fri, Nov 20, 2015 at 02:32:51PM +, Peter Maydell wrote:
> In an LPAE format descriptor in ARMv8 the address field extends
> up to bit 47, not just bit 39. Correct the masking so we don't
> give incorrect results if the output address size is greater
> than 40 bits, as it can be for AArch64.
> 
> (Note that we don't yet support the new-in-v8 Address Size fault which
> should be generated if any translation table entry or TTBR contains
> an address with non-zero bits above the most significant bit of the
> maximum output address size.)
> 
> Signed-off-by: Peter Maydell 


Reviewed-by: Edgar E. Iglesias 


> ---
> This is worth fixing for 2.5 I think. As the commit message notes,
> we don't support the Addres Size faults we ought to take in some
> cases, but that seems more 2.6-ish.
> ---
>  target-arm/helper.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 4ecae61..afc4163 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -6642,6 +6642,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
> target_ulong address,
>  int ap, ns, xn, pxn;
>  uint32_t el = regime_el(env, mmu_idx);
>  bool ttbr1_valid = true;
> +uint64_t descaddrmask;
>  
>  /* TODO:
>   * This code does not handle the different format TCR for VTCR_EL2.
> @@ -6831,6 +6832,15 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
> target_ulong address,
>  descaddr = extract64(ttbr, 0, 48);
>  descaddr &= ~((1ULL << (inputsize - (stride * (4 - level - 1);
>  
> +/* The address field in the descriptor goes up to bit 39 for ARMv7
> + * but up to bit 47 for ARMv8.
> + */
> +if (arm_feature(env, ARM_FEATURE_V8)) {
> +descaddrmask = 0xf000ULL;
> +} else {
> +descaddrmask = 0xfff000ULL;
> +}
> +
>  /* Secure accesses start with the page table in secure memory and
>   * can be downgraded to non-secure at any step. Non-secure accesses
>   * remain non-secure. We implement this by just ORing in the NSTable/NS
> @@ -6854,7 +6864,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, 
> target_ulong address,
>  /* Invalid, or the Reserved level 3 encoding */
>  goto do_fault;
>  }
> -descaddr = descriptor & 0xfff000ULL;
> +descaddr = descriptor & descaddrmask;
>  
>  if ((descriptor & 2) && (level < 3)) {
>  /* Table entry. The top five bits are attributes which  may
> -- 
> 1.9.1
> 



Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Marc-André Lureau
Hi

- Original Message -
>
> >> qemu-doc documents role only with chardev.  The code doesn't care.
> >
> > yeah, role is only really useful with a server. Another missing warning.
> 
> I think it makes sense only when we can migrate the shared memory
> contents out-of-band.  Vaguely similar to block devices: either you
> migrate them along (block migration), or you have to ensure they have
> identical contents on the target in some other way.
> 
> Is the latter possible only with a server?

The way ivshmem role works is:
- master: will handle migration
- peer: can't migrate

It's not out-of-band, qemu handles the shared memory migration, even if
the memory is owned by the server. In practice, it means you can only
migrate one VM, or you migrate them all but you will migrate the shared
memory N times and will have to synchronize in your guest app. I suppose ivshmem
"role" was designed to only migrate the master. Ability to migrate a pool of
peer would be a significant new feature. I am not aware of such request.

> >> This is a byzantine mess even for QEMU standards.
> >> 
> >> Questions:
> >> 
> >> * Is it okay to have an unmapped BAR?
> >
> > I can't say much, though I remember I did some tests and it was ok.
> 
> Did you try chardev=...,size=S, where S is larger than what the server
> provides?

It will fall in check_shm_size().

> A guest that fails there probably works for sufficiently small S only
> when it loses the race with the server.
> 
> But my question is actually whether it's okay for a PCI device to
> advertize a BAR, and then not map anything there.
> 
> Should realize wait for the server providing the shared memory?
> 
> >> * We actually have two distinct device kinds:
> >> 
> >>   - The basic device: shm with parameter size, or memdev
> >> 
> >>   - The doorbell-capable device: chardev with parameters size, vectors,
> >> ioeventfd=on, msi
> >> 
> >>   Common parameters: use64, role, although the latter is only documented
> >>   for the doorbell-capable device.
> >> 

> 
> >> * Are we *sure* this is ready to become ABI?  I doubt it's ABI yet,
> >>   because before Marc-André's massive rework, it was even worse (*much*
> >>   worse), to a degree that makes me doubt anybody could use it
> >>   seriously.
> >
> > It's supposed to be the same ABI as before, with the memdev addition.
> 
> Well, it's *crap*.  We shouldn't extend and known crap so it can get
> used more widely, we should deprecate it in favour of something less
> crappy.

I think you overstate this, if it would be so bad I don't think anyone could use
it and it wouldn't probably be in qemu. We are mainly talking misconfiguration
and missing features (that people may not actually need). I think
before we split things, we can address your comments with more
options checking and documentation.

> Here's my attempt:
> 
> 1. Split ivshmem into ivshmem-plain and ivshmem-doorbell.
> 
>Each one gets its own PCI device ID, so that guests can trivially see
>whether the device got a doorbell.
> 
>Both have property use64.
> 
>ivshmem-plain additionally has property memdev.
> 
>ivshmem-doorbell additionally has chardev, size, vectors, ioeventfd,
>msi.
> 
>Undecided: property role (see above).
> 
>The only non-sensical property combination is ioeventfd=on,msi=off,
>which we cleanly reject.
> 
>Undecided: behavior before the server provides the shared memory, and
>what to do when it's less than size (see above).  This is the *only*
>part of my proposal that may require code changes beyond
>configuration.  If we can't do this before the release, punt
>ivshmem-doorbell to the next cycle.

> 2. Drop ivshmem property memdev.
> 
>Use ivshmem-plain instead.
> 
> 3. Deprecate ivshmem.

It sounds like a reasonable thing to do, but I don't think the benefit is so 
important.

cheers



Re: [Qemu-devel] [PATCH WIP 01/30] crypto: add QCryptoSecret object class for password/key handling

2015-11-23 Thread Daniel P. Berrange
On Fri, Nov 20, 2015 at 03:09:25PM -0700, Eric Blake wrote:
> On 11/20/2015 11:04 AM, Daniel P. Berrange wrote:
> > +
> > +static const char *base64_valid_chars =
> > +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
> > +
> > +static int
> > +qcrypto_secret_validate_base64(const uint8_t *input,
> > +   size_t inputlen,
> > +   Error **errp)
> 
> Don't we already have base64 utility methods available?

We normally use glib,  g_base64_encode/decode. Unfortunately the
decode method doesn't provide any usefull error reporting facility.
It just silently skips any characters that are outside the valid
set.  So the only way I could get any kind of sensible error report
was to do this validation myself against the set of permitted base64
characters.


> > +++ b/qapi/crypto.json
> > @@ -19,3 +19,17 @@
> >  { 'enum': 'QCryptoTLSCredsEndpoint',
> >'prefix': 'QCRYPTO_TLS_CREDS_ENDPOINT',
> >'data': ['client', 'server']}
> > +
> > +
> > +##
> > +# QCryptoSecretFormat:
> > +#
> > +# The data format that the secret is provided in
> > +#
> > +# @raw: raw bytes. When encoded in JSON only valid UTF-8 sequences can be 
> > used
> > +# @base64: arbitrary base64 encoded binary data
> > +# Since: 2.5
> 
> You've missed 2.5.  Probably need to tweak the whole series to call out 2.6.

Yep.

> > +##
> > +{ 'enum': 'QCryptoSecretFormat',
> > +  'prefix': 'QCRYPTO_SECRET_FORMAT',
> > +  'data': ['raw', 'base64']}
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 0eea4ee..dd3f7f8 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -3670,6 +3670,7 @@ queue @var{all|rx|tx} is an option that can be 
> > applied to any netfilter.
> >  @option{tx}: the filter is attached to the transmit queue of the netdev,
> >   where it will receive packets sent by the netdev.
> >  
> > +
> >  @item -object 
> > filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}]
> 
> Why the added blank line here?

Rebase error I presume

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



[Qemu-devel] [PATCH 1/2] tests/Makefile: Add more dependencies for test-timed-average

2015-11-23 Thread Kevin Wolf
'make check' failed to compile the test case for mingw because of
undefined references. Pull in a few more dependencies so that it builds.

Signed-off-by: Kevin Wolf 
---
 tests/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/Makefile b/tests/Makefile
index b937984..47fe5d5 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -416,7 +416,8 @@ tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
$(test-qom-obj-y)
 tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \
libqemuutil.a stubs/clock-warp.o stubs/cpu-get-icount.o \
-   stubs/notify-event.o stubs/replay.o
+   stubs/notify-event.o stubs/replay.o stubs/mon-is-qmp.o 
stubs/fd-register.o \
+   stubs/mon-printf.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json 
$(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
-- 
1.8.3.1




[Qemu-devel] [PATCH for-2.5 0/2] Fix "make check" with mingw and Wine

2015-11-23 Thread Kevin Wolf
I tried to run "make check" on my mingw build and got two failures. Both of
them are bugs in the test suite rather than qemu proper. They are easy enough
to fix, so here are the fixes.

Kevin Wolf (2):
  tests/Makefile: Add more dependencies for test-timed-average
  test-aio: Fix event notifier cleanup

 tests/Makefile   | 3 ++-
 tests/test-aio.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH 2/2] test-aio: Fix event notifier cleanup

2015-11-23 Thread Kevin Wolf
One test case closed an event notifier (event_notifier_cleanup())
without first disabling it (set_event_notifier(..., NULL)). This
resulted in a leftover handle 0 that was added to each subsequent
WaitForMultipleObjects() call, causing the function to fail (invalid
handle).

Signed-off-by: Kevin Wolf 
---
 tests/test-aio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test-aio.c b/tests/test-aio.c
index 1623803..e188d8c 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -393,6 +393,7 @@ static void test_aio_external_client(void)
 aio_enable_external(ctx);
 }
 assert(aio_poll(ctx, false));
+set_event_notifier(ctx, &data.e, NULL);
 event_notifier_cleanup(&data.e);
 }
 }
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH WIP 04/30] qcow2: add a 'keyid' parameter to qcow2 options

2015-11-23 Thread Daniel P. Berrange
On Fri, Nov 20, 2015 at 03:15:27PM -0700, Eric Blake wrote:
> On 11/20/2015 11:04 AM, Daniel P. Berrange wrote:
> > Add a 'keyid' parameter that refers to the ID of a
> > QCryptoSecret instance that provides the encryption key.
> > 
> > $QEMU \
> > -object secret,id=sec0,filename=/home/berrange/encrypted.pw \
> > -drive file=/home/berrange/encrypted.qcow2,keyid=sec0
> > 
> > Signed-off-by: Daniel P. Berrange 
> > ---
> >  block/qcow2.c| 80 
> > +---
> >  block/qcow2.h|  1 +
> >  qapi/block-core.json |  8 --
> >  3 files changed, 64 insertions(+), 25 deletions(-)
> 
> > +++ b/qapi/block-core.json
> > @@ -1698,7 +1698,7 @@
> >  # Driver specific block device options for qcow.
> >  #
> >  # @keyid: #optional ID of the "secret" object providing the
> > -# AES decryption key.
> > +# AES decryption key (since 2.5)
> >  #
> >  # Since: 2.5
> 
> I already pointed this out on the previous post, but this hunk is wrong
> (since the entire BlockdevOptionsQcow struct is new); it instead belongs...
> 
> >  ##
> > @@ -1742,6 +1742,9 @@
> >  # caches. The interval is in seconds. The default 
> > value
> >  # is 0 and it disables this feature (since 2.5)
> >  #
> > +# @keyid: #optional ID of the "secret" object providing the
> > +# AES decryption key.
> 
> ...here as part of BlockdevOptionsQcow2.  Also, I wonder if inheriting
> from BlockdevOptionsQcow is any easier here than just declaring keyid
> directly.

When I fully integrate LUKS support in qcow2, there will be several
more parameters added to this struct, which I won't be adding to
qcow, since I don't fancy doing any work on qcow code to improve
its encryption, since its essentially obsolte. So on this basis,
I don't think inheriting BlockdevOptionsQcow will have tangible
benefit.


Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



Re: [Qemu-devel] PING: [PATCH] backends/hostmem: Ignore ENOSYS while setting MPOL_DEFAULT

2015-11-23 Thread Eduardo Habkost
On Mon, Nov 23, 2015 at 11:11:28AM +0100, Paolo Bonzini wrote:
> 
> 
> On 23/11/2015 11:09, Paolo Bonzini wrote:
> > On 23/11/2015 07:41, Pavel Fedin wrote:
> >>  Hello! No news for a long time, we are at RC stage. Could we get this in?
> > 
> > Yes, queued for -rc2.
> 
> ... doh, Eduardo applied it to the NUMA tree already.  I missed that
> backends/hostmem* is under NUMA and not memory.

I had applied it to the numa tree but forgot to send a pull
request, sorry. I will submit a pull request in a moment.

-- 
Eduardo



[Qemu-devel] [PULL 0/1] NUMA fix for -rc2

2015-11-23 Thread Eduardo Habkost
(I forgot to submit a pull request for this earlier, sorry.)

The following changes since commit 541abd10a01da56c5f16582cd32d67114ec22a5c:

  Update version for v2.5.0-rc1 release (2015-11-20 17:43:46 +)

are available in the git repository at:

  git://github.com/ehabkost/qemu.git tags/numa-pull-request

for you to fetch changes up to a3567ba1e6171ef7cfad55ae549c0cd8bffb1195:

  hostmem: Ignore ENOSYS while setting MPOL_DEFAULT (2015-11-23 10:43:38 -0200)


NUMA fix for -rc2



Pavel Fedin (1):
  hostmem: Ignore ENOSYS while setting MPOL_DEFAULT

 backends/hostmem.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

-- 
2.1.0




[Qemu-devel] [PULL for-2.5] tcg: Fix highwater check

2015-11-23 Thread Richard Henderson
From: John Clarke 

A simple typo in the variable to use when comparing vs the highwater mark.
Reports are that qemu can in fact segfault occasionally due to this mistake.

Signed-off-by: John Clarke 
Signed-off-by: Richard Henderson 
---
 tcg/tcg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 682af8a..b20ed19 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2443,7 +2443,7 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit 
*gen_code_buf)
one operation beginning below the high water mark cannot overrun
the buffer completely.  Thus we can test for overflow after
generating code without having to check during generation.  */
-if (unlikely(s->code_gen_ptr > s->code_gen_highwater)) {
+if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
 return -1;
 }
 }
-- 
2.4.3




[Qemu-devel] [PULL for-2.5] last minute tcg fix

2015-11-23 Thread Richard Henderson
Sent to me privately, for some reason, but absolutely correct
that it can occasionally cause problems.


r~


The following changes since commit 541abd10a01da56c5f16582cd32d67114ec22a5c:

  Update version for v2.5.0-rc1 release (2015-11-20 17:43:46 +)

are available in the git repository at:

  git://github.com/rth7680/qemu.git tags/pull-tcg-20151123

for you to fetch changes up to 644da9b39e477caa80bab69d2847dfcb468f0d33:

  tcg: Fix highwater check (2015-11-23 13:16:05 +0100)


Last minute fix.


John Clarke (1):
  tcg: Fix highwater check

 tcg/tcg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



Re: [Qemu-devel] [PATCH 0/2] Don't require hw_version to be set on every machine

2015-11-23 Thread Eduardo Habkost

Ping? Any thoughts on this approach?

-rc2 is too late for this kind of change, maybe?


On Thu, Nov 12, 2015 at 03:29:53PM -0200, Eduardo Habkost wrote:
> This changes the qemu_hw_version() default to "2.5+" so we don't
> make every machine class broken by default unless they set
> hw_version.
> 
> For reference, these are the current users of qemu_hw_version():
> 
> hw/arm/nseries.c:pstrcat((void *) w, 12, qemu_hw_version()); /* char 
> version[12] */
> hw/ide/core.c:pstrcpy(s->version, sizeof(s->version), 
> qemu_hw_version());
> hw/scsi/megasas.c:snprintf(info.package_version, 0x60, "%s-QEMU", 
> qemu_hw_version());
> hw/scsi/scsi-bus.c:pstrcpy((char *) &r->buf[32], 4, 
> qemu_hw_version());
> hw/scsi/scsi-disk.c:s->version = g_strdup(qemu_hw_version());
> target-i386/cpu.c:qemu_hw_version());
> 
> Cc: Andrzej Zaborowski 
> Cc: John Snow 
> Cc: Paolo Bonzini 
> Cc: Hannes Reinecke 
> Cc: Richard Henderson 
> 
> Eduardo Habkost (2):
>   osdep: Change default value of qemu_hw_version() to "2.5+"
>   pc: Don't set hw_version on pc-*-2.5
> 
>  hw/i386/pc_piix.c| 1 -
>  hw/i386/pc_q35.c | 1 -
>  include/hw/boards.h  | 5 +
>  include/qemu/osdep.h | 4 
>  util/osdep.c | 9 -
>  5 files changed, 17 insertions(+), 3 deletions(-)
> 
> -- 
> 2.1.0
> 
> 

-- 
Eduardo



Re: [Qemu-devel] [PATCH 0/2] Don't require hw_version to be set on every machine

2015-11-23 Thread Michael S. Tsirkin
On Mon, Nov 23, 2015 at 10:53:39AM -0200, Eduardo Habkost wrote:
> 
> Ping? Any thoughts on this approach?
> 
> -rc2 is too late for this kind of change, maybe?
> 

I queued this up for 2.5, will merge later this week
unless anyone objects.

> On Thu, Nov 12, 2015 at 03:29:53PM -0200, Eduardo Habkost wrote:
> > This changes the qemu_hw_version() default to "2.5+" so we don't
> > make every machine class broken by default unless they set
> > hw_version.
> > 
> > For reference, these are the current users of qemu_hw_version():
> > 
> > hw/arm/nseries.c:pstrcat((void *) w, 12, qemu_hw_version()); /* char 
> > version[12] */
> > hw/ide/core.c:pstrcpy(s->version, sizeof(s->version), 
> > qemu_hw_version());
> > hw/scsi/megasas.c:snprintf(info.package_version, 0x60, "%s-QEMU", 
> > qemu_hw_version());
> > hw/scsi/scsi-bus.c:pstrcpy((char *) &r->buf[32], 4, 
> > qemu_hw_version());
> > hw/scsi/scsi-disk.c:s->version = g_strdup(qemu_hw_version());
> > target-i386/cpu.c:qemu_hw_version());
> > 
> > Cc: Andrzej Zaborowski 
> > Cc: John Snow 
> > Cc: Paolo Bonzini 
> > Cc: Hannes Reinecke 
> > Cc: Richard Henderson 
> > 
> > Eduardo Habkost (2):
> >   osdep: Change default value of qemu_hw_version() to "2.5+"
> >   pc: Don't set hw_version on pc-*-2.5
> > 
> >  hw/i386/pc_piix.c| 1 -
> >  hw/i386/pc_q35.c | 1 -
> >  include/hw/boards.h  | 5 +
> >  include/qemu/osdep.h | 4 
> >  util/osdep.c | 9 -
> >  5 files changed, 17 insertions(+), 3 deletions(-)
> > 
> > -- 
> > 2.1.0
> > 
> > 
> 
> -- 
> Eduardo



[Qemu-devel] [PULL 1/1] hostmem: Ignore ENOSYS while setting MPOL_DEFAULT

2015-11-23 Thread Eduardo Habkost
From: Pavel Fedin 

Currently hostmem backend fails if CONFIG_NUMA is enabled in QEMU
(the default) but NUMA is not supported by the kernel. This makes
it impossible to use ivshmem in such configurations.

This patch fixes the problem by ignoring ENOSYS error if policy is set to
MPOL_DEFAULT. This way the code behaves in the same way as if CONFIG_NUMA
was not defined. qemu will still fail if the user specifies some other
policy, so that the user knows it.

Signed-off-by: Pavel Fedin 
Reviewed-by: Eduardo Habkost 
Signed-off-by: Eduardo Habkost 
---
 backends/hostmem.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index 41ba2af..1b4eb45 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -313,9 +313,11 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
Error **errp)
 assert(maxnode <= MAX_NODES);
 if (mbind(ptr, sz, backend->policy,
   maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
-error_setg_errno(errp, errno,
- "cannot bind memory to host NUMA nodes");
-return;
+if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
+error_setg_errno(errp, errno,
+ "cannot bind memory to host NUMA nodes");
+return;
+}
 }
 #endif
 /* Preallocate memory after the NUMA policy has been instantiated.
-- 
2.1.0




Re: [Qemu-devel] [PATCH v5 00/10] sPAPR CPU hotplug

2015-11-23 Thread Christian Borntraeger
On 11/23/2015 12:54 PM, Peter Krempa wrote:
> On Fri, Nov 20, 2015 at 18:24:29 +0530, Bharata B Rao wrote:
>> This patchset adds CPU hotplug support for sPAPR PowerPC guests using
>> device_add and device_del commands
>>
>> (qemu) device_add POWER8-powerpc64-cpu,id=cpu0
> 
> Is there a reason why this uses 'device_add' rather than the 'cpu_add'
> command? Libvirt uses two separate approaches already. Due to legacy
> reasons we support the HMP 'cpu_set' command, and lately we added
> support for QMP 'cpu-add'. Using device_add here will introduce a
> different approach and will require yet another compatibility layer in
> libvirt to support this.

s390 and powerpc both started with cpu_add patches. Andreas Faerber
suggested then to only implement device_add. This was apparently discussed
at the last KVM forum.

Christian




Re: [Qemu-devel] [PULL for-2.5] tcg: Fix highwater check

2015-11-23 Thread Stefan Weil
Am 23.11.2015 um 13:45 schrieb Richard Henderson:
> From: John Clarke 
> 
> A simple typo in the variable to use when comparing vs the highwater mark.
> Reports are that qemu can in fact segfault occasionally due to this mistake.
> 
> Signed-off-by: John Clarke 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/tcg.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 682af8a..b20ed19 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -2443,7 +2443,7 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit 
> *gen_code_buf)
> one operation beginning below the high water mark cannot overrun
> the buffer completely.  Thus we can test for overflow after
> generating code without having to check during generation.  */
> -if (unlikely(s->code_gen_ptr > s->code_gen_highwater)) {
> +if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
>  return -1;
>  }
>  }
> 

Is a comparison of void pointers portable? Or would it be better
to cast both sides to uintptr_t? Or fix the declaration of
code_gen_highwater to use an uint8_t pointer and cast s->code_ptr
to that type? code_gen_highwater should be fixed anyway because
in translate-all a difference is calculated with it.

Stefan




Re: [Qemu-devel] [PATCH V5 7/8] introduce xlnx-dp

2015-11-23 Thread KONRAD Frederic



Le 20/11/2015 11:06, Alistair Francis a écrit :

On Fri, Oct 16, 2015 at 7:11 PM,   wrote:

From: KONRAD Frederic 

This is the implementation of the DisplayPort.
It has an aux-bus to access dpcd and edid.

Graphic plane is connected to the channel 3.
Video plane is connected to the channel 0.
Audio stream are connected to the channels 4 and 5.

This patch doesn't pass checkpatch (I didn't test any others).
Can you run the series through checkpatch?
Yes but I might have introduced some coding style issues during the last 
changes.

Sorry for that will fix it!

Thanks,
Fred


Also a super small nit pick, some of your line splitting doesn't line up,
can you make sure that the function arguments and if statement conditions
line up with the previous line.


Signed-off-by: KONRAD Frederic 
Tested-By: Hyun Kwon 
---
  hw/display/Makefile.objs |1 +
  hw/display/xlnx_dp.c | 1370 ++
  include/hw/display/xlnx_dp.h |  110 
  3 files changed, 1481 insertions(+)
  create mode 100644 hw/display/xlnx_dp.c
  create mode 100644 include/hw/display/xlnx_dp.h

diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs
index 250a43f..3625ab2 100644
--- a/hw/display/Makefile.objs
+++ b/hw/display/Makefile.objs
@@ -43,3 +43,4 @@ virtio-gpu.o-libs += $(VIRGL_LIBS)
  virtio-gpu-3d.o-cflags := $(VIRGL_CFLAGS)
  virtio-gpu-3d.o-libs += $(VIRGL_LIBS)
  obj-$(CONFIG_DPCD) += dpcd.o
+obj-$(CONFIG_XLNX_ZYNQMP) += xlnx_dp.o
diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
new file mode 100644
index 000..e91221c
--- /dev/null
+++ b/hw/display/xlnx_dp.c
@@ -0,0 +1,1370 @@
+/*
+ * xlnx_dp.c
+ *
+ *  Copyright (C) 2015 : GreenSocs Ltd
+ *  http://www.greensocs.com/ , email: i...@greensocs.com
+ *
+ *  Developed by :
+ *  Frederic Konrad   
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option)any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ *
+ */
+
+#include "hw/display/xlnx_dp.h"
+
+#ifndef DEBUG_DP
+#define DEBUG_DP 0
+#endif
+
+#define DPRINTF(fmt, ...) do { 
\
+if (DEBUG_DP) {
\
+qemu_log("xlnx_dp: " fmt , ## __VA_ARGS__);
\
+}  
\
+} while (0);
+
+/*
+ * Register offset for DP.
+ */
+#define DP_LINK_BW_SET  (0x >> 2)
+#define DP_LANE_COUNT_SET   (0x0004 >> 2)
+#define DP_ENHANCED_FRAME_EN(0x0008 >> 2)
+#define DP_TRAINING_PATTERN_SET (0x000C >> 2)
+#define DP_LINK_QUAL_PATTERN_SET(0x0010 >> 2)
+#define DP_SCRAMBLING_DISABLE   (0x0014 >> 2)
+#define DP_DOWNSPREAD_CTRL  (0x0018 >> 2)
+#define DP_SOFTWARE_RESET   (0x001C >> 2)
+#define DP_TRANSMITTER_ENABLE   (0x0080 >> 2)
+#define DP_MAIN_STREAM_ENABLE   (0x0084 >> 2)
+#define DP_FORCE_SCRAMBLER_RESET(0x00C0 >> 2)
+#define DP_VERSION_REGISTER (0x00F8 >> 2)
+#define DP_CORE_ID  (0x00FC >> 2)
+
+#define DP_AUX_COMMAND_REGISTER (0x0100 >> 2)
+#define AUX_ADDR_ONLY_MASK  (0x1000)
+#define AUX_COMMAND_MASK(0x0F00)
+#define AUX_COMMAND_SHIFT   (8)
+#define AUX_COMMAND_NBYTES  (0x000F)
+
+#define DP_AUX_WRITE_FIFO   (0x0104 >> 2)
+#define DP_AUX_ADDRESS  (0x0108 >> 2)
+#define DP_AUX_CLOCK_DIVIDER(0x010C >> 2)
+#define DP_TX_USER_FIFO_OVERFLOW(0x0110 >> 2)
+#define DP_INTERRUPT_SIGNAL_STATE   (0x0130 >> 2)
+#define DP_AUX_REPLY_DATA   (0x0134 >> 2)
+#define DP_AUX_REPLY_CODE   (0x0138 >> 2)
+#define DP_AUX_REPLY_COUNT  (0x013C >> 2)
+#define DP_REPLY_DATA_COUNT (0x0148 >> 2)
+#define DP_REPLY_STATUS (0x014C >> 2)
+#define DP_HPD_DURATION (0x0150 >> 2)
+#define DP_MAIN_STREAM_HTOTAL   (0x0180 >> 2)
+#define DP_MAIN_STREAM_VTOTAL   (0x0184 >> 2)
+#define DP_MAIN_STREAM_POLARITY (0x0188 >> 2)
+#define DP_MAIN_STREAM_HSWIDTH  (0x018C >> 2)
+#define DP_MAIN_STREAM_VSWIDTH  (0x0190 >> 2)
+#define DP_MAIN_STREAM_HRES (0x0194 >> 

Re: [Qemu-devel] [PATCH V5 8/8] arm: xlnx-zynqmp: Add xlnx-dp and xlnx-dpdma

2015-11-23 Thread KONRAD Frederic



Le 20/11/2015 13:21, Alistair Francis a écrit :

On Fri, Oct 16, 2015 at 7:11 PM,   wrote:

From: KONRAD Frederic 

This adds the DP and the DPDMA to the Zynq MP platform.

Signed-off-by: KONRAD Frederic 
Reviewed-by: Peter Crosthwaite 
Tested-By: Hyun Kwon 
---
  hw/arm/xlnx-zynqmp.c | 20 
  include/hw/arm/xlnx-zynqmp.h |  5 +
  2 files changed, 25 insertions(+)

diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index b36ca3d..dfed5cd 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -32,6 +32,12 @@
  #define SATA_ADDR   0xFD0C
  #define SATA_NUM_PORTS  2

+#define DP_ADDR 0xfd4a
+#define DP_IRQ  113
+
+#define DPDMA_ADDR  0xfd4c
+#define DPDMA_IRQ   116
+
  static const uint64_t gem_addr[XLNX_ZYNQMP_NUM_GEMS] = {
  0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E,
  };
@@ -97,6 +103,11 @@ static void xlnx_zynqmp_init(Object *obj)

  object_initialize(&s->sata, sizeof(s->sata), TYPE_SYSBUS_AHCI);
  qdev_set_parent_bus(DEVICE(&s->sata), sysbus_get_default());
+
+object_initialize(&s->dp, sizeof(s->dp), TYPE_XLNX_DP);
+qdev_set_parent_bus(DEVICE(&s->dp), sysbus_get_default());

New line


+object_initialize(&s->dpdma, sizeof(s->dpdma), TYPE_XLNX_DPDMA);
+qdev_set_parent_bus(DEVICE(&s->dpdma), sysbus_get_default());
  }

  static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
@@ -258,6 +269,15 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)

  sysbus_mmio_map(SYS_BUS_DEVICE(&s->sata), 0, SATA_ADDR);
  sysbus_connect_irq(SYS_BUS_DEVICE(&s->sata), 0, gic_spi[SATA_INTR]);
+
+sysbus_mmio_map(SYS_BUS_DEVICE(&s->dp), 0, DP_ADDR);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->dp), 0, gic_spi[DP_IRQ]);

New line


+sysbus_mmio_map(SYS_BUS_DEVICE(&s->dpdma), 0, DPDMA_ADDR);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->dpdma), 0, gic_spi[DPDMA_IRQ]);
+object_property_set_bool(OBJECT(&s->dp), true, "realized", &err);
+object_property_set_bool(OBJECT(&s->dpdma), true, "realized", &err);

Can you add something to check these errors?


Ok.
BTW I'll move the I2C and AUX device out of xlnx-dp and put that here.
Is that ok with you?

Thanks,
Fred


Thanks,

Alistair


+object_property_set_link(OBJECT(&s->dp), OBJECT(&s->dpdma), "dpdma",
+ &error_abort);
  }

  static Property xlnx_zynqmp_props[] = {
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 4005a99..5a4d6cc 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -24,6 +24,8 @@
  #include "hw/char/cadence_uart.h"
  #include "hw/ide/pci.h"
  #include "hw/ide/ahci.h"
+#include "hw/dma/xlnx_dpdma.h"
+#include "hw/display/xlnx_dp.h"

  #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp"
  #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \
@@ -66,6 +68,9 @@ typedef struct XlnxZynqMPState {

  char *boot_cpu;
  ARMCPU *boot_cpu_ptr;
+
+XlnxDPState dp;
+XlnxDPDMAState dpdma;
  }  XlnxZynqMPState;

  #define XLNX_ZYNQMP_H
--
1.9.0







Re: [Qemu-devel] [PATCH] target-i386: Do not set MCG_SER_P by default

2015-11-23 Thread Eduardo Habkost
On Sat, Nov 21, 2015 at 02:09:25AM +0100, Borislav Petkov wrote:
> On Sat, Nov 21, 2015 at 12:11:35AM +0100, Andreas Färber wrote:
> > Hi,
> > 
> > CC'ing qemu-devel.
> 
> Ah, thanks.
> 
> > Am 21.11.2015 um 00:01 schrieb Borislav Petkov:
> > > From: Borislav Petkov 
> > > 
> > > Software Error Recovery, i.e. SER, is purely an Intel feature and it
> > > shouldn't be set by default. Enable it only on Intel.

What happens when SER is enabled on an AMD CPU? If it really
should't be enabled, why is KVM returning it on
KVM_X86_GET_MCE_CAP_SUPPORTED?

> > 
> > Is this new in 2.5? Otherwise we would probably need compatibility code
> > in pc*.[ch] for incoming live migration from older versions.
> 
> It looks it is really old, AFAIK from 2010:
> 
> c0532a76b407 ("MCE: Relay UCR MCE to guest")
> 
> You'd need to be more verbose about pc*.[ch]. An example perhaps...?

If you change something that's guest-visible and not part of the
migration stream, you need to keep the old behavior on older
machine-types (e.g. pc-i440fx-2.4), or the CPU will change under
the guest's feet when migrating to another host.

For examples, see the recent commits to include/hw/i386/pc.h.
They are all about keeping compatibility when CPUID bits are
changed:

33b5e8c0 target-i386: Disable rdtscp on Opteron_G* CPU models
6aa91e4a target-i386: Remove POPCNT from qemu64 and qemu32 CPU models
71195672 target-i386: Remove ABM from qemu64 CPU model
0909ad24 target-i386: Remove SSE4a from qemu64 CPU model

In the case of this code, it looks like it's already broken
because the resulting mcg_cap depends on host kernel capabilities
(the ones reported by kvm_get_mce_cap_supported()), and the data
initialized by target-i386/cpu.c:mce_init() is silently
overwritten by kvm_arch_init_vcpu(). So we would need to fix that
before implementing a proper compatibility mechanism for
mcg_cap.

-- 
Eduardo



Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Markus Armbruster
Marc-André Lureau  writes:

> Hi
>
> - Original Message -
>>
>> >> qemu-doc documents role only with chardev.  The code doesn't care.
>> >
>> > yeah, role is only really useful with a server. Another missing warning.
>> 
>> I think it makes sense only when we can migrate the shared memory
>> contents out-of-band.  Vaguely similar to block devices: either you
>> migrate them along (block migration), or you have to ensure they have
>> identical contents on the target in some other way.
>> 
>> Is the latter possible only with a server?
>
> The way ivshmem role works is:
> - master: will handle migration
> - peer: can't migrate
>
> It's not out-of-band, qemu handles the shared memory migration, even if
> the memory is owned by the server. In practice, it means you can only
> migrate one VM, or you migrate them all but you will migrate the shared
> memory N times and will have to synchronize in your guest app. I suppose 
> ivshmem
> "role" was designed to only migrate the master. Ability to migrate a pool of
> peer would be a significant new feature. I am not aware of such request.

I see.  But how is this supposed to work?

Before migration: one master and N peers connected to the server on host
A, N>=0.

After migration: one master and N' of the N peers connected to the
server on host B, N>=N'>=0, and the remaining N-N' peers still on host A
with their ivshmem device unplugged.

How would I do this even for N'==0?  I can't see how I'm supposted to
connect the migrated shared memory to a server on host B.

>> >> This is a byzantine mess even for QEMU standards.
>> >> 
>> >> Questions:
>> >> 
>> >> * Is it okay to have an unmapped BAR?
>> >
>> > I can't say much, though I remember I did some tests and it was ok.
>> 
>> Did you try chardev=...,size=S, where S is larger than what the server
>> provides?
>
> It will fall in check_shm_size().

Yes.  Called from ivshmem_read().  ivshmem_read() will then complain to
stderr, close the file descriptor we got from the server and leave the
BAR unmapped.  My question is how guests deal with that state.  Could be
anything from "detect the device is broken and fence it" to "kernel
panic".

Whatever it is, it could easily also happen if the guest wins the race
with the server and tries to use the device before it successfully got
its shared memory from the server.

>> A guest that fails there probably works for sufficiently small S only
>> when it loses the race with the server.
>> 
>> But my question is actually whether it's okay for a PCI device to
>> advertize a BAR, and then not map anything there.
>> 
>> Should realize wait for the server providing the shared memory?
>> 
>> >> * We actually have two distinct device kinds:
>> >> 
>> >>   - The basic device: shm with parameter size, or memdev
>> >> 
>> >>   - The doorbell-capable device: chardev with parameters size, vectors,
>> >> ioeventfd=on, msi
>> >> 
>> >>   Common parameters: use64, role, although the latter is only documented
>> >>   for the doorbell-capable device.
>> >> 
>
>> 
>> >> * Are we *sure* this is ready to become ABI?  I doubt it's ABI yet,
>> >>   because before Marc-André's massive rework, it was even worse (*much*
>> >>   worse), to a degree that makes me doubt anybody could use it
>> >>   seriously.
>> >
>> > It's supposed to be the same ABI as before, with the memdev addition.
>> 
>> Well, it's *crap*.  We shouldn't extend and known crap so it can get
>> used more widely, we should deprecate it in favour of something less
>> crappy.
>
> I think you overstate this, if it would be so bad I don't think anyone could 
> use
> it and it wouldn't probably be in qemu. We are mainly talking misconfiguration
> and missing features (that people may not actually need). I think
> before we split things, we can address your comments with more
> options checking and documentation.

We have at least two problems:

1. Unless the guest can reliably detect the doorbell feature, the
   doorbell feature is *broken*.

   As far as I can tell, a device with a doorbell behaves exactly like
   one without a doorbell until it got its shared memory from the
   server.  If that's correct, then doorbell detection is inherently
   racy.

   The only way to fix this in documentation is "broken, do not use".

   The maximally compatible way to fix this in code is to ensure the
   guest can't read register IVPosition before we got the shared memory
   from the server.  We can make realize wait, or the read.  The latter
   is probably an even worse idea.

   An easier way to fix it in code is splitting up the device, so guests
   can simply check the PCI device ID to figure out whether they have
   one with a doorbell.

   An even easier way is dropping the doorbell feature outright.

2. The UI is crap.

   We can fix this by rejecting nonsensical option combinations.
   However, the result will be more complex than splitting the device in
   two so that nonsensical options combinations are simply impossible.

   If we 

[Qemu-devel] [PATCH] PCI Trivial: remove superfluous code

2015-11-23 Thread Cao jin
remove superfluous code in do_pci_register_device(). See its caller:
pci_qdev_realize()

Signed-off-by: Cao jin 
---
 hw/pci/pci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..4d16da0 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -878,7 +878,6 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
return NULL;
 }
 
-pci_dev->devfn = devfn;
 dma_as = pci_device_iommu_address_space(pci_dev);
 
 memory_region_init_alias(&pci_dev->bus_master_enable_region,
-- 
2.1.0


-- 
This message has been scanned for viruses and
dangerous content by FCNIC, and is
believed to be clean.




[Qemu-devel] [PATCH v2 1/1] parallels: add format spec

2015-11-23 Thread Denis V. Lunev
From: Vladimir Sementsov-Ogievskiy 

This specifies Parallels image format as implemented in Parallels Cloud
Server 6.10

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Denis V. Lunev 
CC: Eric Blake 
CC: John Snow 
CC: Stefan Hajnoczi 
---
v2: add license info
switch to offsets from types in field descriptions

MAINTAINERS  |   1 +
 docs/specs/parallels.txt | 227 +++
 2 files changed, 228 insertions(+)
 create mode 100644 docs/specs/parallels.txt

diff --git a/MAINTAINERS b/MAINTAINERS
index 9e1fa72..23441e1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1471,6 +1471,7 @@ M: Denis V. Lunev 
 L: qemu-bl...@nongnu.org
 S: Supported
 F: block/parallels.c
+F: docs/specs/parallels.txt
 
 qed
 M: Stefan Hajnoczi 
diff --git a/docs/specs/parallels.txt b/docs/specs/parallels.txt
new file mode 100644
index 000..e58fb7c
--- /dev/null
+++ b/docs/specs/parallels.txt
@@ -0,0 +1,227 @@
+= License =
+
+Copyright (c) 2015 Denis Lunev
+Copyright (c) 2015 Vladimir Sementsov-Ogievskiy
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+= Parallels Expandable Image File Format =
+
+A Parallels expandable image file consists of three consecutive parts:
+* header
+* BAT
+* data area
+
+All numbers in a Parallels expandable image are stored in little-endian byte
+order.
+
+
+== Definitions ==
+
+SectorA 512-byte data chunk.
+
+Cluster   A data chunk of the size specified in the image header.
+  Currently, the default size is 1MiB (2048 sectors). In previous
+  versions, cluster sizes of 63 sectors, 256 and 252 kilobytes were
+  used.
+
+BAT   Block Allocation Table, an entity that contains information for
+  guest-to-host I/O data address translation.
+
+
+== Header ==
+
+The header is placed at the start of an image and contains the following
+fields:
+
+Bytes:
+   0 - 15:magic
+  Must contain "WithoutFreeSpace" or "WithouFreSpacExt".
+
+  16 - 19:version
+  Must be 2.
+
+  20 - 23:heads
+  Disk geometry parameter for guest.
+
+  24 - 27:cylinders
+  Disk geometry parameter for guest.
+
+  28 - 31:tracks
+  Cluster size, in sectors.
+
+  32 - 35:nb_bat_entries
+  Disk size, in clusters (BAT size).
+
+  36 - 43:nb_sectors
+  Disk size, in sectors.
+
+  For "WithoutFreeSpace" images:
+  Only the lowest 4 bytes are used. The highest 4 bytes must be
+  cleared in this case.
+
+  For "WithouFreSpacExt" images, there are no such
+  restrictions.
+
+  44 - 47:in_use
+  Set to 0x746F6E59 when the image is opened by software in R/W
+  mode; set to 0x312e3276 when the image is closed.
+
+  A zero in this field means that the image was opened by an old
+  version of the software that doesn't support Format Extension
+  (see below).
+
+  Other values are not allowed.
+
+  48 - 51:data_off
+  An offset, in sectors, from the start of the file to the start of
+  the data area.
+
+  For "WithoutFreeSpace" images:
+  - If data_off is zero, the offset is calculated as the end of BAT
+table plus some padding to ensure sector size alignment.
+  - If data_off is non-zero, the offset should be aligned to sector
+size. However it is recommended to align it to cluster size for
+newly created images.
+
+  For "WithouFreSpacExt" images:
+  data_off must be non-zero and aligned to cluster size.
+
+  52 - 55:flags
+  Miscellaneous flags.
+
+  Bit 0: Empty Image bit. If set, the image should be
+ considered clear.
+
+  Bits 2-31: Unused.
+
+  56 - 63:ext_off
+  Format Extension offset, an offset, in sectors, from the start of
+  the file to the start of the Format Extension Cluster.
+
+  ext_off must meet the same requirements as cluster offsets
+  defined by BAT entries (see below).
+
+
+== BAT ==
+
+BAT is placed immediately after the image header. In the file, BAT is a
+contiguous array of 32-bit unsigned little-endian integers with
+(bat_entries * 4) bytes size.
+
+Each BAT entry contains an offset from the start of the file to the
+corresponding cluster. The offset set in clusters for "WithouFreSpacExt" images
+and in sectors for "WithoutFreeSpace" images.
+
+If a BAT entry is zero, the corresponding cluster is not allocated and should
+be considered as filled with zeroes.
+
+Cluster offsets specified by BAT entries must meet the following requirements:
+- the value must not be lower than data offset (provided by header.data_off

Re: [Qemu-devel] [PATCH WIP 01/30] crypto: add QCryptoSecret object class for password/key handling

2015-11-23 Thread Markus Armbruster
"Daniel P. Berrange"  writes:

> On Fri, Nov 20, 2015 at 03:09:25PM -0700, Eric Blake wrote:
>> On 11/20/2015 11:04 AM, Daniel P. Berrange wrote:
>> > +
>> > +static const char *base64_valid_chars =
>> > +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
>> > +
>> > +static int
>> > +qcrypto_secret_validate_base64(const uint8_t *input,
>> > +   size_t inputlen,
>> > +   Error **errp)
>> 
>> Don't we already have base64 utility methods available?
>
> We normally use glib,  g_base64_encode/decode. Unfortunately the
> decode method doesn't provide any usefull error reporting facility.
> It just silently skips any characters that are outside the valid
> set.  So the only way I could get any kind of sensible error report
> was to do this validation myself against the set of permitted base64
> characters.

Yes.  Same problem elsewhere, e.g. ringbuf-write.  qapi-schema.json:

#  - base64: data must be base64 encoded text.  Its binary
#decoding gets written.
#Bug: invalid base64 is currently not rejected.
#Whitespace *is* invalid.

This suggests that we shouldn't bury this in crypto/, but instead add it
to util/.

A replacement for g_base64_decode() could be easier to use than a
checker function to use in addition to g_base64_decode(),

[...]



Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Marc-André Lureau
Hi

- Original Message -
> > "role" was designed to only migrate the master. Ability to migrate a pool
> > of
> > peer would be a significant new feature. I am not aware of such request.
> 
> I see.  But how is this supposed to work?
> 
> Before migration: one master and N peers connected to the server on host
> A, N>=0.
> 
> After migration: one master and N' of the N peers connected to the
> server on host B, N>=N'>=0, and the remaining N-N' peers still on host A
> with their ivshmem device unplugged.
> 
> How would I do this even for N'==0?  I can't see how I'm supposted to
> connect the migrated shared memory to a server on host B.

I am not sure I understand you.

You can't migrate the peers.

As I said, "ability to migrate a pool of peer would be a significant new 
feature".

> >> Did you try chardev=...,size=S, where S is larger than what the server
> >> provides?
> >
> > It will fall in check_shm_size().
> 
> Yes.  Called from ivshmem_read().  ivshmem_read() will then complain to
> stderr, close the file descriptor we got from the server and leave the
> BAR unmapped.  My question is how guests deal with that state.  Could be
> anything from "detect the device is broken and fence it" to "kernel
> panic".
> Whatever it is, it could easily also happen if the guest wins the race
> with the server and tries to use the device before it successfully got
> its shared memory from the server.

It's nothing bad from what I remember on qemu side. On guest side, it
depends how your driver/user is implemented I suppose. To me, it's not
a normal case, and the error should be enough to diagnose it.

> 1. Unless the guest can reliably detect the doorbell feature, the
>doorbell feature is *broken*.
> 
>As far as I can tell, a device with a doorbell behaves exactly like
>one without a doorbell until it got its shared memory from the
>server.  If that's correct, then doorbell detection is inherently
>racy.

There are many ways you can do synchronization.
In test_ivshmem_server(), I trivially wait for the membar with the
required signature to be mapped. Verify that peers have different ids,
and then start using the doorbell. That seems good enough.

>The only way to fix this in documentation is "broken, do not use".

It works fine in the tests. Feel free to point out races or other issues.

>The maximally compatible way to fix this in code is to ensure the
>guest can't read register IVPosition before we got the shared memory
>from the server.  We can make realize wait, or the read.  The latter
>is probably an even worse idea.
> 
>An easier way to fix it in code is splitting up the device, so guests
>can simply check the PCI device ID to figure out whether they have
>one with a doorbell.
> 
>An even easier way is dropping the doorbell feature outright.
> 
> 2. The UI is crap.
> 
>We can fix this by rejecting nonsensical option combinations.

Yes, I think it's the simplest way for now. I dislike having to break stuff 
when you can overcome it with a few more checks.

>However, the result will be more complex than splitting the device in
>two so that nonsensical options combinations are simply impossible.

I disagree, adding more checks will add a few dozen lines with minimal impact. 
Splitting things will break stuff and require significant effort to share 
correctly what can be shared etc.

>If we need to split it anyway to fix the doorbell, we can clean up
>the UI at next to no cost.

I don't think the doorbell is broken.



Re: [Qemu-devel] [PULL for-2.5] tcg: Fix highwater check

2015-11-23 Thread Richard Henderson

On 11/23/2015 02:16 PM, Stefan Weil wrote:

Am 23.11.2015 um 13:45 schrieb Richard Henderson:

From: John Clarke 

A simple typo in the variable to use when comparing vs the highwater mark.
Reports are that qemu can in fact segfault occasionally due to this mistake.

Signed-off-by: John Clarke 
Signed-off-by: Richard Henderson 
---
  tcg/tcg.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 682af8a..b20ed19 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2443,7 +2443,7 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit 
*gen_code_buf)
 one operation beginning below the high water mark cannot overrun
 the buffer completely.  Thus we can test for overflow after
 generating code without having to check during generation.  */
-if (unlikely(s->code_gen_ptr > s->code_gen_highwater)) {
+if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
  return -1;
  }
  }



Is a comparison of void pointers portable?


Of course.  Particularly since these really are pointers into the same 
allocated object.  That's 100% ANSI C.



code_gen_highwater should be fixed anyway because
in translate-all a difference is calculated with it.


Yes, but we freely make use of this gcc extension in many places.


r~




[Qemu-devel] [PATCH 0/2] Add basic "detach" support for dump-guest-memory

2015-11-23 Thread Peter Xu
Currently, dump-guest-memory supports synchronous operation only. This patch
sets are adding "detach" support for it (just like "migrate -d" for
migration). When "-d" is provided, dump-guest-memory command will return
immediately without hanging user. This should be useful when the backend
storage for the dump file is very slow.

Peter Xu (2):
  dump-guest-memory: add "detach" flag for QMP/HMP interfaces
  dump-guest-memory: add basic "detach" support.

 dump.c| 62 ++-
 hmp-commands.hx   |  5 +++--
 hmp.c |  3 ++-
 include/sysemu/dump.h |  4 
 qapi-schema.json  |  3 ++-
 qmp-commands.hx   |  4 ++--
 qmp.c |  9 
 vl.c  |  3 +++
 8 files changed, 81 insertions(+), 12 deletions(-)

-- 
2.4.3




[Qemu-devel] [PATCH 2/2] dump-guest-memory: add basic "detach" support.

2015-11-23 Thread Peter Xu
This will allow the user specify "-d" (just like command
"migrate") when using "dump-guest-memory" command. When
specified, one background thread is created to do the dump work.
One flag is added to show whether there is a background dump
work in progress.

Signed-off-by: Peter Xu 
---
 dump.c| 59 ++-
 include/sysemu/dump.h |  4 
 qmp.c |  9 
 vl.c  |  3 +++
 4 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/dump.c b/dump.c
index 3ec3423..c2e14cd 100644
--- a/dump.c
+++ b/dump.c
@@ -1442,6 +1442,9 @@ static void dump_init(DumpState *s, int fd, bool 
has_format,
 Error *err = NULL;
 int ret;
 
+s->has_format = has_format;
+s->format = format;
+
 /* kdump-compressed is conflict with paging and filter */
 if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
 assert(!paging && !has_filter);
@@ -1595,6 +1598,45 @@ cleanup:
 dump_cleanup(s);
 }
 
+/* whether there is a dump in progress */
+extern bool dump_in_progress_p;
+
+static bool dump_ownership_set(bool value)
+{
+return atomic_xchg(&dump_in_progress_p, value);
+}
+
+/* return true when owner taken, false otherwise */
+static bool dump_ownership_take(void)
+{
+bool ret = dump_ownership_set(1);
+return ret == 0;
+}
+
+/* we should only call this after all dump work finished */
+static void dump_ownership_release(void)
+{
+dump_ownership_set(0);
+}
+
+static void dump_process(DumpState *s, Error **errp)
+{
+if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
+create_kdump_vmcore(s, errp);
+} else {
+create_vmcore(s, errp);
+}
+dump_ownership_release();
+}
+
+static void *dump_thread(void *data)
+{
+DumpState *s = (DumpState *)data;
+dump_process(s, NULL);
+g_free(s);
+return NULL;
+}
+
 void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
int64_t begin, bool has_length,
int64_t length, bool has_format,
@@ -1662,6 +1704,12 @@ void qmp_dump_guest_memory(bool paging, const char 
*file, bool has_begin,
 return;
 }
 
+/* we could only allow one dump at a time. */
+if (!dump_ownership_take()) {
+error_setg(errp, "another dump is in progress.");
+return;
+}
+
 s = g_malloc0(sizeof(DumpState));
 
 dump_init(s, fd, has_format, format, paging, has_begin,
@@ -1672,13 +1720,14 @@ void qmp_dump_guest_memory(bool paging, const char 
*file, bool has_begin,
 return;
 }
 
-if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
-create_kdump_vmcore(s, errp);
+if (!detach) {
+dump_process(s, errp);
+g_free(s);
 } else {
-create_vmcore(s, errp);
+qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
+   s, QEMU_THREAD_DETACHED);
+/* DumpState is freed in dump thread */
 }
-
-g_free(s);
 }
 
 DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 7e4ec5c..2aeffc8 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -183,6 +183,10 @@ typedef struct DumpState {
 off_t offset_page;  /* offset of page part in vmcore */
 size_t num_dumpable;/* number of page that can be dumped */
 uint32_t flag_compress; /* indicate the compression format */
+
+QemuThread dump_thread; /* only used when do async dump */
+bool has_format;/* whether format is provided */
+DumpGuestMemoryFormat format; /* valid only if has_format == true */
 } DumpState;
 
 uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
diff --git a/qmp.c b/qmp.c
index 0a1fa19..ea57b57 100644
--- a/qmp.c
+++ b/qmp.c
@@ -168,12 +168,21 @@ SpiceInfo *qmp_query_spice(Error **errp)
 };
 #endif
 
+extern bool dump_in_progress_p;
+
 void qmp_cont(Error **errp)
 {
 Error *local_err = NULL;
 BlockBackend *blk;
 BlockDriverState *bs;
 
+/* if there is a dump in background, we should wait until the dump
+ * finished */
+if (dump_in_progress_p) {
+error_setg(errp, "there is a dump in process, please wait.");
+return;
+}
+
 if (runstate_needs_reset()) {
 error_setg(errp, "Resetting the Virtual Machine is required");
 return;
diff --git a/vl.c b/vl.c
index 525929b..ef7a936 100644
--- a/vl.c
+++ b/vl.c
@@ -204,6 +204,9 @@ bool xen_allowed;
 uint32_t xen_domid;
 enum xen_mode xen_mode = XEN_EMULATE;
 
+/* whether dump is in progress */
+bool dump_in_progress_p = false;
+
 static int has_defaults = 1;
 static int default_serial = 1;
 static int default_parallel = 1;
-- 
2.4.3




[Qemu-devel] [PATCH REPOST 0/2] Add basic "detach" support for dump-guest-memory

2015-11-23 Thread Peter Xu
Currently, dump-guest-memory supports synchronous operation only. This patch
sets are adding "detach" support for it (just like "migrate -d" for
migration). When "-d" is provided, dump-guest-memory command will return
immediately without hanging user. This should be useful when the backend
storage for the dump file is very slow.

Peter Xu (2):
  dump-guest-memory: add "detach" flag for QMP/HMP interfaces
  dump-guest-memory: add basic "detach" support.

 dump.c| 62 ++-
 hmp-commands.hx   |  5 +++--
 hmp.c |  3 ++-
 include/sysemu/dump.h |  4 
 qapi-schema.json  |  3 ++-
 qmp-commands.hx   |  4 ++--
 qmp.c |  9 
 vl.c  |  3 +++
 8 files changed, 81 insertions(+), 12 deletions(-)

-- 
2.4.3




[Qemu-devel] [PATCH 1/2] dump-guest-memory: add "detach" flag for QMP/HMP interfaces

2015-11-23 Thread Peter Xu
This patch only add the interfaces, but not implementing them.

Signed-off-by: Peter Xu 
---
 dump.c   | 3 ++-
 hmp-commands.hx  | 5 +++--
 hmp.c| 3 ++-
 qapi-schema.json | 3 ++-
 qmp-commands.hx  | 4 ++--
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/dump.c b/dump.c
index 78b7d84..3ec3423 100644
--- a/dump.c
+++ b/dump.c
@@ -1598,7 +1598,8 @@ cleanup:
 void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
int64_t begin, bool has_length,
int64_t length, bool has_format,
-   DumpGuestMemoryFormat format, Error **errp)
+   DumpGuestMemoryFormat format, bool detach,
+   Error **errp)
 {
 const char *p;
 int fd = -1;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index bb52e4d..8e27671 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1056,10 +1056,11 @@ ETEXI
 
 {
 .name   = "dump-guest-memory",
-.args_type  = 
"paging:-p,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:i?,length:i?",
-.params = "[-p] [-z|-l|-s] filename [begin length]",
+.args_type  = 
"paging:-p,detach:-d,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:i?,length:i?",
+.params = "[-p] [-d] [-z|-l|-s] filename [begin length]",
 .help   = "dump guest memory into file 'filename'.\n\t\t\t"
   "-p: do paging to get guest's memory mapping.\n\t\t\t"
+  "-d: return immediately (not wait for 
completion).\n\t\t\t"
   "-z: dump in kdump-compressed format, with zlib 
compression.\n\t\t\t"
   "-l: dump in kdump-compressed format, with lzo 
compression.\n\t\t\t"
   "-s: dump in kdump-compressed format, with snappy 
compression.\n\t\t\t"
diff --git a/hmp.c b/hmp.c
index 2140605..4c5480d 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1580,6 +1580,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict 
*qdict)
 {
 Error *err = NULL;
 bool paging = qdict_get_try_bool(qdict, "paging", false);
+bool detach = qdict_get_try_bool(qdict, "detach", false);
 bool zlib = qdict_get_try_bool(qdict, "zlib", false);
 bool lzo = qdict_get_try_bool(qdict, "lzo", false);
 bool snappy = qdict_get_try_bool(qdict, "snappy", false);
@@ -1619,7 +1620,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict 
*qdict)
 prot = g_strconcat("file:", file, NULL);
 
 qmp_dump_guest_memory(paging, prot, has_begin, begin, has_length, length,
-  true, dump_format, &err);
+  true, dump_format, detach, &err);
 hmp_handle_error(mon, &err);
 g_free(prot);
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 8b1a423..1211082 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2132,7 +2132,8 @@
 ##
 { 'command': 'dump-guest-memory',
   'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
-'*length': 'int', '*format': 'DumpGuestMemoryFormat' } }
+'*length': 'int', '*format': 'DumpGuestMemoryFormat',
+'detach': 'bool'} }
 
 ##
 # @DumpGuestMemoryCapability:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 9d8b42f..86864f6 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -840,8 +840,8 @@ EQMP
 
 {
 .name   = "dump-guest-memory",
-.args_type  = "paging:b,protocol:s,begin:i?,end:i?,format:s?",
-.params = "-p protocol [begin] [length] [format]",
+.args_type  = "paging:b,detach:d,protocol:s,begin:i?,end:i?,format:s?",
+.params = "-p protocol [-d] [begin] [length] [format]",
 .help   = "dump guest memory to file",
 .mhandler.cmd_new = qmp_marshal_dump_guest_memory,
 },
-- 
2.4.3




[Qemu-devel] [PATCH REPOST 2/2] dump-guest-memory: add basic "detach" support.

2015-11-23 Thread Peter Xu
This will allow the user specify "-d" (just like command
"migrate") when using "dump-guest-memory" command. When
specified, one background thread is created to do the dump work.
One flag is added to show whether there is a background dump
work in progress.

Signed-off-by: Peter Xu 
---
 dump.c| 59 ++-
 include/sysemu/dump.h |  4 
 qmp.c |  9 
 vl.c  |  3 +++
 4 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/dump.c b/dump.c
index 3ec3423..c2e14cd 100644
--- a/dump.c
+++ b/dump.c
@@ -1442,6 +1442,9 @@ static void dump_init(DumpState *s, int fd, bool 
has_format,
 Error *err = NULL;
 int ret;
 
+s->has_format = has_format;
+s->format = format;
+
 /* kdump-compressed is conflict with paging and filter */
 if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
 assert(!paging && !has_filter);
@@ -1595,6 +1598,45 @@ cleanup:
 dump_cleanup(s);
 }
 
+/* whether there is a dump in progress */
+extern bool dump_in_progress_p;
+
+static bool dump_ownership_set(bool value)
+{
+return atomic_xchg(&dump_in_progress_p, value);
+}
+
+/* return true when owner taken, false otherwise */
+static bool dump_ownership_take(void)
+{
+bool ret = dump_ownership_set(1);
+return ret == 0;
+}
+
+/* we should only call this after all dump work finished */
+static void dump_ownership_release(void)
+{
+dump_ownership_set(0);
+}
+
+static void dump_process(DumpState *s, Error **errp)
+{
+if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
+create_kdump_vmcore(s, errp);
+} else {
+create_vmcore(s, errp);
+}
+dump_ownership_release();
+}
+
+static void *dump_thread(void *data)
+{
+DumpState *s = (DumpState *)data;
+dump_process(s, NULL);
+g_free(s);
+return NULL;
+}
+
 void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
int64_t begin, bool has_length,
int64_t length, bool has_format,
@@ -1662,6 +1704,12 @@ void qmp_dump_guest_memory(bool paging, const char 
*file, bool has_begin,
 return;
 }
 
+/* we could only allow one dump at a time. */
+if (!dump_ownership_take()) {
+error_setg(errp, "another dump is in progress.");
+return;
+}
+
 s = g_malloc0(sizeof(DumpState));
 
 dump_init(s, fd, has_format, format, paging, has_begin,
@@ -1672,13 +1720,14 @@ void qmp_dump_guest_memory(bool paging, const char 
*file, bool has_begin,
 return;
 }
 
-if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
-create_kdump_vmcore(s, errp);
+if (!detach) {
+dump_process(s, errp);
+g_free(s);
 } else {
-create_vmcore(s, errp);
+qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
+   s, QEMU_THREAD_DETACHED);
+/* DumpState is freed in dump thread */
 }
-
-g_free(s);
 }
 
 DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h
index 7e4ec5c..2aeffc8 100644
--- a/include/sysemu/dump.h
+++ b/include/sysemu/dump.h
@@ -183,6 +183,10 @@ typedef struct DumpState {
 off_t offset_page;  /* offset of page part in vmcore */
 size_t num_dumpable;/* number of page that can be dumped */
 uint32_t flag_compress; /* indicate the compression format */
+
+QemuThread dump_thread; /* only used when do async dump */
+bool has_format;/* whether format is provided */
+DumpGuestMemoryFormat format; /* valid only if has_format == true */
 } DumpState;
 
 uint16_t cpu_to_dump16(DumpState *s, uint16_t val);
diff --git a/qmp.c b/qmp.c
index 0a1fa19..ea57b57 100644
--- a/qmp.c
+++ b/qmp.c
@@ -168,12 +168,21 @@ SpiceInfo *qmp_query_spice(Error **errp)
 };
 #endif
 
+extern bool dump_in_progress_p;
+
 void qmp_cont(Error **errp)
 {
 Error *local_err = NULL;
 BlockBackend *blk;
 BlockDriverState *bs;
 
+/* if there is a dump in background, we should wait until the dump
+ * finished */
+if (dump_in_progress_p) {
+error_setg(errp, "there is a dump in process, please wait.");
+return;
+}
+
 if (runstate_needs_reset()) {
 error_setg(errp, "Resetting the Virtual Machine is required");
 return;
diff --git a/vl.c b/vl.c
index 525929b..ef7a936 100644
--- a/vl.c
+++ b/vl.c
@@ -204,6 +204,9 @@ bool xen_allowed;
 uint32_t xen_domid;
 enum xen_mode xen_mode = XEN_EMULATE;
 
+/* whether dump is in progress */
+bool dump_in_progress_p = false;
+
 static int has_defaults = 1;
 static int default_serial = 1;
 static int default_parallel = 1;
-- 
2.4.3




[Qemu-devel] [PATCH REPOST 1/2] dump-guest-memory: add "detach" flag for QMP/HMP interfaces

2015-11-23 Thread Peter Xu
This patch only add the interfaces, but not implementing them.

Signed-off-by: Peter Xu 
---
 dump.c   | 3 ++-
 hmp-commands.hx  | 5 +++--
 hmp.c| 3 ++-
 qapi-schema.json | 3 ++-
 qmp-commands.hx  | 4 ++--
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/dump.c b/dump.c
index 78b7d84..3ec3423 100644
--- a/dump.c
+++ b/dump.c
@@ -1598,7 +1598,8 @@ cleanup:
 void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
int64_t begin, bool has_length,
int64_t length, bool has_format,
-   DumpGuestMemoryFormat format, Error **errp)
+   DumpGuestMemoryFormat format, bool detach,
+   Error **errp)
 {
 const char *p;
 int fd = -1;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index bb52e4d..8e27671 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1056,10 +1056,11 @@ ETEXI
 
 {
 .name   = "dump-guest-memory",
-.args_type  = 
"paging:-p,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:i?,length:i?",
-.params = "[-p] [-z|-l|-s] filename [begin length]",
+.args_type  = 
"paging:-p,detach:-d,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:i?,length:i?",
+.params = "[-p] [-d] [-z|-l|-s] filename [begin length]",
 .help   = "dump guest memory into file 'filename'.\n\t\t\t"
   "-p: do paging to get guest's memory mapping.\n\t\t\t"
+  "-d: return immediately (not wait for 
completion).\n\t\t\t"
   "-z: dump in kdump-compressed format, with zlib 
compression.\n\t\t\t"
   "-l: dump in kdump-compressed format, with lzo 
compression.\n\t\t\t"
   "-s: dump in kdump-compressed format, with snappy 
compression.\n\t\t\t"
diff --git a/hmp.c b/hmp.c
index 2140605..4c5480d 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1580,6 +1580,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict 
*qdict)
 {
 Error *err = NULL;
 bool paging = qdict_get_try_bool(qdict, "paging", false);
+bool detach = qdict_get_try_bool(qdict, "detach", false);
 bool zlib = qdict_get_try_bool(qdict, "zlib", false);
 bool lzo = qdict_get_try_bool(qdict, "lzo", false);
 bool snappy = qdict_get_try_bool(qdict, "snappy", false);
@@ -1619,7 +1620,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict 
*qdict)
 prot = g_strconcat("file:", file, NULL);
 
 qmp_dump_guest_memory(paging, prot, has_begin, begin, has_length, length,
-  true, dump_format, &err);
+  true, dump_format, detach, &err);
 hmp_handle_error(mon, &err);
 g_free(prot);
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 8b1a423..1211082 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2132,7 +2132,8 @@
 ##
 { 'command': 'dump-guest-memory',
   'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
-'*length': 'int', '*format': 'DumpGuestMemoryFormat' } }
+'*length': 'int', '*format': 'DumpGuestMemoryFormat',
+'detach': 'bool'} }
 
 ##
 # @DumpGuestMemoryCapability:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 9d8b42f..86864f6 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -840,8 +840,8 @@ EQMP
 
 {
 .name   = "dump-guest-memory",
-.args_type  = "paging:b,protocol:s,begin:i?,end:i?,format:s?",
-.params = "-p protocol [begin] [length] [format]",
+.args_type  = "paging:b,detach:d,protocol:s,begin:i?,end:i?,format:s?",
+.params = "-p protocol [-d] [begin] [length] [format]",
 .help   = "dump guest memory to file",
 .mhandler.cmd_new = qmp_marshal_dump_guest_memory,
 },
-- 
2.4.3




[Qemu-devel] [Bug 1518969] [NEW] Instance of QEMU doesn't unplug virtio scsi disk after device_del and drive_del commands

2015-11-23 Thread Marian Horban
Public bug reported:

device_del and drive_del commands don't cause virtio disk detaching

Steps to reproduce:
1. Run instance

2. Attach virtio scsi disk

3. Reboot instance

4. Immediately after reboot detach disk with QEMU commands:
device_del
drive_del

Expected result:
Disk should be detached anyway

Actual:
Domain info contains attached disk even after waiting long period of time(5 min 
in my case).

Additional info:
QEMU process:
root 29010 42.6  1.9 562036 61272 ?Rl   03:42   0:01 
/usr/bin/qemu-system-x86_64 -name instance-0069 -S -machine 
pc-i440fx-trusty,accel=tcg,usb=off -m 64 -realtime mlock=off -smp 
1,sockets=1,cores=1,threads=1 -uuid d2418536-2547-4740-96b5-0d4f1d74b9f3 
-smbios type=1,manufacturer=OpenStack Foundation,product=OpenStack 
Nova,version=13.0.0,serial=1fd8f69a-909b-4ed1-aae9-4fc9318fc622,uuid=d2418536-2547-4740-96b5-0d4f1d74b9f3,family=Virtual
 Machine -no-user-config -nodefaults -chardev 
socket,id=charmonitor,path=/var/lib/libvirt/qemu/instance-0069.monitor,server,nowait
 -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown 
-boot strict=on -kernel 
/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/kernel 
-initrd 
/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/ramdisk 
-append root=/dev/vda console=tty0 console=ttyS0 no_timer_check -device 
piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive 
file=/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/disk,if=none,id=drive-virtio-disk0,format=qcow2,cache=none
 -device 
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1
 -drive 
file=/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/disk.config,if=none,id=drive-ide0-1-1,readonly=on,format=raw,cache=none
 -device ide-cd,bus=ide.1,unit=1,drive=drive-ide0-1-1,id=ide0-1-1 -netdev 
tap,fd=18,id=hostnet0 -device 
virtio-net-pci,netdev=hostnet0,id=net0,mac=fa:16:3e:1a:10:3d,bus=pci.0,addr=0x3 
-chardev 
file,id=charserial0,path=/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/console.log
 -device isa-serial,chardev=charserial0,id=serial0 -chardev pty,id=charserial1 
-device isa-serial,chardev=charserial1,id=serial1 -vnc 127.0.0.1:1 -k en-us 
-device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device 
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5

QEMU version:
qemu-system-x86_64 --version
QEMU emulator version 2.0.0 (Debian 2.0.0+dfsg-2ubuntu1.19), Copyright (c) 
2003-2008 Fabrice Bellard

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1518969

Title:
  Instance of QEMU doesn't unplug virtio scsi disk after device_del and
  drive_del commands

Status in QEMU:
  New

Bug description:
  device_del and drive_del commands don't cause virtio disk detaching

  Steps to reproduce:
  1. Run instance

  2. Attach virtio scsi disk

  3. Reboot instance

  4. Immediately after reboot detach disk with QEMU commands:
  device_del
  drive_del

  Expected result:
  Disk should be detached anyway

  Actual:
  Domain info contains attached disk even after waiting long period of time(5 
min in my case).

  Additional info:
  QEMU process:
  root 29010 42.6  1.9 562036 61272 ?Rl   03:42   0:01 
/usr/bin/qemu-system-x86_64 -name instance-0069 -S -machine 
pc-i440fx-trusty,accel=tcg,usb=off -m 64 -realtime mlock=off -smp 
1,sockets=1,cores=1,threads=1 -uuid d2418536-2547-4740-96b5-0d4f1d74b9f3 
-smbios type=1,manufacturer=OpenStack Foundation,product=OpenStack 
Nova,version=13.0.0,serial=1fd8f69a-909b-4ed1-aae9-4fc9318fc622,uuid=d2418536-2547-4740-96b5-0d4f1d74b9f3,family=Virtual
 Machine -no-user-config -nodefaults -chardev 
socket,id=charmonitor,path=/var/lib/libvirt/qemu/instance-0069.monitor,server,nowait
 -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown 
-boot strict=on -kernel 
/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/kernel 
-initrd 
/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/ramdisk 
-append root=/dev/vda console=tty0 console=ttyS0 no_timer_check -device 
piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive 
file=/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/disk,if=none,id=drive-virtio-disk0,format=qcow2,cache=none
 -device 
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1
 -drive 
file=/opt/stack/data/nova/instances/d2418536-2547-4740-96b5-0d4f1d74b9f3/disk.config,if=none,id=drive-ide0-1-1,readonly=on,format=raw,cache=none
 -device ide-cd,bus=ide.1,unit=1,drive=drive-ide0-1-1,id=ide0-1-1 -netdev 
tap,fd=18,id=hostnet0 -device 
virtio-net-pci,netdev=hostnet0,id=net0,mac=fa:16:3e:1a:10:3d,bus=pci.0,addr=0x3 
-chardev 
file,id=charserial0,path=/opt/stack/data/nova/instances/d2418536-2547-4740-96b5

Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Markus Armbruster
Marc-André Lureau  writes:

> Hi
>
> - Original Message -
>> > "role" was designed to only migrate the master. Ability to migrate a pool
>> > of
>> > peer would be a significant new feature. I am not aware of such request.
>> 
>> I see.  But how is this supposed to work?
>> 
>> Before migration: one master and N peers connected to the server on host
>> A, N>=0.
>> 
>> After migration: one master and N' of the N peers connected to the
>> server on host B, N>=N'>=0, and the remaining N-N' peers still on host A
>> with their ivshmem device unplugged.
>> 
>> How would I do this even for N'==0?  I can't see how I'm supposted to
>> connect the migrated shared memory to a server on host B.
>
> I am not sure I understand you.
>
> You can't migrate the peers.

Then explain the case N'=0 to me: how can you migrate the master so that
it's connected to a server afterwards?

> As I said, "ability to migrate a pool of peer would be a significant
> new feature".
>
>> >> Did you try chardev=...,size=S, where S is larger than what the server
>> >> provides?
>> >
>> > It will fall in check_shm_size().
>> 
>> Yes.  Called from ivshmem_read().  ivshmem_read() will then complain to
>> stderr, close the file descriptor we got from the server and leave the
>> BAR unmapped.  My question is how guests deal with that state.  Could be
>> anything from "detect the device is broken and fence it" to "kernel
>> panic".
>> Whatever it is, it could easily also happen if the guest wins the race
>> with the server and tries to use the device before it successfully got
>> its shared memory from the server.
>
> It's nothing bad from what I remember on qemu side. On guest side, it
> depends how your driver/user is implemented I suppose. To me, it's not
> a normal case, and the error should be enough to diagnose it.
>
>> 1. Unless the guest can reliably detect the doorbell feature, the
>>doorbell feature is *broken*.
>> 
>>As far as I can tell, a device with a doorbell behaves exactly like
>>one without a doorbell until it got its shared memory from the
>>server.  If that's correct, then doorbell detection is inherently
>>racy.
>
> There are many ways you can do synchronization.
> In test_ivshmem_server(), I trivially wait for the membar with the
> required signature to be mapped. Verify that peers have different ids,
> and then start using the doorbell. That seems good enough.
>
>>The only way to fix this in documentation is "broken, do not use".
>
> It works fine in the tests. Feel free to point out races or other issues.

I think I did: doorbell detection is inherently racy.

If you think it isn't, please refute my reasoning.

>>The maximally compatible way to fix this in code is to ensure the
>>guest can't read register IVPosition before we got the shared memory
>>from the server.  We can make realize wait, or the read.  The latter
>>is probably an even worse idea.
>> 
>>An easier way to fix it in code is splitting up the device, so guests
>>can simply check the PCI device ID to figure out whether they have
>>one with a doorbell.
>> 
>>An even easier way is dropping the doorbell feature outright.
>> 
>> 2. The UI is crap.
>> 
>>We can fix this by rejecting nonsensical option combinations.
>
> Yes, I think it's the simplest way for now. I dislike having to break
> stuff when you can overcome it with a few more checks.
>
>>However, the result will be more complex than splitting the device in
>>two so that nonsensical options combinations are simply impossible.
>
> I disagree, adding more checks will add a few dozen lines with minimal
> impact. Splitting things will break stuff and require significant
> effort to share correctly what can be shared etc.
>
>>If we need to split it anyway to fix the doorbell, we can clean up
>>the UI at next to no cost.
>
> I don't think the doorbell is broken.

If it's not broken, please explain to me how the guest should find out
whether its ivshmem device sports a doorbell.



Re: [Qemu-devel] [RFC PATCH 0/9] vhost-nvme: new qemu nvme backend using nvme target

2015-11-23 Thread Paolo Bonzini


On 23/11/2015 09:17, Ming Lin wrote:
> On Sat, 2015-11-21 at 14:11 +0100, Paolo Bonzini wrote:
>>
>> On 20/11/2015 01:20, Ming Lin wrote:
>>> One improvment could be to use google's NVMe vendor extension that
>>> I send in another thread, aslo here:
>>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=nvme-google-ext
>>>
>>> Qemu side:
>>> http://www.minggr.net/cgit/cgit.cgi/qemu/log/?h=vhost-nvme.0
>>> Kernel side also here:
>>> https://git.kernel.org/cgit/linux/kernel/git/mlin/linux.git/log/?h=vhost-nvme.0
>>
>> How much do you get with vhost-nvme plus vendor extension, compared to
>> 190 MB/s for QEMU?
> 
> There is still some bug. I'll update.

Sure.

>> Note that in all likelihood, QEMU can actually do better than 190 MB/s,
>> and gain more parallelism too, by moving the processing of the
>> ioeventfds to a separate thread.  This is similar to
>> hw/block/dataplane/virtio-blk.c.
>>
>> It's actually pretty easy to do.  Even though
>> hw/block/dataplane/virtio-blk.c is still using some old APIs, all memory
>> access in QEMU is now thread-safe.  I have pending patches for 2.6 that
>> cut that file down to a mere 200 lines of code, NVMe would probably be
>> about the same.
> 
> Is there a git tree for your patches?

No, not yet.  I'll post them today or tomorrow, will make sure to Cc you.

> Did you mean some pseduo code as below?
> 1. need a iothread for each cq/sq?
> 2. need a AioContext for each cq/sq?
> 
>  hw/block/nvme.c | 32 ++--
>  hw/block/nvme.h |  8 
>  2 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f27fd35..fed4827 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -28,6 +28,8 @@
>  #include "sysemu/sysemu.h"
>  #include "qapi/visitor.h"
>  #include "sysemu/block-backend.h"
> +#include "sysemu/iothread.h"
> +#include "qom/object_interfaces.h"
>  
>  #include "nvme.h"
>  
> @@ -558,9 +560,22 @@ static void nvme_init_cq_eventfd(NvmeCQueue *cq)
>  uint16_t offset = (cq->cqid*2+1) * (4 << NVME_CAP_DSTRD(n->bar.cap));
>  
>  event_notifier_init(&cq->notifier, 0);
> -event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
>  memory_region_add_eventfd(&n->iomem,
>  0x1000 + offset, 4, false, 0, &cq->notifier);
> +
> +object_initialize(&cq->internal_iothread_obj,
> +  sizeof(cq->internal_iothread_obj),
> +  TYPE_IOTHREAD);
> +user_creatable_complete(OBJECT(&cq->internal_iothread_obj), 
> &error_abort);

For now, you have to use one iothread for all cq/sq of a single NVMe
device; multiqueue block layer is planned for 2.7 or 2.8.  Otherwise
yes, it's very close to just these changes.

If you use "-object iothread,id=NN" and a iothread property, you can
also use an N:M model with multiple disks attached to the same iothread.
 Defining the iothread property is like

object_property_add_link(obj, "iothread", TYPE_IOTHREAD,
(Object **)&s->conf.iothread,
qdev_prop_allow_set_link_before_realize,
OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL);

Thanks,

Paolo

> +cq->iothread = &cq->internal_iothread_obj;
> +cq->ctx = iothread_get_aio_context(cq->iothread);
> +//Question: Need a conf.blk for each cq/sq???
> +//blk_set_aio_context(cq->conf->conf.blk, cq->ctx);
> +aio_context_acquire(cq->ctx);
> +aio_set_event_notifier(cq->ctx, &cq->notifier, true,
> +   nvme_cq_notifier);
> +aio_context_release(cq->ctx);
>  }
>  
>  static void nvme_sq_notifier(EventNotifier *e)
> @@ -578,9 +593,22 @@ static void nvme_init_sq_eventfd(NvmeSQueue *sq)
>  uint16_t offset = sq->sqid * 2 * (4 << NVME_CAP_DSTRD(n->bar.cap));
>  
>  event_notifier_init(&sq->notifier, 0);
> -event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
>  memory_region_add_eventfd(&n->iomem,
>  0x1000 + offset, 4, false, 0, &sq->notifier);
> +
> +object_initialize(&sq->internal_iothread_obj,
> +  sizeof(sq->internal_iothread_obj),
> +  TYPE_IOTHREAD);
> +user_creatable_complete(OBJECT(&sq->internal_iothread_obj), 
> &error_abort);
> +sq->iothread = &sq->internal_iothread_obj;
> +sq->ctx = iothread_get_aio_context(sq->iothread);
> +//Question: Need a conf.blk for each cq/sq???
> +//blk_set_aio_context(sq->conf->conf.blk, sq->ctx);
> +
> +aio_context_acquire(sq->ctx);
> +aio_set_event_notifier(sq->ctx, &sq->notifier, true,
> +   nvme_sq_notifier);
> +aio_context_release(sq->ctx);
>  }
>  
>  static uint16_t nvme_set_db_memory(NvmeCtrl *n, const NvmeCmd *cmd)
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 608f202..171ee0b 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -667,6 +667,10 @@ typedef struct NvmeSQueue {
>   * do not go over this value will not result in

Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Marc-André Lureau
Hi

- Original Message -
> >
> > You can't migrate the peers.
> 
> Then explain the case N'=0 to me: how can you migrate the master so that
> it's connected to a server afterwards?

Dest qemu: -incoming.. -chardev socket,path=dest-server 

That is, start your destination qemu with a destination server. The master
origin qemu will migrate the shared memory, and the dest memory will be sync
when the migration is done.

> > It works fine in the tests. Feel free to point out races or other issues.
> 
> I think I did: doorbell detection is inherently racy.
> 
> If you think it isn't, please refute my reasoning.

I gave you some clues on how I did it in ivshmem-test.c: waiting for a 
signature on the memory to be mapped (and also checking that peers received ids)

> If it's not broken, please explain to me how the guest should find out
> whether its ivshmem device sports a doorbell.

If you have received ID, you should be good to use the doorbell.



Re: [Qemu-devel] [PATCH v5 0/6] fw_cfg: spec update, misc. cleanup, optimize read

2015-11-23 Thread Gabriel L. Somlo
Ping ?

I can send out v6 and fix the commit blurb typo in patch 6/6 pointed
out by Laszlo (unless the series is already winding its way toward
eventually being applied).

Please advise.

Thanks,
--Gabriel

On Thu, Nov 05, 2015 at 09:32:46AM -0500, Gabriel L. Somlo wrote:
> New since v4:
> 
>   - Patch 5: Protect against (undefined) left-shifting a uint64_t
> by 64 bits
> 
>   - Patches 4 & 5: Reworked comment and commit blurb wording
> 
> Laszlo, thanks again for the thorough review (and the "C master class") :)
> 
> Regards,
>   --Gabriel
> 
> >New since v3:
> >
> >- Patches 1..3 unchanged
> >
> >- Inserted new patch at position #4: avoid calculating an entry
> >  pointer for the current item when s->cur_entry is FW_CFG_INVALID
> >
> >- Patch 5 (formerly #4) now has the generic read method correctly
> >  shift-left (adding 0 padding on the right) in the event that the
> >  current item payload is exhausted in the course of a multi-byte
> >  read operation (Laszlo, thanks again for catching that!)
> >
> >- Patch 6 (formerly #5) is a rebased version of its former self,
> >  but with no actual significant modifications.
> >
> >>New since v2:
> >>
> >>- Patches 1-3: updated to address Laszlo's suggestions for better
> >>  and more accurate change descriptions in commit logs, comments,
> >>  etc.
> >>
> >>- Patch 4: Per Laszlo's recommendation, this has been split into
> >>  two separate components for improved legibility:
> >>
> >>- Patch 4/5: Introduces the new generic read method, and
> >>  replaces the existing MMIO logic
> >>
> >>- Patch 5/5: Replaces the IOPort read logic, and cleans
> >>  up the remaining unused bits of code.
> >>
> >>>This series' main purpose is to update (and simplify) the specified
> >>>read callback behavior. An earlier standalone patch to move qemu function
> >>>call API documentation into fw_cfg.h should logically be part of the 
> >>>series.
> >>>
> >>>Here's the summary of what each patch does:
> >>>
> >>>- Patch 1/4 is an updated version of the standalone v1 patch
> >>>  I sent out earlier; it moves all the qemu-internal host-side
> >>>  function call api documentation out of docs/specs/fw_cfg.txt,
> >>>  and into the fw_cfg.h header file, next to the prototype of
> >>>  each documented api function.
> >>>
> >>>- Patch 2/4 modifies the specified behavior of read callbacks
> >>>  (from being invoked once per byte read, to being invoked once,
> >>>   before ANY data is read, specifically once each time an item
> >>>   is selected).
> >>>
> >>>- Patch 3/4 additionally removes the now-redundant offset argument
> >>>  from the read callback prototype.
> >>>
> >>>- Finally, 4/4 consolidates (non-DMA) reads, minimizing the number
> >>>  of times redundant sanity checks are performed, particularly for
> >>>  wide (> byte) sized reads.
> 
> Gabriel L. Somlo (6):
>   fw_cfg: move internal function call docs to header file
>   fw_cfg: amend callback behavior spec to once per select
>   fw_cfg: remove offset argument from callback prototype
>   fw_cfg: avoid calculating invalid current entry pointer
>   fw_cfg: add generic non-DMA read method
>   fw_cfg: replace ioport data read with generic method
> 
>  docs/specs/fw_cfg.txt |  85 +-
>  hw/arm/virt-acpi-build.c  |   2 +-
>  hw/i386/acpi-build.c  |   2 +-
>  hw/nvram/fw_cfg.c |  75 +--
>  include/hw/nvram/fw_cfg.h | 128 
> +-
>  trace-events  |   2 +-
>  6 files changed, 166 insertions(+), 128 deletions(-)
> 
> -- 
> 2.4.3
> 



[Qemu-devel] [PATCH] [doc] Introduce coding style for errors

2015-11-23 Thread Lluís Vilanova
Gives some general guidelines for reporting errors in QEMU.


Signed-off-by: Lluís Vilanova 
---
 HACKING  |   52 ++
 include/qapi/error.h |   12 
 util/error.c |   24 +--
 3 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/HACKING b/HACKING
index 12fbc8a..af36009 100644
--- a/HACKING
+++ b/HACKING
@@ -157,3 +157,55 @@ painful. These are:
  * you may assume that integers are 2s complement representation
  * you may assume that right shift of a signed integer duplicates
the sign bit (ie it is an arithmetic shift, not a logical shift)
+
+7. Error reporting
+
+QEMU provides two different mechanisms for reporting errors. You should use one
+of these mechanisms instead of manually reporting them (i.e., do not use
+'printf', 'exit' or 'abort').
+
+7.1. Errors in user inputs
+
+QEMU provides the functions in "include/qemu/error-report.h" to report errors
+related to inputs provided by the user (e.g., command line arguments or
+configuration files).
+
+These functions generate error messages with a uniform format that can 
reference
+a location on the offending input.
+
+7.2. Other errors
+
+QEMU provides the functions in "include/qapi/error.h" to report other types of
+errors (i.e., not triggered by command line arguments or configuration files).
+
+Functions in this header are used to accumulate error messages in an 'Error'
+object, which can be propagated up the call chain where it is finally reported.
+
+In its simplest form, you can immediately report an error with:
+
+error_setg(&error_now, "Error with %s", "arguments");
+
+For convenience, you can also use 'error_setg_errno' and 'error_setg_win32' to
+append a message for OS-specific errors, and 'error_setg_file_open' for errors
+when opening files.
+
+7.3. Errors with an immediate exit/abort
+
+There are two convenience functions to report errors that immediately terminate
+QEMU:
+
+* 'error_setg(&error_fatal, msg, ...)'
+
+  Reports a fatal error with the given error message and exits QEMU.
+
+* 'error_setg(&error_abort, msg, ...)'
+
+  Reports a programming error with the given error message and aborts QEMU.
+
+In general, you should report errors but *not* terminate QEMU if the errors are
+(or can be) triggered by guest code (e.g., some unimplemented corner case).
+This also applies to hardware emulation (it is OK to leave the device in a
+non-operational state until next reboot, though). Otherwise that can be abused
+by guest code to terminate QEMU.
+
+In such cases you should use the argument 'error_now'.
diff --git a/include/qapi/error.h b/include/qapi/error.h
index c69dddb..e2bfc19 100644
--- a/include/qapi/error.h
+++ b/include/qapi/error.h
@@ -53,6 +53,9 @@
  * Call a function treating errors as fatal:
  * foo(arg, &error_fatal);
  *
+ * Call a function immediately showing all errors:
+ * foo(arg, &error_now);
+ *
  * Receive an error and pass it on to the caller:
  * Error *err = NULL;
  * foo(arg, &err);
@@ -104,6 +107,7 @@ ErrorClass error_get_class(const Error *err);
  * then.
  * If @errp is &error_abort, print a suitable message and abort().
  * If @errp is &error_fatal, print a suitable message and exit(1).
+ * If @errp is &error_now, print a suitable message.
  * If @errp is anything else, *@errp must be NULL.
  * The new error's class is ERROR_CLASS_GENERIC_ERROR, and its
  * human-readable error message is made from printf-style @fmt, ...
@@ -154,6 +158,7 @@ void error_setg_win32_internal(Error **errp,
  * abort().
  * Else, if @dst_errp is &error_fatal, print a suitable message and
  * exit(1).
+ * Else, if @dst_errp is &error_now, print a suitable message.
  * Else, if @dst_errp already contains an error, ignore this one: free
  * the error object.
  * Else, move the error object from @local_err to *@dst_errp.
@@ -217,4 +222,11 @@ extern Error *error_abort;
  */
 extern Error *error_fatal;
 
+/*
+ * Pass to error_setg() & friends to immediately show an error.
+ *
+ * Has the same formatting of #error_abort, but does not terminate QEMU.
+ */
+extern Error *error_now;
+
 #endif
diff --git a/util/error.c b/util/error.c
index 8b86490..e507039 100644
--- a/util/error.c
+++ b/util/error.c
@@ -27,6 +27,7 @@ struct Error
 
 Error *error_abort;
 Error *error_fatal;
+Error *error_now;
 
 static void error_handle_fatal(Error **errp, Error *err)
 {
@@ -62,9 +63,14 @@ static void error_setv(Error **errp,
 err->func = func;
 
 error_handle_fatal(errp, err);
-*errp = err;
-
-errno = saved_errno;
+if (errp == &error_now) {
+fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
+err->func, err->src, err->line);
+error_report_err(err);
+} else {
+*errp = err;
+errno = saved_errno;
+}
 }
 
 void error_set_internal(Error **errp,
@@ -226,9 +232,15 @@ void error_propagate(Error **dst_errp, Error *local_err)
 return;
 }
 error_hand

Re: [Qemu-devel] Is ivshmem's test for unix domain client socket valid?

2015-11-23 Thread Paolo Bonzini


On 20/11/2015 18:56, Markus Armbruster wrote:
> Looks rather fishy:
> 
> if (strncmp(s->server_chr->filename, "unix:", 5)) {
> error_setg(errp, "chardev is not a unix client socket");
> return;
> }
> 
> Paolo, is this reliable?

Yes, though by total chance (which is already a curious definition of
reliability).  If you create "-chardev file,path=unix:foo",
chr->filename ends up with "file" because only pty and socket chardevs
set chr->filename.  Everything else does not set it, and the field is
thus set to the default---the backend name.

> Is it the proper way to check?

It's the only one, which makes it the most proper too.  Sarcasm is
intentional.

Paolo



Re: [Qemu-devel] [PATCH v2 4/4] json-streamer: Limit number of tokens in addition to total size

2015-11-23 Thread Paolo Bonzini


On 20/11/2015 18:32, Eric Blake wrote:
> > static void qlist_size_iter(QObject *obj, void *opaque)
> > {
> > size_t *count = opaque;
> > (*count)++;
> > }
> 
> Yuck - we don't track size independently?  Seems like it might make a
> worthwhile addition,

Would you change your mind, if I told you that qlist_size is unused? :)

Paolo



Re: [Qemu-devel] [RFC] virtio: convert to use DMA api

2015-11-23 Thread Cornelia Huck
On Mon, 23 Nov 2015 11:52:50 +0200
"Michael S. Tsirkin"  wrote:

> On Mon, Nov 23, 2015 at 10:36:45AM +0100, Cornelia Huck wrote:
> > On Mon, 23 Nov 2015 15:41:11 +0800
> > Jason Wang  wrote:
> > 
> > > Currently, all virtio devices bypass IOMMU completely. This is because
> > > address_space_memory is assumed and used during DMA emulation. This
> > > patch converts the virtio core API to use DMA API. This idea is
> > > 
> > > - introducing a new transport specific helper to query the dma address
> > >   space. (only pci version is implemented).
> > > - query and use this address space during virtio device guest memory
> > >   accessing
> > > 
> > > With this virtiodevices will not bypass IOMMU anymore. Little tested with
> > > intel_iommu=on with virtio guest DMA series posted in
> > > https://lkml.org/lkml/2015/10/28/64.
> > > 
> > > TODO:
> > > - Feature bit for this
> > 
> > I'm still not convinced about that feature bit stuff. It just feels
> > wrong to use a mechanism that conveys negotiable device features to
> > configure what is basically a platform/hypervisor feature. I'd rather
> > see this out of the virtio layer and into the pci layer.
> 
> I agree it's not something that needs to be negotiated.
> 
> But given that we are changing device and driver behaviour in a drastic
> way, it seems prudent to have a way for guest and host to discover that,
> even if it's just in case.
> 
> Whether it's a feature bit or something else pci-specific,
> depends on whether this makes sense for any other transport.
> 
> > > - Implement this for all transports
> > 
> > Is it OK to just keep a fallback to today's implementation for
> > transports for which the iommu concept doesn't make sense and that will
> > always have an identity mapping?
> 
> Is there a reason why iommu does not make any sense for ccw or mmio?

Channel I/O uses a different concept when performing data transfers:
The addresses referenced by the channel program can be anywhere in the
guest's main memory (today's qemu only supports adresses under 2G, as
IDAWs are not yet implemented). Basically, the OS will refer to
addresses anywhere in its address space and the channel subsystem is
subsequently free to read from/write to that memory. This also applies
to queues: The OS will tell the channel subsystem/device which memory
areas can be accessed (the proprietary qdio transport is the same as
virtio in that regard). The first time we needed an iommu on s390 was
when pci was introduced.

If I understand correctly what an iommu does, that concept does not
match. For dma, we can fall back to an identity mapping, but I don't
see how we can fit in an iommu.

The mmio transport is a completely different beast; I'm afraid I don't
know enough about it to say how it interacts with iommus.




Re: [Qemu-devel] [RFC] virtio: convert to use DMA api

2015-11-23 Thread Peter Maydell
On 23 November 2015 at 14:34, Cornelia Huck  wrote:
> The mmio transport is a completely different beast; I'm afraid I don't
> know enough about it to say how it interacts with iommus.

Conceptually, it's just a device that does DMA, I think.
You could in theory put it behind an IOMMU, but nobody does
(and I don't know whether the kernel code could cope with
that).

thanks
-- PMM



Re: [Qemu-devel] [PATCH] virtio-blk: Move resetting of req->mr_next to virtio_blk_handle_rw_error

2015-11-23 Thread Paolo Bonzini


On 23/11/2015 01:41, Fam Zheng wrote:
> "werror=report" would free the req in virtio_blk_handle_rw_error, we
> mustn't write to it in that case.
> 
> Reported-by: Paolo Bonzini 
> Signed-off-by: Fam Zheng 
> ---
>  hw/block/virtio-blk.c | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 848f3fe..756ae5c 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -72,6 +72,9 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, 
> int error,
>  VirtIOBlock *s = req->dev;
>  
>  if (action == BLOCK_ERROR_ACTION_STOP) {
> +/* Break the link as the next request is going to be parsed from the
> + * ring again. Otherwise we may end up doing a double completion! */
> +req->mr_next = NULL;
>  req->next = s->rq;
>  s->rq = req;
>  } else if (action == BLOCK_ERROR_ACTION_REPORT) {
> @@ -112,10 +115,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
>   * happen on the other side of the migration).
>   */
>  if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
> -/* Break the link in case the next request is added to the
> - * restart queue and is going to be parsed from the ring 
> again.
> - */
> -req->mr_next = NULL;
>  continue;
>  }
>  }
> 

Reviewed-by: Paolo Bonzini 



Re: [Qemu-devel] [PATCH 1/2] tests/Makefile: Add more dependencies for test-timed-average

2015-11-23 Thread Paolo Bonzini


On 23/11/2015 13:39, Kevin Wolf wrote:
>  tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \
>   libqemuutil.a stubs/clock-warp.o stubs/cpu-get-icount.o \
> - stubs/notify-event.o stubs/replay.o
> + stubs/notify-event.o stubs/replay.o stubs/mon-is-qmp.o 
> stubs/fd-register.o \
> + stubs/mon-printf.o
>  

Why not just add libqemustub.a?  (If it works, do not even bother
reposting).

Paolo



Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Markus Armbruster
Marc-André Lureau  writes:

> Hi
>
> - Original Message -
>> >
>> > You can't migrate the peers.
>> 
>> Then explain the case N'=0 to me: how can you migrate the master so that
>> it's connected to a server afterwards?
>
> Dest qemu: -incoming.. -chardev socket,path=dest-server 
>
> That is, start your destination qemu with a destination server. The master
> origin qemu will migrate the shared memory, and the dest memory will be sync
> when the migration is done.

Got it!

>> > It works fine in the tests. Feel free to point out races or other issues.
>> 
>> I think I did: doorbell detection is inherently racy.
>> 
>> If you think it isn't, please refute my reasoning.
>
> I gave you some clues on how I did it in ivshmem-test.c: waiting for a
> signature on the memory to be mapped (and also checking that peers
> received ids)
>
>> If it's not broken, please explain to me how the guest should find out
>> whether its ivshmem device sports a doorbell.
>
> If you have received ID, you should be good to use the doorbell.

That's not a complete answer, so let me try a different tack.

What value will a read of register IVPosition yield

* if the device has no doorbell:

  I think the answer is -1.

* if the device has a doorbell, but it isn't ready, yet:

  I think the answer is -1.

* if the device has a doorbell, and it is ready.

  This is the part you answered already: >= 0.

Please correct mistakes.



Re: [Qemu-devel] [PATCH] target-i386: Do not set MCG_SER_P by default

2015-11-23 Thread Paolo Bonzini


On 23/11/2015 14:22, Eduardo Habkost wrote:
> > Software Error Recovery, i.e. SER, is purely an Intel feature and it
> > shouldn't be set by default. Enable it only on Intel.
> 
> What happens when SER is enabled on an AMD CPU? If it really
> should't be enabled, why is KVM returning it on
> KVM_X86_GET_MCE_CAP_SUPPORTED?

Indeed... is it a problem if our frankenstein AMD CPU can recover from
memory errors?

Paolo



Re: [Qemu-devel] [PATCH for 2.6 1/3] backup: Use Bitmap to replace "s->bitmap"

2015-11-23 Thread Paolo Bonzini


On 23/11/2015 10:55, Fam Zheng wrote:
>>> Why? I think bitmap_set is a better match with bitmap_new below.
>>
>> set_bit() is quicker than bitmap_set() if you only set one bit.
> 
> How much quicker is it? This doesn't sound convincing enough for me to lose 
> the
> readability.

Substantially.  It's also documented:

/*
 * Also the following operations apply to bitmaps.
 *
 * set_bit(bit, addr)   *addr |= bit
 * clear_bit(bit, addr) *addr &= ~bit
 * change_bit(bit, addr)*addr ^= bit
 * test_bit(bit, addr)  Is bit set in *addr?
 * test_and_set_bit(bit, addr)  Set bit and return old value
 * test_and_clear_bit(bit, addr)Clear bit and return old value
 * test_and_change_bit(bit, addr)   Change bit and return old value
 * find_first_zero_bit(addr, nbits) Position first zero bit in *addr
 * find_first_bit(addr, nbits)  Position first set bit in *addr
 * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
 * find_next_bit(addr, nbits, bit)  Position next set bit in *addr >= bit
 */

Paolo



Re: [Qemu-devel] [PATCH WIP 01/30] crypto: add QCryptoSecret object class for password/key handling

2015-11-23 Thread Daniel P. Berrange
On Mon, Nov 23, 2015 at 02:39:27PM +0100, Markus Armbruster wrote:
> "Daniel P. Berrange"  writes:
> 
> > On Fri, Nov 20, 2015 at 03:09:25PM -0700, Eric Blake wrote:
> >> On 11/20/2015 11:04 AM, Daniel P. Berrange wrote:
> >> > +
> >> > +static const char *base64_valid_chars =
> >> > +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
> >> > +
> >> > +static int
> >> > +qcrypto_secret_validate_base64(const uint8_t *input,
> >> > +   size_t inputlen,
> >> > +   Error **errp)
> >> 
> >> Don't we already have base64 utility methods available?
> >
> > We normally use glib,  g_base64_encode/decode. Unfortunately the
> > decode method doesn't provide any usefull error reporting facility.
> > It just silently skips any characters that are outside the valid
> > set.  So the only way I could get any kind of sensible error report
> > was to do this validation myself against the set of permitted base64
> > characters.
> 
> Yes.  Same problem elsewhere, e.g. ringbuf-write.  qapi-schema.json:
> 
> #  - base64: data must be base64 encoded text.  Its binary
> #decoding gets written.
> #Bug: invalid base64 is currently not rejected.
> #Whitespace *is* invalid.
> 
> This suggests that we shouldn't bury this in crypto/, but instead add it
> to util/.
> 
> A replacement for g_base64_decode() could be easier to use than a
> checker function to use in addition to g_base64_decode(),

Yeah, that's a good idea. I'll look at that.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



Re: [Qemu-devel] [PATCH 2/2] test-aio: Fix event notifier cleanup

2015-11-23 Thread Paolo Bonzini


On 23/11/2015 13:39, Kevin Wolf wrote:
> One test case closed an event notifier (event_notifier_cleanup())
> without first disabling it (set_event_notifier(..., NULL)). This
> resulted in a leftover handle 0 that was added to each subsequent
> WaitForMultipleObjects() call, causing the function to fail (invalid
> handle).
> 
> Signed-off-by: Kevin Wolf 
> ---
>  tests/test-aio.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/tests/test-aio.c b/tests/test-aio.c
> index 1623803..e188d8c 100644
> --- a/tests/test-aio.c
> +++ b/tests/test-aio.c
> @@ -393,6 +393,7 @@ static void test_aio_external_client(void)
>  aio_enable_external(ctx);
>  }
>  assert(aio_poll(ctx, false));
> +set_event_notifier(ctx, &data.e, NULL);
>  event_notifier_cleanup(&data.e);
>  }
>  }
> 

Reviewed-by: Paolo Bonzini 



Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Marc-André Lureau


- Original Message -
> Marc-André Lureau  writes:
> 
> > Hi
> >
> > - Original Message -
> >> >
> >> > You can't migrate the peers.
> >> 
> >> Then explain the case N'=0 to me: how can you migrate the master so that
> >> it's connected to a server afterwards?
> >
> > Dest qemu: -incoming.. -chardev socket,path=dest-server
> >
> > That is, start your destination qemu with a destination server. The master
> > origin qemu will migrate the shared memory, and the dest memory will be
> > sync
> > when the migration is done.
> 
> Got it!
> 
> >> > It works fine in the tests. Feel free to point out races or other
> >> > issues.
> >> 
> >> I think I did: doorbell detection is inherently racy.
> >> 
> >> If you think it isn't, please refute my reasoning.
> >
> > I gave you some clues on how I did it in ivshmem-test.c: waiting for a
> > signature on the memory to be mapped (and also checking that peers
> > received ids)
> >
> >> If it's not broken, please explain to me how the guest should find out
> >> whether its ivshmem device sports a doorbell.
> >
> > If you have received ID, you should be good to use the doorbell.
> 
> That's not a complete answer, so let me try a different tack.
> 
> What value will a read of register IVPosition yield
> 
> * if the device has no doorbell:
> 
>   I think the answer is -1.
> 
> * if the device has a doorbell, but it isn't ready, yet:
> 
>   I think the answer is -1.
> 
> * if the device has a doorbell, and it is ready.
> 
>   This is the part you answered already: >= 0.
> 
> Please correct mistakes.
> 

Yes, I think it's correct. Arbitrary informations can be then shared via the 
shared memory (to say whether a doorbell will be present for ex).



Re: [Qemu-devel] [PATCH] target-i386: Do not set MCG_SER_P by default

2015-11-23 Thread Borislav Petkov
+ Tony.

On Mon, Nov 23, 2015 at 03:47:44PM +0100, Paolo Bonzini wrote:
> On 23/11/2015 14:22, Eduardo Habkost wrote:
> > > Software Error Recovery, i.e. SER, is purely an Intel feature and it
> > > shouldn't be set by default. Enable it only on Intel.
> > 
> > What happens when SER is enabled on an AMD CPU? If it really
> > should't be enabled, why is KVM returning it on
> > KVM_X86_GET_MCE_CAP_SUPPORTED?
> 
> Indeed... is it a problem if our frankenstein AMD CPU can recover from
> memory errors?

The problem stems from the fact that the guest kernel looks at SER and
does different handling depending on that bit:

machine_check_poll:

...

if (!(flags & MCP_UC) &&
(m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;

so when the guest kernel gets a correctable error (injected..., for
example) it sees that bit set. Even though kvm/qemu emulates an AMD
CPU. So on AMD with that bit set, it would puzzle the whole error
handling/reporting in the guest kernel.

Oh, btw, I'm using a kvm guest to inject MCEs. In case you were
wondering why is he even doing that. :-)

And I'm not sure it makes sense to set that bit for an Intel guest too.
For the simple reason that I don't know how much of the Software Error
Recovery stuff is actually implemented there. If stuff is missing, you
probably don't want to advertize it there too. And by "stuff" I mean
all that fun in section "15.6 RECOVERY OF UNCORRECTED RECOVERABLE (UCR)
ERRORS" of the SDM.

It's a whole another question how/whether to do UCR error handling in
the guest or maybe leave it to the host...

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.



[Qemu-devel] MCG_CAP ABI breakage (was Re: [PATCH] target-i386: Do not set MCG_SER_P by default)

2015-11-23 Thread Eduardo Habkost
On Mon, Nov 23, 2015 at 11:22:37AM -0200, Eduardo Habkost wrote:
[...]
> In the case of this code, it looks like it's already broken
> because the resulting mcg_cap depends on host kernel capabilities
> (the ones reported by kvm_get_mce_cap_supported()), and the data
> initialized by target-i386/cpu.c:mce_init() is silently
> overwritten by kvm_arch_init_vcpu(). So we would need to fix that
> before implementing a proper compatibility mechanism for
> mcg_cap.

Fortunately, when running Linux v2.6.37 and later,
kvm_arch_init_vcpu() won't actually change mcg_cap (see details
below).

But the code is broken if running on Linux between v2.6.32 and
v2.6.36: it will clear MCG_SER_P silently (and silently enable
MCG_SER_P when migrating to a newer host).

But I don't know what we should do on those cases. If we abort
initialization when the host doesn't support MCG_SER_P, all CPU
models with MCE and MCA enabled will become unrunnable on Linux
between v2.6.32 and v2.6.36. Should we do that, and simply ask
people to upgrade their kernels (or explicitly disable MCE) if
they want to run latest QEMU?

For reference, these are the capabilities returned by Linux:
* KVM_MAX_MCE_BANKS is 32 since
  890ca9aefa78f7831f8f633cab9e4803636dffe4 (v2.6.32-rc1~693^2~199)
* KVM_MCE_CAP_SUPPORTED is (MCG_CTL_P | MCG_SER_P) since
  5854dbca9b235f8cdd414a0961018763d2d5bf77 (v2.6.37-rc1~142^2~3)
* KVM_MCE_CAP_SUPPORTED is MCG_CTL_P between
  890ca9aefa78f7831f8f633cab9e4803636dffe4 (v2.6.32-rc1~693^2~199)
  and 5854dbca9b235f8cdd414a0961018763d2d5bf77 (v2.6.37-rc1~142^2~3)

The current definitions in QEMU are:
#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
#define MCE_BANKS_DEF   10

The target-i386/cpu.c:mce_init() code sets mcg_cap to:
env->mcg_cap == MCE_CAP_DEF | MCE_BANKS_DEF;
 == (MCG_CTL_P|MCG_SER_P) | 10;

The kvm_arch_init_vcpu() code that changes mcg_cap does the following:
kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);
if (banks > MCE_BANKS_DEF) {
banks = MCE_BANKS_DEF;
}
mcg_cap &= MCE_CAP_DEF;
mcg_cap |= banks;
env->mcg_cap = mcg_cap;
  * Therefore, if running Linux v2.6.37 or newer, this will be
the result:
banks == 10;
mcg_cap == (MCG_CTL_P|MCG_SER_P) | banks
== (MCG_CTL_P|MCG_SER_P) | 10;
* That's the same value set by mce_init(), so fortunately
  kvm_arch_init_vcpu() isn't actually changing mcg_cap
  if running Linux v.2.6.37 and newer.
  * However, if running Linux between v2.6.32 and v2.6.37,
kvm_arch_init_vcpu() will silently clear MCG_SER_P.

-- 
Eduardo



Re: [Qemu-devel] [virtio-dev] [RFC v1] virtio-crypto specification

2015-11-23 Thread Cornelia Huck
On Fri, 20 Nov 2015 03:27:51 +
"Gonglei (Arei)"  wrote:

> Hi guys,
> 
> After initial discussion at this year's KVM forum, I post the RFC version of 
> virtio-crypto
> device specification now. 
> 
> If you have any comments, please let me know, thanks.

Some further comments from my side, but this looks good generally.

> 
> Regards,
> -Gonglei
> 
> 
> 1 Crypto Device
> 
> The virtio crypto device is a virtual crypto device (ie. hardware crypto 
> accelerator card). Encrypt and decrypt requests are placed in the data queue, 
> and handled by the real hardware crypto accelerators finally. A second queue 
> is the controlling queue, which is used to create/destroy session or some 
> other advanced filtering features.
> 
> 1.1   Device ID
> 
>   65535 (experimental)
> 
> 1.2   Virtqueues
> 
> 0 
>   controlq
> 1
>   dataq
> 
> 1.3   Feature bits
> 
> VIRTIO_CRYPTO_F_REQ_SIZE_MAX (0)  
> Maximum size of any single request is in “size_max”. 

Any reason you'd want to make this optional? Would it make sense if the
config space field always contains either the maximum size or "0" for
"unlimited"?

> VIRTIO_CRYPTO_F_SYM (1)
> Device supports the symmetric cryptography API.
> VIRTIO_CRYPTO_F_DH (2)
> Device supports the Diffie Hellman API.
> VIRTIO_CRYPTO_F_DSA (3)
> Device supports the DSA API.
> VIRTIO_CRYPTO_F_RSA (4)
> Device supports the RSA API.
> VIRTIO_CRYPTO_F_EC (5)
> Device supports the Elliptic Curve API.
> VIRTIO_CRYPTO_F_ECDH (6)
> Device supports the Elliptic Curve Diffie Hellman API.
> VIRTIO_CRYPTO_F_ECDSA (7)
> Device supports the Elliptic Curve DSA API.
> VIRTIO_CRYPTO_F _KEY (8)
> Device supports the Key Generation API.
> VIRTIO_CRYPTO_F_LN (9)
> Device supports the Large Number API.
> VIRTIO_CRYPTO_F_PRIME (10)
> Device supports the prime number testing API.
> VIRTIO_CRYPTO_F_DRGB (11)
> Device supports the DRGB API.
> VIRTIO_CRYPTO_F_NRGB (12)
> Device supports the NRGB API.
> VIRTIO_CRYPTO_F_RAND (13)
> Device supports the random bit/number generation API.

I'm wondering whether you'll really want a feature bit for each
algorithm. Bits 0-23 are device specific, so there's still some more to
go; but could this also go into the config space instead? It's not like
the driver will want to negotiate which algorithms it wants to use, but
rather it will want to check what the device may offer.

Another thought on negotiable features: The device might provide both a
hardware-backed implementation of (some) APIs and an emulation of
(other) APIs. The driver might, however, want to make sure that a
hardware-backed implementation is used. Would it make sense to add an
negotiable feature bit for allowing software emulation?

> 
> 1.4   Device configuration layout
> 
> struct virtio_crypto_config {
>   le32 size_max; /* Maximum size of any single request */
> }
> 
> 1.5   Device Initialization
> 
> 1. The initialization routine should identify the data and control virtqueues.
> 2. If the VIRTIO_CRYPTO_F_SYM feature bit is negotiated, identify the device 
> supports the symmetric cryptography API, which as the same as other features.
> 
> 1.6   Device Operation
> 
> The controlq is used to control session operations, such as create or 
> destroy. Meanwhile, some other features or functions can also be handled by 
> controlq. The control request is preceded by a header:
> struct virtio_crypto_ctx_outhdr {
> /* cipher algorithm type (ie. aes-cbc ) */
> __virtio32 alg;
> /* length of key */
> __virtio32 keylen;
> /* reserved */
> __virtio32 flags;

Anything you have in mind for these?

> /* control type  */
> uint8_t type;
> /* encrypt or decrypt */
> uint8_t op;
> /* mode of hash operation, including authenticated/plain/nested hash */
> uint8_t hash_mode;
> /* authenticate hash/cipher ordering  */
> uint8_t alg_chain_order;
> /* length of authenticated key */
> __virtio32 auth_key_len;
> /* hash algorithm type */
> __virtio32 hash_alg;

May there be algorithm-specific parameters as well?

> };
> The encrypt/decrypt requests and the corresponding results are transmitted by 
> placing them in dataq. The request itself is preceded by a header:
> struct virtio_crypto_req_outhdr {
> /* algorithm type (ie. aes-128-cbc ) */
> __virtio32 mode;
> /* length of iv */
> __virtio32 ivlen;
> /* length of source data */
> __virtio32 len;
> /* length of auth data */
> __virtio32 auth_len;
> /* the backend session id */
> __virtio64 session_id;
> /* reserved */
> __virtio32 flags;
> };
> 
> Both ctx and data requests end by a status byte. The final status byte is 
> written by the device: either VIRTIO_CRYPTO_S_OK for success, 
> VIRTIO_BLK_S_IOERR for device or driver error or VIRTIO_BLK_S_UNSUPP for a 
> request unsupported by device, VIRTIO_CRYPTO_S_BADMSG for verification failed 
> when decrypt AEAD algorithms:

s/BLK/CRYPTO/ :)

> 
> #define VIRTIO_CRYPTO_S_OK0
> #define

Re: [Qemu-devel] ivshmem property size should be a size, not a string

2015-11-23 Thread Markus Armbruster
Marc-André Lureau  writes:

> - Original Message -
>> Marc-André Lureau  writes:
>> 
>> > Hi
>> >
>> > - Original Message -
>> >> >
>> >> > You can't migrate the peers.
>> >> 
>> >> Then explain the case N'=0 to me: how can you migrate the master so that
>> >> it's connected to a server afterwards?
>> >
>> > Dest qemu: -incoming.. -chardev socket,path=dest-server
>> >
>> > That is, start your destination qemu with a destination server. The master
>> > origin qemu will migrate the shared memory, and the dest memory will be
>> > sync
>> > when the migration is done.
>> 
>> Got it!
>> 
>> >> > It works fine in the tests. Feel free to point out races or other
>> >> > issues.
>> >> 
>> >> I think I did: doorbell detection is inherently racy.
>> >> 
>> >> If you think it isn't, please refute my reasoning.
>> >
>> > I gave you some clues on how I did it in ivshmem-test.c: waiting for a
>> > signature on the memory to be mapped (and also checking that peers
>> > received ids)
>> >
>> >> If it's not broken, please explain to me how the guest should find out
>> >> whether its ivshmem device sports a doorbell.
>> >
>> > If you have received ID, you should be good to use the doorbell.
>> 
>> That's not a complete answer, so let me try a different tack.
>> 
>> What value will a read of register IVPosition yield
>> 
>> * if the device has no doorbell:
>> 
>>   I think the answer is -1.
>> 
>> * if the device has a doorbell, but it isn't ready, yet:
>> 
>>   I think the answer is -1.
>> 
>> * if the device has a doorbell, and it is ready.
>> 
>>   This is the part you answered already: >= 0.
>> 
>> Please correct mistakes.
>> 
>
> Yes, I think it's correct. Arbitrary informations can be then shared
> via the shared memory (to say whether a doorbell will be present for
> ex).

Then there is no race-free way for the guest to distinguish between "no
doorbell" and "doorbell not ready" without some higher-level protocol.

Corollary: when IVPosition reads -1, BAR 2 may or may not be mapped, and
the only way to find out is accessing it.

Feels plenty broken to me.  Straight NAK in fact if it wasn't already in
the tree.



Re: [Qemu-devel] [PATCH REPOST 1/2] dump-guest-memory: add "detach" flag for QMP/HMP interfaces

2015-11-23 Thread Andrew Jones
On Mon, Nov 23, 2015 at 06:07:41PM +0800, Peter Xu wrote:
> This patch only add the interfaces, but not implementing them.
> 
> Signed-off-by: Peter Xu 
> ---
>  dump.c   | 3 ++-
>  hmp-commands.hx  | 5 +++--
>  hmp.c| 3 ++-
>  qapi-schema.json | 3 ++-
>  qmp-commands.hx  | 4 ++--
>  5 files changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/dump.c b/dump.c
> index 78b7d84..3ec3423 100644
> --- a/dump.c
> +++ b/dump.c
> @@ -1598,7 +1598,8 @@ cleanup:
>  void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
> int64_t begin, bool has_length,
> int64_t length, bool has_format,
> -   DumpGuestMemoryFormat format, Error **errp)
> +   DumpGuestMemoryFormat format, bool detach,
> +   Error **errp)
>  {
>  const char *p;
>  int fd = -1;
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index bb52e4d..8e27671 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1056,10 +1056,11 @@ ETEXI
>  
>  {
>  .name   = "dump-guest-memory",
> -.args_type  = 
> "paging:-p,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:i?,length:i?",
> -.params = "[-p] [-z|-l|-s] filename [begin length]",
> +.args_type  = 
> "paging:-p,detach:-d,zlib:-z,lzo:-l,snappy:-s,filename:F,begin:i?,length:i?",
> +.params = "[-p] [-d] [-z|-l|-s] filename [begin length]",
>  .help   = "dump guest memory into file 'filename'.\n\t\t\t"
>"-p: do paging to get guest's memory mapping.\n\t\t\t"
> +  "-d: return immediately (not wait for 
> completion).\n\t\t\t"

(do not wait...)

>"-z: dump in kdump-compressed format, with zlib 
> compression.\n\t\t\t"
>"-l: dump in kdump-compressed format, with lzo 
> compression.\n\t\t\t"
>"-s: dump in kdump-compressed format, with snappy 
> compression.\n\t\t\t"
> diff --git a/hmp.c b/hmp.c
> index 2140605..4c5480d 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -1580,6 +1580,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict 
> *qdict)
>  {
>  Error *err = NULL;
>  bool paging = qdict_get_try_bool(qdict, "paging", false);
> +bool detach = qdict_get_try_bool(qdict, "detach", false);
>  bool zlib = qdict_get_try_bool(qdict, "zlib", false);
>  bool lzo = qdict_get_try_bool(qdict, "lzo", false);
>  bool snappy = qdict_get_try_bool(qdict, "snappy", false);
> @@ -1619,7 +1620,7 @@ void hmp_dump_guest_memory(Monitor *mon, const QDict 
> *qdict)
>  prot = g_strconcat("file:", file, NULL);
>  
>  qmp_dump_guest_memory(paging, prot, has_begin, begin, has_length, length,
> -  true, dump_format, &err);
> +  true, dump_format, detach, &err);
>  hmp_handle_error(mon, &err);
>  g_free(prot);
>  }
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 8b1a423..1211082 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -2132,7 +2132,8 @@
>  ##
>  { 'command': 'dump-guest-memory',
>'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
> -'*length': 'int', '*format': 'DumpGuestMemoryFormat' } }
> +'*length': 'int', '*format': 'DumpGuestMemoryFormat',
> +'detach': 'bool'} }

For consistency I would put detach after paging.

>  
>  ##
>  # @DumpGuestMemoryCapability:
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 9d8b42f..86864f6 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -840,8 +840,8 @@ EQMP
>  
>  {
>  .name   = "dump-guest-memory",
> -.args_type  = "paging:b,protocol:s,begin:i?,end:i?,format:s?",
> -.params = "-p protocol [begin] [length] [format]",
> +.args_type  = 
> "paging:b,detach:d,protocol:s,begin:i?,end:i?,format:s?",
> +.params = "-p protocol [-d] [begin] [length] [format]",

shouldn't this be -p -d protocol [begin] [length] [format]

>  .help   = "dump guest memory to file",
>  .mhandler.cmd_new = qmp_marshal_dump_guest_memory,
>  },
> -- 
> 2.4.3
> 
> 



Re: [Qemu-devel] [PATCH REPOST 2/2] dump-guest-memory: add basic "detach" support.

2015-11-23 Thread Daniel P. Berrange
On Mon, Nov 23, 2015 at 06:07:42PM +0800, Peter Xu wrote:
> This will allow the user specify "-d" (just like command
> "migrate") when using "dump-guest-memory" command. When
> specified, one background thread is created to do the dump work.
> One flag is added to show whether there is a background dump
> work in progress.

Your comparison to the 'migrate' command is not entirely
accurate. While the 'detach' flag exist in the QMP parameters
schema, it is invalid to use it - migration is always backgrounded
in QMP.

[quote src="qmp-commands.hx"]
  (3) The user Monitor's "detach" argument is invalid in QMP and should not
be used
[/quote]

A further difference is that with migrate, you can use
'info migrate' to determine if the operation is complete.
AFAIK, with this proposal there's no way to see if the
dump is complete - you just have to keep runing 'cont'
until it doesn't return an error.

I'm curious about the intended usage of this change
and whether this design satisfies the requirements
it may have

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



[Qemu-devel] [PATCH v5 2/4] kobject: export kset_find_obj() for module use

2015-11-23 Thread Gabriel L. Somlo
From: Gabriel Somlo 

Signed-off-by: Gabriel Somlo 
---
 lib/kobject.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/kobject.c b/lib/kobject.c
index 7cbccd2..90d1be6 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -861,6 +861,7 @@ struct kobject *kset_find_obj(struct kset *kset, const char 
*name)
spin_unlock(&kset->list_lock);
return ret;
 }
+EXPORT_SYMBOL(kset_find_obj);
 
 static void kset_release(struct kobject *kobj)
 {
-- 
2.4.3




[Qemu-devel] [PATCH v5 4/4] devicetree: update documentation for fw_cfg ARM bindings

2015-11-23 Thread Gabriel L. Somlo
From: Gabriel Somlo 

Remove fw_cfg hardware interface details from
Documentation/devicetree/bindings/arm/fw-cfg.txt,
and replace them with a pointer to the authoritative
documentation in the QEMU source tree.

Signed-off-by: Gabriel Somlo 
Cc: Laszlo Ersek 
---
 Documentation/devicetree/bindings/arm/fw-cfg.txt | 38 ++--
 1 file changed, 2 insertions(+), 36 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/fw-cfg.txt 
b/Documentation/devicetree/bindings/arm/fw-cfg.txt
index 953fb64..ce27386 100644
--- a/Documentation/devicetree/bindings/arm/fw-cfg.txt
+++ b/Documentation/devicetree/bindings/arm/fw-cfg.txt
@@ -11,43 +11,9 @@ QEMU exposes the control and data register to ARM guests as 
memory mapped
 registers; their location is communicated to the guest's UEFI firmware in the
 DTB that QEMU places at the bottom of the guest's DRAM.
 
-The guest writes a selector value (a key) to the selector register, and then
-can read the corresponding data (produced by QEMU) via the data register. If
-the selected entry is writable, the guest can rewrite it through the data
-register.
+The authoritative guest-side hardware interface documentation to the fw_cfg
+device ca be found in "docs/specs/fw_cfg.txt" in the QEMU source tree.
 
-The selector register takes keys in big endian byte order.
-
-The data register allows accesses with 8, 16, 32 and 64-bit width (only at
-offset 0 of the register). Accesses larger than a byte are interpreted as
-arrays, bundled together only for better performance. The bytes constituting
-such a word, in increasing address order, correspond to the bytes that would
-have been transferred by byte-wide accesses in chronological order.
-
-The interface allows guest firmware to download various parameters and blobs
-that affect how the firmware works and what tables it installs for the guest
-OS. For example, boot order of devices, ACPI tables, SMBIOS tables, kernel and
-initrd images for direct kernel booting, virtual machine UUID, SMP information,
-virtual NUMA topology, and so on.
-
-The authoritative registry of the valid selector values and their meanings is
-the QEMU source code; the structure of the data blobs corresponding to the
-individual key values is also defined in the QEMU source code.
-
-The presence of the registers can be verified by selecting the "signature" blob
-with key 0x, and reading four bytes from the data register. The returned
-signature is "QEMU".
-
-The outermost protocol (involving the write / read sequences of the control and
-data registers) is expected to be versioned, and/or described by feature bits.
-The interface revision / feature bitmap can be retrieved with key 0x0001. The
-blob to be read from the data register has size 4, and it is to be interpreted
-as a uint32_t value in little endian byte order. The current value
-(corresponding to the above outer protocol) is zero.
-
-The guest kernel is not expected to use these registers (although it is
-certainly allowed to); the device tree bindings are documented here because
-this is where device tree bindings reside in general.
 
 Required properties:
 
-- 
2.4.3




[Qemu-devel] [PATCH v5 1/4] firmware: introduce sysfs driver for QEMU's fw_cfg device

2015-11-23 Thread Gabriel L. Somlo
From: Gabriel Somlo 

Make fw_cfg entries of type "file" available via sysfs. Entries
are listed under /sys/firmware/qemu_fw_cfg/by_key, in folders
named after each entry's selector key. Filename, selector value,
and size read-only attributes are included for each entry. Also,
a "raw" attribute allows retrieval of the full binary content of
each entry.

The fw_cfg device can be instantiated automatically from ACPI or
the Device Tree, or manually by using a kernel module (or command
line) parameter, with a syntax outlined in the documentation file.

Signed-off-by: Gabriel Somlo 
---
 .../ABI/testing/sysfs-firmware-qemu_fw_cfg |  58 ++
 drivers/firmware/Kconfig   |  19 +
 drivers/firmware/Makefile  |   1 +
 drivers/firmware/qemu_fw_cfg.c | 611 +
 4 files changed, 689 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
 create mode 100644 drivers/firmware/qemu_fw_cfg.c

diff --git a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg 
b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
new file mode 100644
index 000..718fbac
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
@@ -0,0 +1,58 @@
+What:  /sys/firmware/qemu_fw_cfg/
+Date:  August 2015
+Contact:   Gabriel Somlo 
+Description:
+   Several different architectures supported by QEMU (x86, arm,
+   sun4*, ppc/mac) are provisioned with a firmware configuration
+   (fw_cfg) device, originally intended as a way for the host to
+   provide configuration data to the guest firmware. Starting
+   with QEMU v2.4, arbitrary fw_cfg file entries may be specified
+   by the user on the command line, which makes fw_cfg additionally
+   useful as an out-of-band, asynchronous mechanism for providing
+   configuration data to the guest userspace.
+
+   The authoritative guest-side hardware interface documentation
+   to the fw_cfg device ca be found in "docs/specs/fw_cfg.txt" in
+   the QEMU source tree.
+
+   === SysFS fw_cfg Interface ===
+
+   The fw_cfg sysfs interface described in this document is only
+   intended to display discoverable blobs (i.e., those registered
+   with the file directory), as there is no way to determine the
+   presence or size of "legacy" blobs (with selector keys between
+   0x0002 and 0x0018) programmatically.
+
+   All fw_cfg information is shown under:
+
+   /sys/firmware/qemu_fw_cfg/
+
+   The only legacy blob displayed is the fw_cfg device revision:
+
+   /sys/firmware/qemu_fw_cfg/rev
+
+   --- Discoverable fw_cfg blobs by selector key ---
+
+   All discoverable blobs listed in the fw_cfg file directory are
+   displayed as entries named after their unique selector key
+   value, e.g.:
+
+   /sys/firmware/qemu_fw_cfg/by_key/32
+   /sys/firmware/qemu_fw_cfg/by_key/33
+   /sys/firmware/qemu_fw_cfg/by_key/34
+   ...
+
+   Each such fw_cfg sysfs entry has the following values exported
+   as attributes:
+
+   name: The 56-byte nul-terminated ASCII string used as the
+ blob's 'file name' in the fw_cfg directory.
+   size: The length of the blob, as given in the fw_cfg
+ directory.
+   key : The value of the blob's selector key as given in the
+ fw_cfg directory. This value is the same as used in
+ the parent directory name.
+   raw : The raw bytes of the blob, obtained by selecting the
+ entry via the control register, and reading a number
+ of bytes equal to the blob size from the data
+ register.
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index cf478fe..3cf920a 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -161,6 +161,25 @@ config RASPBERRYPI_FIRMWARE
  This option enables support for communicating with the firmware on the
  Raspberry Pi.
 
+config FW_CFG_SYSFS
+   tristate "QEMU fw_cfg device support in sysfs"
+   depends on SYSFS
+   default n
+   help
+ Say Y or M here to enable the exporting of the QEMU firmware
+ configuration (fw_cfg) file entries via sysfs. Entries are
+ found under /sys/firmware/fw_cfg when this option is enabled
+ and loaded.
+
+config FW_CFG_SYSFS_CMDLINE
+   bool "QEMU fw_cfg device parameter parsing"
+   depends on FW_CFG_SYSFS
+   help
+ Allow the qemu_fw_cfg 

[Qemu-devel] [PATCH v5 3/4] firmware: create directory hierarchy for sysfs fw_cfg entries

2015-11-23 Thread Gabriel L. Somlo
From: Gabriel Somlo 

Each fw_cfg entry of type "file" has an associated 56-char,
nul-terminated ASCII string which represents its name. While
the fw_cfg device doesn't itself impose any specific naming
convention, QEMU developers have traditionally used path name
semantics (i.e. "etc/acpi/rsdp") to descriptively name the
various fw_cfg "blobs" passed into the guest.

This patch attempts, on a best effort basis, to create a
directory hierarchy representing the content of fw_cfg file
names, under /sys/firmware/qemu_fw_cfg/by_name.

Upon successful creation of all directories representing the
"dirname" portion of a fw_cfg file, a symlink will be created
to represent the "basename", pointing at the appropriate
/sys/firmware/qemu_fw_cfg/by_key entry. If a file name is not
suitable for this procedure (e.g., if its basename or dirname
components collide with an already existing dirname component
or basename, respectively) the corresponding fw_cfg blob is
skipped and will remain available in sysfs only by its selector
key value.

Signed-off-by: Gabriel Somlo 
Cc: Andy Lutomirski 
---
 .../ABI/testing/sysfs-firmware-qemu_fw_cfg |  42 
 drivers/firmware/qemu_fw_cfg.c | 109 -
 2 files changed, 148 insertions(+), 3 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg 
b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
index 718fbac..3823804 100644
--- a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
+++ b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg
@@ -56,3 +56,45 @@ Description:
  entry via the control register, and reading a number
  of bytes equal to the blob size from the data
  register.
+
+   --- Listing fw_cfg blobs by file name ---
+
+   While the fw_cfg device does not impose any specific naming
+   convention on the blobs registered in the file directory,
+   QEMU developers have traditionally used path name semantics
+   to give each blob a descriptive name. For example:
+
+   "bootorder"
+   "genroms/kvmvapic.bin"
+   "etc/e820"
+   "etc/boot-fail-wait"
+   "etc/system-states"
+   "etc/table-loader"
+   "etc/acpi/rsdp"
+   "etc/acpi/tables"
+   "etc/smbios/smbios-tables"
+   "etc/smbios/smbios-anchor"
+   ...
+
+   In addition to the listing by unique selector key described
+   above, the fw_cfg sysfs driver also attempts to build a tree
+   of directories matching the path name components of fw_cfg
+   blob names, ending in symlinks to the by_key entry for each
+   "basename", as illustrated below (assume current directory is
+   /sys/firmware):
+
+   qemu_fw_cfg/by_name/bootorder -> ../by_key/38
+   qemu_fw_cfg/by_name/etc/e820 -> ../../by_key/35
+   qemu_fw_cfg/by_name/etc/acpi/rsdp -> ../../../by_key/41
+   ...
+
+   Construction of the directory tree and symlinks is done on a
+   "best-effort" basis, as there is no guarantee that components
+   of fw_cfg blob names are always "well behaved". I.e., there is
+   the possibility that a symlink (basename) will conflict with
+   a dirname component of another fw_cfg blob, in which case the
+   creation of the offending /sys/firmware/qemu_fw_cfg/by_name
+   entry will be skipped.
+
+   The authoritative list of entries will continue to be found
+   under the /sys/firmware/qemu_fw_cfg/by_key directory.
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 618304a..9ac1ca7 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -318,9 +318,103 @@ static struct bin_attribute fw_cfg_sysfs_attr_raw = {
.read = fw_cfg_sysfs_read_raw,
 };
 
-/* kobjects representing top-level and by_key folders */
+/*
+ * Create a kset subdirectory matching each '/' delimited dirname token
+ * in 'name', starting with sysfs kset/folder 'dir'; At the end, create
+ * a symlink directed at the given 'target'.
+ * NOTE: We do this on a best-effort basis, since 'name' is not guaranteed
+ * to be a well-behaved path name. Whenever a symlink vs. kset directory
+ * name collision occurs, the kernel will issue big scary warnings while
+ * refusing to add the offending link or directory. We follow up with our
+ * own, slightly less scary error messages explaining the situation :)
+ */
+static int fw_cfg_build_symlink(struct kset *dir,
+   struct kobject *target, const char *name)
+{
+   int ret;
+   struct

[Qemu-devel] [PATCH v2 02/21] block: Fix reopen with semantically overlapping options

2015-11-23 Thread Kevin Wolf
This fixes bdrv_reopen() calls like the following one:

qemu-io -c 'open -o overlap-check.template=all /tmp/test.qcow2' \
-c 'reopen -o overlap-check=none'

The approach taken so far would result in an options QDict that has both
"overlap-check.template=all" and "overlap-check=none", which obviously
conflicts. In this case, the old option should be overridden by the
newly specified option.

Signed-off-by: Kevin Wolf 
---
 block.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 3a7324b..675e5a8 100644
--- a/block.c
+++ b/block.c
@@ -624,6 +624,20 @@ static int refresh_total_sectors(BlockDriverState *bs, 
int64_t hint)
 }
 
 /**
+ * Combines a QDict of new block driver @options with any missing options taken
+ * from @old_options, so that leaving out an option defaults to its old value.
+ */
+static void bdrv_join_options(BlockDriverState *bs, QDict *options,
+  QDict *old_options)
+{
+if (bs->drv && bs->drv->bdrv_join_options) {
+bs->drv->bdrv_join_options(options, old_options);
+} else {
+qdict_join(options, old_options, false);
+}
+}
+
+/**
  * Set open flags for a given discard mode
  *
  * Return 0 on success, -1 if the discard mode was invalid.
@@ -1663,7 +1677,7 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue 
*bs_queue,
 }
 
 old_options = qdict_clone_shallow(bs->options);
-qdict_join(options, old_options, false);
+bdrv_join_options(bs, options, old_options);
 QDECREF(old_options);
 
 /* bdrv_open() masks this flag out */
-- 
1.8.3.1




[Qemu-devel] [PATCH v5 0/4] SysFS driver for QEMU fw_cfg device

2015-11-23 Thread Gabriel L. Somlo
Allow access to QEMU firmware blobs, passed into the guest VM via
the fw_cfg device, through SysFS entries. Blob meta-data (e.g. name,
size, and fw_cfg key), as well as the raw binary blob data may be
accessed.

The SysFS access location is /sys/firmware/qemu_fw_cfg/... and was
selected based on overall similarity to the type of information
exposed under /sys/firmware/dmi/entries/...

New since v4:

Documentation (Patches 1/4 and 4/4) now points to the authoritative
file in the QEMU source tree for any details related to the "hardware
interface" of the fw_cfg device; Only details specific to sysfs (1/4) 
and DT (4/4) should stay in the kernel docs.

Thanks,
  --Gabriel

>New (since v3):
>
>   Patch 1/4: Device probing now works with either ACPI, DT, or
>  optionally by manually specifying a base, size, and
>  register offsets on the command line. This way, all
>  architectures offering fw_cfg can be supported, although
>  x86 and ARM get *automatic* support via ACPI and/or DT.
>
>  HUGE thanks to Laszlo Ersek  for
>  pointing out drivers/virtio/virtio_mmio.c, as an example
>  on how to pull this off !!!
>
>  Stefan: I saw Marc's DMA patches to fw_cfg. Since only
>  x86 and ARM will support it starting with QEMU 2.5, and
>  since I expect to get lots of otherwise interesting (but
>  otherwise orthogonal) feedback on this series, I'd like
>  to stick with ioread8() across the board for now. We can
>  always patch in DMA support in a backward compatible way
>  later, once this series gets (hopefully) accepted :)
>
>   Patch 2/4: (was 3/4 in v3): unchanged. Exports kset_find_obj() so
>  modules can call it.
>
>   Patch 3/4: (was 4/4 in v3): rebased, but otherwise the same.
>  Essentially, creates a "human readable" directory
>  hierarchy from "path-like" tokens making up fw_cfg
>  blob names. I'm not really sure there's a way to make
>  this happen via udev rules, but I have at least one
>  potential use case for doing it *before* udev becomes
>  available (cc: Andy Lutomirski ),
>  so I'd be happy to leave this functionality in the
>  kernel module. See further below for an illustration
>  of this.
>
>   Patch 4/4: Updates the existing ARM DT documentation for fw_cfg,
>  mainly by pointing at the more comprehensive document
>  introduced with Patch 1/4 for details on the fw_cfg
>  device interface, leaving only the specific ARM/DT
>  address/size node information in place.
>
>>  In addition to the "by_key" blob listing, e.g.:
>>  
>>  $ tree /sys/firmware/qemu_fw_cfg/
>>  /sys/firmware/qemu_fw_cfg/
>>  |-- by_key
>>  |   |-- 32
>>  |   |   |-- key
>>  |   |   |-- name("etc/boot-fail-wait")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 33
>>  |   |   |-- key
>>  |   |   |-- name("etc/smbios/smbios-tables")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 34
>>  |   |   |-- key
>>  |   |   |-- name("etc/smbios/smbios-anchor")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 35
>>  |   |   |-- key
>>  |   |   |-- name("etc/e820")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 36
>>  |   |   |-- key
>>  |   |   |-- name("genroms/kvmvapic.bin")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 37
>>  |   |   |-- key
>>  |   |   |-- name("etc/system-states")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 38
>>  |   |   |-- key
>>  |   |   |-- name("etc/acpi/tables")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 39
>>  |   |   |-- key
>>  |   |   |-- name("etc/table-loader")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 40
>>  |   |   |-- key
>>  |   |   |-- name("etc/tpm/log")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   |-- 41
>>  |   |   |-- key
>>  |   |   |-- name("etc/acpi/rsdp")
>>  |   |   |-- raw
>>  |   |   `-- size
>>  |   `-- 42
>>  |   |-- key
>>  |   |-- name("bootorder")
>>  |   |-- raw
>>  |   `-- size
>>  |
>>  ...
>>  
>>  Patch 3/4 also gets us a "human readable" "by_name" listing, like so:
>>  
>>  ...
>>  |-- by_name
>>  |   |-- bootorder -> ../by_key/42
>>  |   |-- etc
>>  |   |   |-- acpi
>>  |   |   |   |-- rsdp -> ../../../by_key/41
>>  |   |   |   `-- tables -> ../../../by_key/38
>>  |   |   |-- boot-fail-wait -> ../../by_key/32
>>  |   |   |-- e820 -> ../../by_key/35
>>  |   |   |-

[Qemu-devel] [PATCH v2 07/21] block: Pass driver-specific options to .bdrv_refresh_filename()

2015-11-23 Thread Kevin Wolf
In order to decide whether a blkdebug: filename can be produced or a
json: one is necessary, blkdebug checked whether bs->options had more
options than just "config", "x-image" or "image" (the latter including
nested options). That doesn't work well when generic block layer options
are present.

This patch passes an option QDict to the driver that contains only
driver-specific options, i.e. the options for the general block layer as
well as child nodes are already filtered out. Works much better this
way.

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
---
 block.c   |  5 -
 block/blkdebug.c  | 17 ++---
 block/blkverify.c |  2 +-
 block/nbd.c   | 10 +-
 block/quorum.c|  2 +-
 include/block/block_int.h |  2 +-
 6 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/block.c b/block.c
index 02125e2..be449b9 100644
--- a/block.c
+++ b/block.c
@@ -4027,7 +4027,10 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 bs->full_open_options = NULL;
 }
 
-drv->bdrv_refresh_filename(bs);
+opts = qdict_new();
+append_open_options(opts, bs);
+drv->bdrv_refresh_filename(bs, opts);
+QDECREF(opts);
 } else if (bs->file) {
 /* Try to reconstruct valid information from the underlying file */
 bool has_open_options;
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 6860a2b..bc0f041 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -726,17 +726,15 @@ static int blkdebug_truncate(BlockDriverState *bs, 
int64_t offset)
 return bdrv_truncate(bs->file->bs, offset);
 }
 
-static void blkdebug_refresh_filename(BlockDriverState *bs)
+static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
 {
 QDict *opts;
 const QDictEntry *e;
 bool force_json = false;
 
-for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
+for (e = qdict_first(options); e; e = qdict_next(options, e)) {
 if (strcmp(qdict_entry_key(e), "config") &&
-strcmp(qdict_entry_key(e), "x-image") &&
-strcmp(qdict_entry_key(e), "image") &&
-strncmp(qdict_entry_key(e), "image.", strlen("image.")))
+strcmp(qdict_entry_key(e), "x-image"))
 {
 force_json = true;
 break;
@@ -752,7 +750,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
 if (!force_json && bs->file->bs->exact_filename[0]) {
 snprintf(bs->exact_filename, sizeof(bs->exact_filename),
  "blkdebug:%s:%s",
- qdict_get_try_str(bs->options, "config") ?: "",
+ qdict_get_try_str(options, "config") ?: "",
  bs->file->bs->exact_filename);
 }
 
@@ -762,11 +760,8 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
 QINCREF(bs->file->bs->full_open_options);
 qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
 
-for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
-if (strcmp(qdict_entry_key(e), "x-image") &&
-strcmp(qdict_entry_key(e), "image") &&
-strncmp(qdict_entry_key(e), "image.", strlen("image.")))
-{
+for (e = qdict_first(options); e; e = qdict_next(options, e)) {
+if (strcmp(qdict_entry_key(e), "x-image")) {
 qobject_incref(qdict_entry_value(e));
 qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
 }
diff --git a/block/blkverify.c b/block/blkverify.c
index c5f8e8d..1d75449 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -307,7 +307,7 @@ static void blkverify_attach_aio_context(BlockDriverState 
*bs,
 bdrv_attach_aio_context(s->test_file->bs, new_context);
 }
 
-static void blkverify_refresh_filename(BlockDriverState *bs)
+static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
 {
 BDRVBlkverifyState *s = bs->opaque;
 
diff --git a/block/nbd.c b/block/nbd.c
index cd6a587..416f42b 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -342,13 +342,13 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
 nbd_client_attach_aio_context(bs, new_context);
 }
 
-static void nbd_refresh_filename(BlockDriverState *bs)
+static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
 {
 QDict *opts = qdict_new();
-const char *path   = qdict_get_try_str(bs->options, "path");
-const char *host   = qdict_get_try_str(bs->options, "host");
-const char *port   = qdict_get_try_str(bs->options, "port");
-const char *export = qdict_get_try_str(bs->options, "export");
+const char *path   = qdict_get_try_str(options, "path");
+const char *host   = qdict_get_try_str(options, "host");
+const char *port   = qdict_get_try_str(options, "port");
+const char *export = qdict_get_try_str(options, "export");
 
 qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
 
diff --git a/

[Qemu-devel] [PATCH v2 00/21] block: Cache mode for children etc.

2015-11-23 Thread Kevin Wolf
This is part three (or four, depending on whether you count the bdrv_swap
removal) of what I had sent earlier as "[PATCH 00/34] block: Cache mode for
children, reopen overhaul and more". Most of the patches were actually already
reviewed in v1.

This part contains the remaining functional changes that the cover letter for
v1 advertised, and a bit more:

- You can now use node name references for backing files
- bdrv_reopen() works now properly for inherited options (don't exist before
  this series; after the series the cache options)
- bdrv_reopen() works now properly with semantically overlapping options
- bdrv_reopen() can change child node options
- And finally you can set cache mode options for backing files and other
  children now (and the reopen behaviour even makes sense

Kevin Wolf (21):
  qcow2: Add .bdrv_join_options callback
  block: Fix reopen with semantically overlapping options
  mirror: Error out when a BDS would get two BBs
  block: Allow references for backing files
  block: Consider all block layer options in append_open_options
  block: Exclude nested options only for children in
append_open_options()
  block: Pass driver-specific options to .bdrv_refresh_filename()
  block: Keep "driver" in bs->options
  block: Allow specifying child options in reopen
  block: reopen: Document option precedence and refactor accordingly
  block: Add infrastructure for option inheritance
  block: Split out parse_json_protocol()
  block: Introduce bs->explicit_options
  blockdev: Set 'format' indicates non-empty drive
  qemu-iotests: Remove cache mode test without medium
  block: reopen: Extract QemuOpts for generic block layer options
  block: Move cache options into options QDict
  blkdebug: Enable reopen
  qemu-iotests: Try setting cache mode for children
  qemu-iotests: Test cache mode option inheritance
  qemu-iotests: Test reopen with node-name/driver options

 block.c   | 457 +++-
 block/blkdebug.c  |  24 +-
 block/blkverify.c |   2 +-
 block/mirror.c|  30 +-
 block/nbd.c   |  10 +-
 block/qcow2.c |  47 +++
 block/quorum.c|   2 +-
 blockdev.c|  57 +--
 include/block/block.h |   4 +-
 include/block/block_int.h |   8 +-
 tests/qemu-iotests/051|  22 +-
 tests/qemu-iotests/051.out|  74 +++-
 tests/qemu-iotests/133|  90 +
 tests/qemu-iotests/133.out|  22 ++
 tests/qemu-iotests/142| 354 +++
 tests/qemu-iotests/142.out| 788 ++
 tests/qemu-iotests/group  |   2 +
 tests/qemu-iotests/iotests.py |   4 +-
 18 files changed, 1823 insertions(+), 174 deletions(-)
 create mode 100755 tests/qemu-iotests/133
 create mode 100644 tests/qemu-iotests/133.out
 create mode 100755 tests/qemu-iotests/142
 create mode 100644 tests/qemu-iotests/142.out

-- 
1.8.3.1




[Qemu-devel] [PATCH v2 06/21] block: Exclude nested options only for children in append_open_options()

2015-11-23 Thread Kevin Wolf
Some drivers have nested options (e.g. blkdebug rule arrays), which
don't belong to a child node and shouldn't be removed. Don't remove all
options with "." in their name, but check for the complete prefixes of
actually existing child nodes.

Signed-off-by: Kevin Wolf 
---
 block.c   | 19 +++
 include/block/block_int.h |  1 +
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 23d9e10..02125e2 100644
--- a/block.c
+++ b/block.c
@@ -1101,11 +1101,13 @@ static int bdrv_fill_options(QDict **options, const 
char **pfilename,
 
 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
 BlockDriverState *child_bs,
+const char *child_name,
 const BdrvChildRole *child_role)
 {
 BdrvChild *child = g_new(BdrvChild, 1);
 *child = (BdrvChild) {
 .bs = child_bs,
+.name   = child_name,
 .role   = child_role,
 };
 
@@ -1165,7 +1167,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd)
 bs->backing = NULL;
 goto out;
 }
-bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
+bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
 bs->open_flags &= ~BDRV_O_NO_BACKING;
 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
@@ -1322,7 +1324,7 @@ BdrvChild *bdrv_open_child(const char *filename,
 goto done;
 }
 
-c = bdrv_attach_child(parent, bs, child_role);
+c = bdrv_attach_child(parent, bs, bdref_key, child_role);
 
 done:
 qdict_del(options, bdref_key);
@@ -3952,13 +3954,22 @@ static bool append_open_options(QDict *d, 
BlockDriverState *bs)
 {
 const QDictEntry *entry;
 QemuOptDesc *desc;
+BdrvChild *child;
 bool found_any = false;
+const char *p;
 
 for (entry = qdict_first(bs->options); entry;
  entry = qdict_next(bs->options, entry))
 {
-/* Only take options for this level */
-if (strchr(qdict_entry_key(entry), '.')) {
+/* Exclude options for children */
+QLIST_FOREACH(child, &bs->children, next) {
+if (strstart(qdict_entry_key(entry), child->name, &p)
+&& (!*p || *p == '.'))
+{
+break;
+}
+}
+if (child) {
 continue;
 }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 77dc165..b2325aa 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -351,6 +351,7 @@ extern const BdrvChildRole child_format;
 
 struct BdrvChild {
 BlockDriverState *bs;
+const char *name;
 const BdrvChildRole *role;
 QLIST_ENTRY(BdrvChild) next;
 QLIST_ENTRY(BdrvChild) next_parent;
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 14/21] blockdev: Set 'format' indicates non-empty drive

2015-11-23 Thread Kevin Wolf
Creating an empty drive while specifying 'format' doesn't make sense.
The specified format driver would simply be ignored.

Make a set 'format' option an indication that a non-empty drive should
be created. This makes 'format' consistent with 'driver' and allows
using it with a block driver that doesn't need any other options (like
null-co/null-aio).

Signed-off-by: Kevin Wolf 
---
 blockdev.c| 5 +
 tests/qemu-iotests/iotests.py | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 313841b..afaeef9 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -490,7 +490,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 QDict *interval_dict = NULL;
 QList *interval_list = NULL;
 const char *id;
-bool has_driver_specific_opts;
 BlockdevDetectZeroesOptions detect_zeroes =
 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
 const char *throttling_group = NULL;
@@ -514,8 +513,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 qdict_del(bs_opts, "id");
 }
 
-has_driver_specific_opts = !!qdict_size(bs_opts);
-
 /* extract parameters */
 snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
 
@@ -578,7 +575,7 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 }
 
 /* init */
-if ((!file || !*file) && !has_driver_specific_opts) {
+if ((!file || !*file) && !qdict_size(bs_opts)) {
 BlockBackendRootState *blk_rs;
 
 blk = blk_new(qemu_opts_id(opts), errp);
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index ff5905f..f36add8 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -143,12 +143,12 @@ class VM(object):
 def add_drive(self, path, opts='', interface='virtio'):
 '''Add a virtio-blk drive to the VM'''
 options = ['if=%s' % interface,
-   'format=%s' % imgfmt,
'cache=%s' % cachemode,
'id=drive%d' % self._num_drives]
 
 if path is not None:
 options.append('file=%s' % path)
+options.append('format=%s' % imgfmt)
 
 if opts:
 options.append(opts)
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 08/21] block: Keep "driver" in bs->options

2015-11-23 Thread Kevin Wolf
Instead of passing a separate drv argument to bdrv_open_common(), just
make sure that a "driver" option is set in the QDict. This also means
that a "driver" entry is consistently present in bs->options now.

This is another step towards keeping all options in the QDict (which is
the represenation of the blockdev-add QMP command).

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
---
 block.c | 57 -
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/block.c b/block.c
index be449b9..c505765 100644
--- a/block.c
+++ b/block.c
@@ -817,6 +817,11 @@ static QemuOptsList bdrv_runtime_opts = {
 .type = QEMU_OPT_STRING,
 .help = "Node name of the block device node",
 },
+{
+.name = "driver",
+.type = QEMU_OPT_STRING,
+.help = "Block driver to use for the node",
+},
 { /* end of list */ }
 },
 };
@@ -827,18 +832,31 @@ static QemuOptsList bdrv_runtime_opts = {
  * Removes all processed options from *options.
  */
 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
-QDict *options, int flags, BlockDriver *drv, Error **errp)
+QDict *options, int flags, Error **errp)
 {
 int ret, open_flags;
 const char *filename;
+const char *driver_name = NULL;
 const char *node_name = NULL;
 QemuOpts *opts;
+BlockDriver *drv;
 Error *local_err = NULL;
 
-assert(drv != NULL);
 assert(bs->file == NULL);
 assert(options != NULL && bs->options != options);
 
+opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
+qemu_opts_absorb_qdict(opts, options, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+ret = -EINVAL;
+goto fail_opts;
+}
+
+driver_name = qemu_opt_get(opts, "driver");
+drv = bdrv_find_format(driver_name);
+assert(drv != NULL);
+
 if (file != NULL) {
 filename = file->bs->filename;
 } else {
@@ -848,19 +866,12 @@ static int bdrv_open_common(BlockDriverState *bs, 
BdrvChild *file,
 if (drv->bdrv_needs_filename && !filename) {
 error_setg(errp, "The '%s' block driver requires a file name",
drv->format_name);
-return -EINVAL;
-}
-
-trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
-
-opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
-qemu_opts_absorb_qdict(opts, options, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
 ret = -EINVAL;
 goto fail_opts;
 }
 
+trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
+
 node_name = qemu_opt_get(opts, "node-name");
 bdrv_assign_node_name(bs, node_name, &local_err);
 if (local_err) {
@@ -1477,11 +1488,14 @@ static int bdrv_open_inherit(BlockDriverState **pbs, 
const char *filename,
 goto fail;
 }
 
+bs->open_flags = flags;
+bs->options = options;
+options = qdict_clone_shallow(options);
+
 /* Find the right image format driver */
 drvname = qdict_get_try_str(options, "driver");
 if (drvname) {
 drv = bdrv_find_format(drvname);
-qdict_del(options, "driver");
 if (!drv) {
 error_setg(errp, "Unknown driver: '%s'", drvname);
 ret = -EINVAL;
@@ -1497,10 +1511,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, 
const char *filename,
 qdict_del(options, "backing");
 }
 
-bs->open_flags = flags;
-bs->options = options;
-options = qdict_clone_shallow(options);
-
 /* Open image file without format layer */
 if ((flags & BDRV_O_PROTOCOL) == 0) {
 if (flags & BDRV_O_RDWR) {
@@ -1528,6 +1538,19 @@ static int bdrv_open_inherit(BlockDriverState **pbs, 
const char *filename,
 if (ret < 0) {
 goto fail;
 }
+/*
+ * This option update would logically belong in bdrv_fill_options(),
+ * but we first need to open bs->file for the probing to work, while
+ * opening bs->file already requires the (mostly) final set of options
+ * so that cache mode etc. can be inherited.
+ *
+ * Adding the driver later is somewhat ugly, but it's not an option
+ * that would ever be inherited, so it's correct. We just need to make
+ * sure to update both bs->options (which has the full effective
+ * options for bs) and options (which has file.* already removed).
+ */
+qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
+qdict_put(options, "driver", qstring_from_str(drv->format_name));
 } else if (!drv) {
 error_setg(errp, "Must specify either driver or file");
 ret = -EINVAL;
@@ -1541,7 +1564,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, 
const char *filename,
 assert(!(flags & BDRV_O_PROTOCOL) || !file

[Qemu-devel] [PATCH v2 05/21] block: Consider all block layer options in append_open_options

2015-11-23 Thread Kevin Wolf
The code already special-cased "node-name", which is currently the only
option passed in the QDict that isn't driver-specific. Generalise the
code to take all general block layer options into consideration.

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
Reviewed-by: Max Reitz 
---
 block.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index ca6c4e9..23d9e10 100644
--- a/block.c
+++ b/block.c
@@ -3951,20 +3951,30 @@ out:
 static bool append_open_options(QDict *d, BlockDriverState *bs)
 {
 const QDictEntry *entry;
+QemuOptDesc *desc;
 bool found_any = false;
 
 for (entry = qdict_first(bs->options); entry;
  entry = qdict_next(bs->options, entry))
 {
-/* Only take options for this level and exclude all non-driver-specific
- * options */
-if (!strchr(qdict_entry_key(entry), '.') &&
-strcmp(qdict_entry_key(entry), "node-name"))
-{
-qobject_incref(qdict_entry_value(entry));
-qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
-found_any = true;
+/* Only take options for this level */
+if (strchr(qdict_entry_key(entry), '.')) {
+continue;
 }
+
+/* And exclude all non-driver-specific options */
+for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
+if (!strcmp(qdict_entry_key(entry), desc->name)) {
+break;
+}
+}
+if (desc->name) {
+continue;
+}
+
+qobject_incref(qdict_entry_value(entry));
+qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
+found_any = true;
 }
 
 return found_any;
-- 
1.8.3.1




  1   2   3   >