[Qemu-devel] [PATCH] migration/fd: abort migration if receive POLLHUP event

2018-04-21 Thread Wang Xin
If the fd socket peer closed shortly, ppoll may receive a POLLHUP
event before the expected POLLIN event, and qemu will do nothing
but goes into an infinite loop of the POLLHUP event.

So, abort the migration if we receive a POLLHUP event.

Signed-off-by: Wang Xin 

diff --git a/migration/fd.c b/migration/fd.c
index cd06182..5932c87 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -15,6 +15,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "channel.h"
 #include "fd.h"
 #include "monitor/monitor.h"
@@ -46,6 +47,11 @@ static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
  GIOCondition condition,
  gpointer opaque)
 {
+if (condition & G_IO_HUP) {
+error_report("The migration peer closed, job abort");
+exit(EXIT_FAILURE);
+}
+
 migration_channel_process_incoming(ioc);
 object_unref(OBJECT(ioc));
 return G_SOURCE_REMOVE;
@@ -67,7 +73,7 @@ void fd_start_incoming_migration(const char *infd, Error 
**errp)
 
 qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-incoming");
 qio_channel_add_watch(ioc,
-  G_IO_IN,
+  G_IO_IN | G_IO_HUP,
   fd_accept_incoming_migration,
   NULL,
   NULL);
-- 
2.8.1.windows.1





Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

2018-04-21 Thread Cédric Le Goater
On 04/20/2018 02:09 PM, Peter Maydell wrote:
> On 13 April 2018 at 08:52, Cédric Le Goater  wrote:
>> On the POWER9 processor, the XIVE interrupt controller can control
>> interrupt sources using MMIO to trigger events, to EOI or to turn off
>> the sources. Priority management and interrupt acknowledgment is also
>> controlled by MMIO in the presenter sub-engine.
>>
>> These MMIO regions are exposed to guests in QEMU with a set of 'ram
>> device' memory mappings, similarly to VFIO, and the VMAs are populated
>> dynamically with the appropriate pages using a fault handler.
>>
>> But, these regions are an issue for migration. We need to discard the
>> associated RAMBlocks from the RAM state on the source VM and let the
>> destination VM rebuild the memory mappings on the new host in the
>> post_load() operation just before resuming the system.
>>
>> To achieve this goal, the following introduces a new RAMBlock flag
>> RAM_MIGRATABLE which is updated in the vmstate_register_ram() and
>> vmstate_unregister_ram() routines. This flag is then used by the
>> migration to identify RAMBlocks to discard on the source. Some checks
>> are also performed on the destination to make sure nothing invalid was
>> sent.
>>
>> Signed-off-by: Cédric Le Goater 
> 
> So, this is conceptually the right thing, but it is a migration
> compat break for any board which has a ram block which it doesn't
> call vmstate_register_ram() for. These are mostly going to be
> places that call one of the _nomigrate() functions to create an
> MMIO region, and then don't register the ram by hand either.> Those are bugs, 
> in my view, but we should consider fixing
> them while we're here.
>
> Here's the results of a grep and eyeballing of the results:
> 
> These places are OK, because they register the memory region
> by hand one way or another:
> 
> numa stuff, registers it when the backend is used:
> backends/hostmem-ram.c:
> memory_region_init_ram_shared_nomigrate(&backend->mr, OBJECT(backend),
> path,
> 
> registers it globally by hand:
> numa.c:memory_region_init_ram_nomigrate(mr, owner, name,
> ram_size, &error_fatal);
> numa.c:memory_region_init_ram_nomigrate(mr, owner, name,
> ram_size, &error_fatal);
> hw/arm/aspeed_soc.c:memory_region_init_ram_nomigrate(&s->sram,
> OBJECT(dev), "aspeed.sram",
> hw/block/onenand.c:memory_region_init_ram_nomigrate(&s->ram,
> OBJECT(s), "onenand.ram",
> hw/display/cg3.c:memory_region_init_ram_nomigrate(&s->rom, obj,
> "cg3.prom", FCODE_MAX_ROM_SIZE,
> hw/display/tcx.c:memory_region_init_ram_nomigrate(&s->rom, obj,
> "tcx.prom", FCODE_MAX_ROM_SIZE,
> hw/display/tcx.c:memory_region_init_ram_nomigrate(&s->vram_mem,
> OBJECT(s), "tcx.vram",
> hw/display/vga.c:memory_region_init_ram_nomigrate(&s->vram, obj,
> "vga.vram", s->vram_size,
> hw/input/milkymist-softusb.c:
> memory_region_init_ram_nomigrate(&s->pmem, OBJECT(s),
> "milkymist-softusb.pmem",
> hw/input/milkymist-softusb.c:
> memory_region_init_ram_nomigrate(&s->dmem, OBJECT(s),
> "milkymist-softusb.dmem",
> hw/net/milkymist-minimac2.c:
> memory_region_init_ram_nomigrate(&s->buffers, OBJECT(dev),
> "milkymist-minimac2.buffers",
> hw/pci-host/prep.c:memory_region_init_ram_nomigrate(&s->bios,
> OBJECT(s), "bios", BIOS_SIZE,
> hw/sparc/sun4m.c:memory_region_init_ram_nomigrate(&s->mem, obj,
> hw/sparc/sun4m.c:memory_region_init_ram_nomigrate(&s->mem, obj,
> "sun4m.afx", 4, &error_fatal);
> hw/sparc/sun4m.c:memory_region_init_ram_nomigrate(&s->prom, obj,
> "sun4m.prom", PROM_SIZE_MAX,
> hw/sparc64/sun4u.c:memory_region_init_ram_nomigrate(&s->prom, obj,
> "sun4u.prom", PROM_SIZE_MAX,
> hw/sparc64/sun4u.c:memory_region_init_ram_nomigrate(&d->ram,
> OBJECT(d), "sun4u.ram", d->size,
> hw/xtensa/xtfpga.c:memory_region_init_ram_nomigrate(ram,
> OBJECT(s), "open_eth.ram", 16384,
> 
> registers it non-globally by hand:
> hw/xen/xen_pt_load_rom.c:
> memory_region_init_ram_nomigrate(&dev->rom, owner, name, st.st_size,
> &error_abort);
> 
> These callsites are not OK, because they don't register the ram:
> 
> hw/arm/aspeed.c:memory_region_init_rom_nomigrate(boot_rom,
> OBJECT(bmc), "aspeed.boot_rom",
> hw/arm/highbank.c:memory_region_init_ram_nomigrate(sysram, NULL,
> "highbank.sysram", 0x8000,
> hw/mips/boston.c:memory_region_init_rom_nomigrate(flash, NULL,
> hw/mips/mips_malta.c:memory_region_init_ram_nomigrate(bios_copy,
> NULL, "bios.1fc", BIOS_SIZE,
> hw/net/dp8393x.c:memory_region_init_ram_nomigrate(&s->prom, OBJECT(dev),
>   [device only used on mips jazz board]
> hw/pci-host/xilinx-pcie.c:memory_region_init_ram_nomigrate(&s->io,
> OBJECT(s), "io", 16, NULL);
>   [device only used on mips boston board]
> 
> Notes:
>  * any device that registers a ramblock globally is a bit dodgy, because
> it means you can't have more than one of it (the ramblock names would
> clash). We should fix those devices for the cases where we're willing
> to take the migration compat break.

Re: [Qemu-devel] [PATCH resend v2] qga: unset frozen state if no mount points are frozen

2018-04-21 Thread Chen Hanxiao

At 2018-04-02 14:15:48, "Chen Hanxiao"  wrote:
>From: Chen Hanxiao 
>
>If we set mountpoints to qmp_guest_fsfreeze_freeze_list,
>we may got nothing to freeze as all mountpoints are
>not valid.
>So call ga_unset_frozen in this senario.
>
>Also, if we return 0 frozen fs, there is no need to call
>guest-fsfreeze-thaw.
>
>Cc: Michael Roth 
>Signed-off-by: Chen Hanxiao 
>

Any comments?

Regards,
- Chen

Re: [Qemu-devel] [PATCH 2/3] hw/arm/aspeed: don't make 'boot_rom' region 'nomigrate'

2018-04-21 Thread Cédric Le Goater
On 04/20/2018 02:48 PM, Peter Maydell wrote:
> Currently we use memory_region_init_ram_nomigrate() to create
> the "aspeed.boot_rom" memory region, and we don't manually
> register it with vmstate_register_ram(). This currently
> means that its contents are migrated but as a ram block
> whose name is the empty string; in future it may mean they
> are not migrated at all. Use memory_region_init_ram() instead.
> 
> Note that this is a cross-version migration compatibility break
> for the "palmetto-bmc", "ast2500-evb" and "romulus-bmc" machines.

Migration does not work for these. 

> Signed-off-by: Peter Maydell 

Reviewed-by: Cédric Le Goater 
Tested-by: Cédric Le Goater 

On palmetto, romulus and ast2500-evb boards 

Thanks

C.

> ---
>  hw/arm/aspeed.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
> index 7088c907bd..aecb3c1e75 100644
> --- a/hw/arm/aspeed.c
> +++ b/hw/arm/aspeed.c
> @@ -225,7 +225,7 @@ static void aspeed_board_init(MachineState *machine,
>   * SoC and 128MB for the AST2500 SoC, which is twice as big as
>   * needed by the flash modules of the Aspeed machines.
>   */
> -memory_region_init_rom_nomigrate(boot_rom, OBJECT(bmc), 
> "aspeed.boot_rom",
> +memory_region_init_rom(boot_rom, OBJECT(bmc), "aspeed.boot_rom",
> fl->size, &error_abort);
>  memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR,
>  boot_rom);
> 




Re: [Qemu-devel] [PATCH 3/3] hw/arm/aspeed_soc: don't use vmstate_register_ram_global for SRAM

2018-04-21 Thread Cédric Le Goater
On 04/20/2018 02:48 PM, Peter Maydell wrote:
> Currently we use vmstate_register_ram_global() for the SRAM;
> this is not a good idea for devices, because it means that
> you can only ever create one instance of the device, as
> the second instance would get a RAM block name clash.
> Instead, use memory_region_init_ram(), which automatically
> registers the RAM block with a local-to-the-device name.
> 
> Note that this is a cross-version migration compatibility break
> for the "palmetto-bmc", "ast2500-evb" and "romulus-bmc" machines.
>
> Signed-off-by: Peter Maydell 

Reviewed-by: Cédric Le Goater 
Tested-by: Cédric Le Goater 

On palmetto, romulus and ast2500-evb boards 

Thanks

C.> ---
>  hw/arm/aspeed_soc.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
> index 30d25f8b06..407f10d0d4 100644
> --- a/hw/arm/aspeed_soc.c
> +++ b/hw/arm/aspeed_soc.c
> @@ -186,13 +186,12 @@ static void aspeed_soc_realize(DeviceState *dev, Error 
> **errp)
>  }
>  
>  /* SRAM */
> -memory_region_init_ram_nomigrate(&s->sram, OBJECT(dev), "aspeed.sram",
> +memory_region_init_ram(&s->sram, OBJECT(dev), "aspeed.sram",
> sc->info->sram_size, &err);
>  if (err) {
>  error_propagate(errp, err);
>  return;
>  }
> -vmstate_register_ram_global(&s->sram);
>  memory_region_add_subregion(get_system_memory(), ASPEED_SOC_SRAM_BASE,
>  &s->sram);
>  
> 




Re: [Qemu-devel] [PATCH v2] migration: discard non-migratable RAMBlocks

2018-04-21 Thread Peter Maydell
On 21 April 2018 at 09:52, Cédric Le Goater  wrote:
> On 04/20/2018 02:09 PM, Peter Maydell wrote:
>> Notes:
>>  * any device that registers a ramblock globally is a bit dodgy, because
>> it means you can't have more than one of it (the ramblock names would
>> clash). We should fix those devices for the cases where we're willing
>> to take the migration compat break.
>
> There are quite a few models calling memory_region_init_ram() with
> a NULL owner, see below. Should we fix all these ?

Note that I said "any device". A board model that uses a NULL
owner (or does a register_global) is fine, because by definition
there is only one board. (The "null owner" case is really intended
for this -- traditionally there was no useful owner object available
to board code, though these days MachineState is an object
so it could be used.) Most of the examples you cite are in
board code and are fine. Where they are in devices we should
indeed consider making them pass in the device pointer as
the owner, though.

thanks
-- PMM



Re: [Qemu-devel] [PATCH 2/3] hw/arm/aspeed: don't make 'boot_rom' region 'nomigrate'

2018-04-21 Thread Peter Maydell
On 21 April 2018 at 10:07, Cédric Le Goater  wrote:
> On 04/20/2018 02:48 PM, Peter Maydell wrote:
>> Currently we use memory_region_init_ram_nomigrate() to create
>> the "aspeed.boot_rom" memory region, and we don't manually
>> register it with vmstate_register_ram(). This currently
>> means that its contents are migrated but as a ram block
>> whose name is the empty string; in future it may mean they
>> are not migrated at all. Use memory_region_init_ram() instead.
>>
>> Note that this is a cross-version migration compatibility break
>> for the "palmetto-bmc", "ast2500-evb" and "romulus-bmc" machines.
>
> Migration does not work for these.

Well, at least we're not breaking anything :-)
Do you know why migration doesn't work?

thanks
-- PMM



Re: [Qemu-devel] [PATCH for-2.13] Clear mem_path if we fall back to anonymous RAM allocation

2018-04-21 Thread David Gibson
On Fri, Apr 20, 2018 at 05:34:38PM +0200, Paolo Bonzini wrote:
> On 20/04/2018 04:18, David Gibson wrote:
> > Paolo et al, as with my earlier patches adding some extensions to the
> > helpers for determining backing page sizes, if there are no objections
> > can I get an ack to merge this via my ppc tree?
> 
> Yes, go ahead!

Thanks, applied to ppc-for-2.13.

> 
> Thanks,
> 
> Paolo
> 




-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 2/3] hw/arm/aspeed: don't make 'boot_rom' region 'nomigrate'

2018-04-21 Thread Cédric Le Goater
On 04/21/2018 11:34 AM, Peter Maydell wrote:
> On 21 April 2018 at 10:07, Cédric Le Goater  wrote:
>> On 04/20/2018 02:48 PM, Peter Maydell wrote:
>>> Currently we use memory_region_init_ram_nomigrate() to create
>>> the "aspeed.boot_rom" memory region, and we don't manually
>>> register it with vmstate_register_ram(). This currently
>>> means that its contents are migrated but as a ram block
>>> whose name is the empty string; in future it may mean they
>>> are not migrated at all. Use memory_region_init_ram() instead.
>>>
>>> Note that this is a cross-version migration compatibility break
>>> for the "palmetto-bmc", "ast2500-evb" and "romulus-bmc" machines.
>>
>> Migration does not work for these.
> 
> Well, at least we're not breaking anything :-)
> Do you know why migration doesn't work?

A quick migration test shows that :

qemu-system-arm: Missing section footer for aspeed.timerctrl
qemu-system-arm: load of migration failed: Invalid argument

which is surprising, the state looks correct. I will dig in. 

Thanks,

C.


"aspeed.timerctrl (8)": {
"ctrl": "0x0003",
"ctrl2": "0x",
"timers": [
{
"id": "0x00",
"level": "0x",
"timer": "00 00 00 29 aa bd 88 44",
"reload": "0x",
"match": [
"0x",
"0x"
]
},
{
"id": "0x01",
"level": "0x",
"timer": "ff ff ff ff ff ff ff ff",
"reload": "0x",
"match": [
"0x",
"0x"
]
},
{
"id": "0x02",
"level": "0x",
"timer": "ff ff ff ff ff ff ff ff",
"reload": "0x",
"match": [
"0x",
"0x"
]
},
{
"id": "0x03",
"level": "0x",
"timer": "ff ff ff ff ff ff ff ff",
"reload": "0x",
"match": [
"0x",
"0x"
]
},
{
"id": "0x04",
"level": "0x",
"timer": "ff ff ff ff ff ff ff ff",
"reload": "0x",
"match": [
"0x",
"0x"
]
},
{
"id": "0x05",
"level": "0x",
"timer": "ff ff ff ff ff ff ff ff",
"reload": "0x",
"match": [
"0x",
"0x"
]
},
{
"id": "0x06",
"level": "0x",
"timer": "ff ff ff ff ff ff ff ff",
"reload": "0x",
"match": [
"0x",
"0x"
]
},
{
"id": "0x07",
"level": "0x",
"timer": "ff ff ff ff ff ff ff ff",
"reload": "0x",
"match": [
"0x",
"0x"
]
}
]
},



Re: [Qemu-devel] Let event loop use non-default GMainContext

2018-04-21 Thread Stefan Hajnoczi
On Sat, Apr 21, 2018 at 11:29 AM, Eva Chen  wrote:
> Thanks for your reply.
>
> Actually, I don't know there is existing work on emulating heterogeneous
> system under QEMU. The only work I know is that QEMU has implemented
> multicore in one QEMU process. But what I want to do is to simulate two
> heterogeneous cpu core at the same time.
> May I ask if you know any related work?

I'm not familiar with the work that has been done, but here are two
starting points you may find interesting:

http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1007.285&rep=rep1&type=pdf

https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg00171.html

Stefan



Re: [Qemu-devel] [PATCH 00/13] Drop compile time limit on number of serial ports

2018-04-21 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180420145249.32435-1-peter.mayd...@linaro.org
Subject: [Qemu-devel] [PATCH 00/13] Drop compile time limit on number of serial 
ports

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]   
patchew/20180420145249.32435-1-peter.mayd...@linaro.org -> 
patchew/20180420145249.32435-1-peter.mayd...@linaro.org
Switched to a new branch 'test'
96dfccb00b vl.c: new function max_serial_hds()
3e71fd34eb vl.c: Remove compile time limit on number of serial ports
c43b9fdfc2 superio: Don't use MAX_SERIAL_PORTS for serial port limit
b770c540a9 serial-isa: Use MAX_ISA_SERIAL_PORTS instead of MAX_SERIAL_PORTS
038ffe574e hw/char/exynos4210_uart.c: Remove unneeded handling of NULL chardev
813b6d6287 Remove checks on MAX_SERIAL_PORTS that are just bounds checks
066f676d66 Change references to serial_hds[] to serial_hd()
ede9de0c27 vl.c: Provide accessor function serial_hd() for serial_hds[] array
bfbf0f20cb hw/xtensa/xtfpga.c: Don't create "null" chardevs for serial devices
d31d02cbd8 hw/mips/mips_malta: Don't create "null" chardevs for serial devices
32216ce5e5 hw/mips/boston.c: Don't create "null" chardevs for serial devices
364918ebd9 hw/arm/fsl-imx*: Don't create "null" chardevs for serial devices
4a5a612b77 hw/char/serial: Allow disconnected chardevs

=== OUTPUT BEGIN ===
Checking PATCH 1/13: hw/char/serial: Allow disconnected chardevs...
Checking PATCH 2/13: hw/arm/fsl-imx*: Don't create "null" chardevs for serial 
devices...
Checking PATCH 3/13: hw/mips/boston.c: Don't create "null" chardevs for serial 
devices...
Checking PATCH 4/13: hw/mips/mips_malta: Don't create "null" chardevs for 
serial devices...
Checking PATCH 5/13: hw/xtensa/xtfpga.c: Don't create "null" chardevs for 
serial devices...
Checking PATCH 6/13: vl.c: Provide accessor function serial_hd() for 
serial_hds[] array...
Checking PATCH 7/13: Change references to serial_hds[] to serial_hd()...
WARNING: line over 80 characters
#581: FILE: hw/cris/axis_dev88.c:340:
+etraxfs_ser_create(0x30026000 + i * 0x2000, irq[0x14 + i], 
serial_hd(i));

ERROR: spaces required around that '/' (ctx:VxV)
#824: FILE: hw/moxie/moxiesim.c:146:
+   800/16, serial_hd(0), DEVICE_LITTLE_ENDIAN);
   ^

total: 1 errors, 1 warnings, 852 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 8/13: Remove checks on MAX_SERIAL_PORTS that are just bounds 
checks...
Checking PATCH 9/13: hw/char/exynos4210_uart.c: Remove unneeded handling of 
NULL chardev...
Checking PATCH 10/13: serial-isa: Use MAX_ISA_SERIAL_PORTS instead of 
MAX_SERIAL_PORTS...
Checking PATCH 11/13: superio: Don't use MAX_SERIAL_PORTS for serial port 
limit...
Checking PATCH 12/13: vl.c: Remove compile time limit on number of serial 
ports...
ERROR: do not initialise statics to 0 or NULL
#35: FILE: vl.c:157:
+static int num_serial_hds = 0;

ERROR: do not initialise statics to 0 or NULL
#36: FILE: vl.c:158:
+static Chardev **serial_hds = NULL;

total: 2 errors, 0 warnings, 52 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 13/13: vl.c: new function max_serial_hds()...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[Qemu-devel] unknown keycodes

2018-04-21 Thread Mike R
(qemu) unknown keycodes `empty+aliases(qwerty)’, please report to 
qemu-devel@nongnu.org

reported.





signature.asc
Description: Message signed with OpenPGP using GPGMail


[Qemu-devel] [PATCH v2 0/9] block: Add COR filter driver

2018-04-21 Thread Max Reitz
This series adds a copy-on-read block filter driver which works by
simply setting the BDRV_REQ_COPY_ON_READ flag on write requests (and
otherwise just passing everything through).

Cover letter of v1:
http://lists.nongnu.org/archive/html/qemu-block/2018-04/msg00285.html


v2:
- Renamed the driver "copy-on-read" (from just "cor") [Stefan]
- Commit message typo fix in patch 2 [Stefan, Berto]


git-backport-diff against v2:

Key:
[] : patches are identical
[] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/9:[0020] [FC] 'block: Add COR filter driver'
002/9:[] [--] 'block: BLK_PERM_WRITE includes ..._UNCHANGED'
003/9:[] [--] 'block: Add BDRV_REQ_WRITE_UNCHANGED flag'
004/9:[] [--] 'block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes'
005/9:[] [--] 'block/quorum: Support BDRV_REQ_WRITE_UNCHANGED'
006/9:[] [--] 'block: Support BDRV_REQ_WRITE_UNCHANGED in filters'
007/9:[] [--] 'iotests: Clean up wrap image in 197'
008/9:[0010] [FC] 'iotests: Copy 197 for COR filter driver'
009/9:[0004] [FC] 'iotests: Add test for COR across nodes'


Max Reitz (9):
  block: Add COR filter driver
  block: BLK_PERM_WRITE includes ..._UNCHANGED
  block: Add BDRV_REQ_WRITE_UNCHANGED flag
  block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes
  block/quorum: Support BDRV_REQ_WRITE_UNCHANGED
  block: Support BDRV_REQ_WRITE_UNCHANGED in filters
  iotests: Clean up wrap image in 197
  iotests: Copy 197 for COR filter driver
  iotests: Add test for COR across nodes

 block/Makefile.objs|   2 +-
 qapi/block-core.json   |   5 +-
 include/block/block.h  |   9 ++-
 block/blkdebug.c   |   9 +--
 block/blkreplay.c  |   3 +
 block/blkverify.c  |   3 +
 block/copy-on-read.c   | 173 +
 block/io.c |  12 +++-
 block/mirror.c |   2 +
 block/quorum.c |  19 +++--
 block/raw-format.c |   9 +--
 block/throttle.c   |   6 +-
 tests/qemu-iotests/197 |   1 +
 tests/qemu-iotests/215 | 120 +++
 tests/qemu-iotests/215.out |  26 +++
 tests/qemu-iotests/216 | 117 ++
 tests/qemu-iotests/216.out |  28 
 tests/qemu-iotests/group   |   2 +
 18 files changed, 524 insertions(+), 22 deletions(-)
 create mode 100644 block/copy-on-read.c
 create mode 100755 tests/qemu-iotests/215
 create mode 100644 tests/qemu-iotests/215.out
 create mode 100755 tests/qemu-iotests/216
 create mode 100644 tests/qemu-iotests/216.out

-- 
2.14.3




[Qemu-devel] [PATCH v2 2/9] block: BLK_PERM_WRITE includes ..._UNCHANGED

2018-04-21 Thread Max Reitz
Currently we never actually check whether the WRITE_UNCHANGED
permission has been taken for unchanging writes.  But the one check that
is commented out checks both WRITE and WRITE_UNCHANGED; and considering
that WRITE_UNCHANGED is already documented as being weaker than WRITE,
we should probably explicitly document WRITE to include WRITE_UNCHANGED.

Signed-off-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alberto Garcia 
---
 include/block/block.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index cdec3639a3..397b5e8d44 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -205,6 +205,9 @@ enum {
  * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
  * required for writes to the block node when the caller promises that
  * the visible disk content doesn't change.
+ *
+ * As the BLK_PERM_WRITE permission is strictly stronger, either is
+ * sufficient to perform an unchanging write.
  */
 BLK_PERM_WRITE_UNCHANGED= 0x04,
 
-- 
2.14.3




[Qemu-devel] [PATCH v2 1/9] block: Add COR filter driver

2018-04-21 Thread Max Reitz
This adds a simple copy-on-read filter driver.  It relies on the already
existing COR functionality in the central block layer code, which may be
moved here once we no longer need it there.

Signed-off-by: Max Reitz 
---
 block/Makefile.objs  |   2 +-
 qapi/block-core.json |   5 +-
 block/copy-on-read.c | 171 +++
 3 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 block/copy-on-read.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index d644bac60a..899bfb5e2c 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -26,7 +26,7 @@ block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
-block-obj-y += throttle.o
+block-obj-y += throttle.o copy-on-read.o
 
 block-obj-y += crypto.o
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c50517bff3..32ca94e294 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2506,11 +2506,12 @@
 # @vxhs: Since 2.10
 # @throttle: Since 2.11
 # @nvme: Since 2.12
+# @copy-on-read: Since 2.13
 #
 # Since: 2.9
 ##
 { 'enum': 'BlockdevDriver',
-  'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop',
+  'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read',
 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
@@ -3522,6 +3523,7 @@
   'blkverify':  'BlockdevOptionsBlkverify',
   'bochs':  'BlockdevOptionsGenericFormat',
   'cloop':  'BlockdevOptionsGenericFormat',
+  'copy-on-read':'BlockdevOptionsGenericFormat',
   'dmg':'BlockdevOptionsGenericFormat',
   'file':   'BlockdevOptionsFile',
   'ftp':'BlockdevOptionsCurlFtp',
@@ -4049,6 +4051,7 @@
   'blkverify':  'BlockdevCreateNotSupported',
   'bochs':  'BlockdevCreateNotSupported',
   'cloop':  'BlockdevCreateNotSupported',
+  'copy-on-read':   'BlockdevCreateNotSupported',
   'dmg':'BlockdevCreateNotSupported',
   'file':   'BlockdevCreateOptionsFile',
   'ftp':'BlockdevCreateNotSupported',
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
new file mode 100644
index 00..823ec751c4
--- /dev/null
+++ b/block/copy-on-read.c
@@ -0,0 +1,171 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Author:
+ *   Max Reitz 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+
+
+static int cor_open(BlockDriverState *bs, QDict *options, int flags,
+Error **errp)
+{
+bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
+   errp);
+if (!bs->file) {
+return -EINVAL;
+}
+
+bs->supported_write_flags = BDRV_REQ_FUA &
+bs->file->bs->supported_write_flags;
+
+bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+bs->file->bs->supported_zero_flags;
+
+return 0;
+}
+
+
+static void cor_close(BlockDriverState *bs)
+{
+}
+
+
+#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
+  | BLK_PERM_WRITE \
+  | BLK_PERM_RESIZE)
+#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH)
+
+static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
+   const BdrvChildRole *role,
+   BlockReopenQueue *reopen_queue,
+   uint64_t perm, uint64_t shared,
+   uint64_t *nperm, uint64_t *nshared)
+{
+if (c == NULL) {
+*nperm = (perm & PERM_PASSTHROUGH) | BLK_PERM_WRITE_UNCHANGED;
+*nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
+return;
+}
+
+*nperm = (perm & PERM_PASSTHROUGH) |
+ (c->perm & PERM_UNCHANGED);
+*nshared = (shared & PERM_PASSTHROUGH) |
+   (c->shared_perm & PERM_UNCHANGED);
+}
+
+
+static int64_t cor_getlength(BlockDriverState *bs)
+{
+return bdrv_getlength(bs->file->bs);
+}
+
+
+static int cor_truncate(BlockDriverState *bs, int64_t offset,

[Qemu-devel] [PATCH v2 9/9] iotests: Add test for COR across nodes

2018-04-21 Thread Max Reitz
COR across nodes (that is, you have some filter node between the
actually COR target and the node that performs the COR) cannot reliably
work together with the permission system when there is no explicit COR
node that can request the WRITE_UNCHANGED permission for its child.
This is because COR (currently) sneaks its requests by the usual
permission checks, so it can work without a WRITE* permission; but if
there is a filter node in between, that will re-issue the request, which
then passes through the usual check -- and if nobody has requested a
WRITE_UNCHANGED permission, that check will fail.

There is no real direct fix apart from hoping that there is someone who
has requested that permission; in case of just the qemu-io HMP command
(and no guest device), however, that is not the case.  The real real fix
is to implement the copy-on-read flag through an implicitly added COR
node.  Such a node can request the necessary permissions as shown in
this test.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/216 | 117 +
 tests/qemu-iotests/216.out |  28 +++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 146 insertions(+)
 create mode 100755 tests/qemu-iotests/216
 create mode 100644 tests/qemu-iotests/216.out

diff --git a/tests/qemu-iotests/216 b/tests/qemu-iotests/216
new file mode 100755
index 00..b362cf93a8
--- /dev/null
+++ b/tests/qemu-iotests/216
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+#
+# Copy-on-read tests using a COR filter node
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+# Creator/Owner: Max Reitz 
+#
+# Non-shared storage migration test using NBD server and drive-mirror
+
+import iotests
+from iotests import log, qemu_img_pipe, qemu_io, filter_qemu_io
+
+# Need backing file support
+iotests.verify_image_format(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'])
+iotests.verify_platform(['linux'])
+
+log('')
+log('=== Copy-on-read across nodes ===')
+log('')
+
+# The old copy-on-read mechanism without a filter node cannot request
+# WRITE_UNCHANGED permissions for its child.  Therefore it just tries
+# to sneak its write by the usual permission system and holds its
+# fingers crossed.  However, that sneaking does not work so well when
+# there is a filter node in the way: That will receive the write
+# request and re-issue a new one to its child, which this time is a
+# proper write request that will make the permission system cough --
+# unless there is someone at the top (like a guest device) that has
+# requested write permissions.
+#
+# A COR filter node, however, can request the proper permissions for
+# its child and therefore is not hit by this issue.
+
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('top.img') as top_img_path, \
+ iotests.VM() as vm:
+
+log('--- Setting up images ---')
+log('')
+
+qemu_img_pipe('create', '-f', iotests.imgfmt, base_img_path, '64M')
+
+log(filter_qemu_io(qemu_io(base_img_path, '-c', 'write -P 1 0M 1M')))
+
+qemu_img_pipe('create', '-f', iotests.imgfmt, '-b', base_img_path,
+  top_img_path)
+
+log(filter_qemu_io(qemu_io(top_img_path,  '-c', 'write -P 2 1M 1M')))
+
+log('')
+log('--- Doing COR ---')
+log('')
+
+# Compare with e.g. the following:
+#   vm.add_drive_raw('if=none,node-name=node0,copy-on-read=on,driver=raw,' 
\
+#'file.driver=%s,file.file.filename=%s' %
+#   (iotests.imgfmt, top_img_path))
+# (Remove the blockdev-add instead.)
+# ((Not tested here because it hits an assertion in the permission
+#   system.))
+
+vm.launch()
+
+log(vm.qmp('blockdev-add',
+node_name='node0',
+driver='copy-on-read',
+file={
+'driver': 'raw',
+'file': {
+'driver': 'copy-on-read',
+'file': {
+'driver': 'raw',
+'file': {
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': top_img_path
+ 

[Qemu-devel] [PATCH v2 4/9] block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes

2018-04-21 Thread Max Reitz
Signed-off-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alberto Garcia 
---
 block/io.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index 134b2a498f..fada4efbf3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1115,13 +1115,15 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 /* FIXME: Should we (perhaps conditionally) be setting
  * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
  * that still correctly reads as zero? */
-ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
+ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+   BDRV_REQ_WRITE_UNCHANGED);
 } else {
 /* This does not change the data on the disk, it is not
  * necessary to flush even in cache=writethrough mode.
  */
 ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
-  &local_qiov, 0);
+  &local_qiov,
+  BDRV_REQ_WRITE_UNCHANGED);
 }
 
 if (ret < 0) {
-- 
2.14.3




[Qemu-devel] [PATCH v2 6/9] block: Support BDRV_REQ_WRITE_UNCHANGED in filters

2018-04-21 Thread Max Reitz
Update the rest of the filter drivers to support
BDRV_REQ_WRITE_UNCHANGED.  They already forward write request flags to
their children, so we just have to announce support for it.

This patch does not cover the replication driver because that currently
does not support flags at all, and because it just grabs the WRITE
permission for its children when it can, so we should be fine just
submitting the incoming WRITE_UNCHANGED requests as normal writes.

It also does not cover format drivers for similar reasons.  They all use
bdrv_format_default_perms() as their .bdrv_child_perm() implementation
so they just always grab the WRITE permission for their file children
whenever possible.  In addition, it often would be difficult to
ascertain whether incoming unchanging writes end up as unchanging writes
in their files.  So we just leave them as normal potentially changing
writes.

Signed-off-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alberto Garcia 
---
 block/blkdebug.c |  9 +
 block/blkreplay.c|  3 +++
 block/blkverify.c|  3 +++
 block/copy-on-read.c | 10 ++
 block/mirror.c   |  2 ++
 block/raw-format.c   |  9 +
 block/throttle.c |  6 --
 7 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 589712475a..762ec2527c 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -398,10 +398,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto out;
 }
 
-bs->supported_write_flags = BDRV_REQ_FUA &
-bs->file->bs->supported_write_flags;
-bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
-bs->file->bs->supported_zero_flags;
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+bs->file->bs->supported_zero_flags);
 ret = -EINVAL;
 
 /* Set alignment overrides */
diff --git a/block/blkreplay.c b/block/blkreplay.c
index fe5a9b4a98..b016dbeee7 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -35,6 +35,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto fail;
 }
 
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
 ret = 0;
 fail:
 return ret;
diff --git a/block/blkverify.c b/block/blkverify.c
index 331365be33..1646404b46 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -141,6 +141,9 @@ static int blkverify_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto fail;
 }
 
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
+
 ret = 0;
 fail:
 qemu_opts_del(opts);
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 823ec751c4..6a9720 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -33,11 +33,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-bs->supported_write_flags = BDRV_REQ_FUA &
-bs->file->bs->supported_write_flags;
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+(BDRV_REQ_FUA &
+bs->file->bs->supported_write_flags);
 
-bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
-bs->file->bs->supported_zero_flags;
+bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+   ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+bs->file->bs->supported_zero_flags);
 
 return 0;
 }
diff --git a/block/mirror.c b/block/mirror.c
index 820f512c7b..6fdd36d27f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1148,6 +1148,8 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 mirror_top_bs->implicit = true;
 }
 mirror_top_bs->total_sectors = bs->total_sectors;
+mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
 bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
 
 /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
diff --git a/block/raw-format.c b/block/raw-format.c
index a378547c99..fe33693a2d 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -415,10 +415,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 bs->sg = bs->file->bs->sg;
-bs->supported_write_flags = BDRV_REQ_FUA &
-bs->file->bs->supported_write_flags;
-bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
-bs->file->bs->supported_zero_flags;
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHAN

[Qemu-devel] [PATCH v2 3/9] block: Add BDRV_REQ_WRITE_UNCHANGED flag

2018-04-21 Thread Max Reitz
This flag signifies that a write request will not change the visible
disk content.  With this flag set, it is sufficient to have the
BLK_PERM_WRITE_UNCHANGED permission instead of BLK_PERM_WRITE.

Signed-off-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alberto Garcia 
---
 include/block/block.h | 6 +-
 block/io.c| 6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 397b5e8d44..3894edda9d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -54,8 +54,12 @@ typedef enum {
 BDRV_REQ_FUA= 0x10,
 BDRV_REQ_WRITE_COMPRESSED   = 0x20,
 
+/* Signifies that this write request will not change the visible disk
+ * content. */
+BDRV_REQ_WRITE_UNCHANGED= 0x40,
+
 /* Mask of valid flags */
-BDRV_REQ_MASK   = 0x3f,
+BDRV_REQ_MASK   = 0x7f,
 } BdrvRequestFlags;
 
 typedef struct BlockSizes {
diff --git a/block/io.c b/block/io.c
index bd9a19a9c4..134b2a498f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1501,7 +1501,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild 
*child,
 assert(!waited || !req->serialising);
 assert(req->overlap_offset <= offset);
 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-assert(child->perm & BLK_PERM_WRITE);
+if (flags & BDRV_REQ_WRITE_UNCHANGED) {
+assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+} else {
+assert(child->perm & BLK_PERM_WRITE);
+}
 assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
 
 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
-- 
2.14.3




[Qemu-devel] [PATCH v2 5/9] block/quorum: Support BDRV_REQ_WRITE_UNCHANGED

2018-04-21 Thread Max Reitz
We just need to forward it to quorum's children (except in case of a
rewrite because of corruption), but for that we first have to support
flags in child requests at all.

Signed-off-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alberto Garcia 
---
 block/quorum.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/block/quorum.c b/block/quorum.c
index cfe484a945..8cd689b2c1 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -115,6 +115,7 @@ struct QuorumAIOCB {
 /* Request metadata */
 uint64_t offset;
 uint64_t bytes;
+int flags;
 
 QEMUIOVector *qiov; /* calling IOV */
 
@@ -157,7 +158,8 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, 
QuorumVoteValue *b)
 static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
QEMUIOVector *qiov,
uint64_t offset,
-   uint64_t bytes)
+   uint64_t bytes,
+   int flags)
 {
 BDRVQuorumState *s = bs->opaque;
 QuorumAIOCB *acb = g_new(QuorumAIOCB, 1);
@@ -168,6 +170,7 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs,
 .bs = bs,
 .offset = offset,
 .bytes  = bytes,
+.flags  = flags,
 .qiov   = qiov,
 .votes.compare  = quorum_sha256_compare,
 .votes.vote_list= QLIST_HEAD_INITIALIZER(acb.votes.vote_list),
@@ -271,9 +274,11 @@ static void quorum_rewrite_entry(void *opaque)
 BDRVQuorumState *s = acb->bs->opaque;
 
 /* Ignore any errors, it's just a correction attempt for already
- * corrupted data. */
+ * corrupted data.
+ * Mask out BDRV_REQ_WRITE_UNCHANGED because this overwrites the
+ * area with different data from the other children. */
 bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes,
-acb->qiov, 0);
+acb->qiov, acb->flags & ~BDRV_REQ_WRITE_UNCHANGED);
 
 /* Wake up the caller after the last rewrite */
 acb->rewrite_count--;
@@ -673,7 +678,7 @@ static int quorum_co_preadv(BlockDriverState *bs, uint64_t 
offset,
 uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
 BDRVQuorumState *s = bs->opaque;
-QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
 int ret;
 
 acb->is_read = true;
@@ -699,7 +704,7 @@ static void write_quorum_entry(void *opaque)
 
 sacb->bs = s->children[i]->bs;
 sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes,
-acb->qiov, 0);
+acb->qiov, acb->flags);
 if (sacb->ret == 0) {
 acb->success_count++;
 } else {
@@ -719,7 +724,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
  uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
 BDRVQuorumState *s = bs->opaque;
-QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes);
+QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags);
 int i, ret;
 
 for (i = 0; i < s->num_children; i++) {
@@ -961,6 +966,8 @@ static int quorum_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 s->next_child_index = s->num_children;
 
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
+
 g_free(opened);
 goto exit;
 
-- 
2.14.3




[Qemu-devel] [PATCH v2 7/9] iotests: Clean up wrap image in 197

2018-04-21 Thread Max Reitz
Signed-off-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alberto Garcia 
---
 tests/qemu-iotests/197 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
index 5e869fe2b7..3ae4975eec 100755
--- a/tests/qemu-iotests/197
+++ b/tests/qemu-iotests/197
@@ -44,6 +44,7 @@ esac
 _cleanup()
 {
 _cleanup_test_img
+rm -f "$TEST_WRAP"
 rm -f "$BLKDBG_CONF"
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
-- 
2.14.3




[Qemu-devel] [PATCH v2 8/9] iotests: Copy 197 for COR filter driver

2018-04-21 Thread Max Reitz
iotest 197 tests copy-on-read using the (now old) copy-on-read flag.
Copy it to 215 and modify it to use the COR filter driver instead.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/215 | 120 +
 tests/qemu-iotests/215.out |  26 ++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 147 insertions(+)
 create mode 100755 tests/qemu-iotests/215
 create mode 100644 tests/qemu-iotests/215.out

diff --git a/tests/qemu-iotests/215 b/tests/qemu-iotests/215
new file mode 100755
index 00..2e616ed659
--- /dev/null
+++ b/tests/qemu-iotests/215
@@ -0,0 +1,120 @@
+#!/bin/bash
+#
+# Test case for copy-on-read into qcow2, using the COR filter driver
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1 # failure is the default!
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+TEST_WRAP="$TEST_DIR/t.wrap.qcow2"
+BLKDBG_CONF="$TEST_DIR/blkdebug.conf"
+
+# Sanity check: our use of blkdebug fails if $TEST_DIR contains spaces
+# or other problems
+case "$TEST_DIR" in
+*[^-_a-zA-Z0-9/]*)
+_notrun "Suspicious TEST_DIR='$TEST_DIR', cowardly refusing to run" ;;
+esac
+
+_cleanup()
+{
+_cleanup_test_img
+rm -f "$TEST_WRAP"
+rm -f "$BLKDBG_CONF"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# Test is supported for any backing file; but we force qcow2 for our wrapper.
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+# LUKS support may be possible, but it complicates things.
+_unsupported_fmt luks
+
+echo
+echo '=== Copy-on-read ==='
+echo
+
+# Prep the images
+# VPC rounds image sizes to a specific geometry, force a specific size.
+if [ "$IMGFMT" = "vpc" ]; then
+IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
+fi
+_make_test_img 4G
+$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
+IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
+_make_test_img -F "$IMGFMT" -b "$TEST_IMG" | _filter_img_create
+$QEMU_IO -f qcow2 -c "write -z -u 1M 64k" "$TEST_WRAP" | _filter_qemu_io
+
+# Ensure that a read of two clusters, but where one is already allocated,
+# does not re-write the allocated cluster
+cat > "$BLKDBG_CONF" <&1 | _filter_qemu_io)
+case $output in
+*allocate*)
+_notrun "Insufficent memory to run test" ;;
+*) printf '%s\n' "$output" ;;
+esac
+$QEMU_IO \
+-c "open -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
+-c "read -P 0 $((3*1024*1024*1024 + 1024)) 1k" \
+| _filter_qemu_io
+
+# Copy-on-read is incompatible with read-only
+$QEMU_IO \
+-c "open -r -o driver=copy-on-read,file.driver=qcow2 $TEST_WRAP" \
+2>&1 | _filter_testdir
+
+# Break the backing chain, and show that images are identical, and that
+# we properly copied over explicit zeros.
+$QEMU_IMG rebase -u -b "" -f qcow2 "$TEST_WRAP"
+$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
+_check_test_img
+$QEMU_IMG compare -f $IMGFMT -F qcow2 "$TEST_IMG" "$TEST_WRAP"
+
+# success, all done
+echo '*** done'
+status=0
diff --git a/tests/qemu-iotests/215.out b/tests/qemu-iotests/215.out
new file mode 100644
index 00..70b0f5fb19
--- /dev/null
+++ b/tests/qemu-iotests/215.out
@@ -0,0 +1,26 @@
+QA output created by 215
+
+=== Copy-on-read ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296
+wrote 1024/1024 bytes at offset 3221225472
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.wrap.IMGFMT', fmt=IMGFMT size=4294967296 
backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
+wrote 65536/65536 bytes at offset 1048576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 1048576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 0/0 bytes at offset 0
+0 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2147483136/2147483136 bytes at offset 1024
+2 GiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 3221226496
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+can't open device TEST_DIR/t.wrap.qcow2: Block node is read-only
+2 GiB (0x8001) bytes allocated at offset 0 bytes (0x0)
+1023.938 MiB (0x3fff) bytes not allocated at offset 2 GiB (0x8001)
+64 KiB (0x1) bytes allocated at offset 3 GiB 

Re: [Qemu-devel] [PATCH] qemu-img: Check post-truncation size

2018-04-21 Thread Eric Blake
On 04/20/2018 05:53 PM, Max Reitz wrote:
> Some block drivers (iscsi and file-posix when dealing with device files)
> do not actually support truncation, even though they provide a
> .bdrv_truncate() method and will happily return success when providing a
> new size that does not exceed the current size.  This is because these
> drivers expect the user to resize the image outside of qemu and then
> provide qemu with that information through the block_resize command
> (compare cb1b83e740384b4e0d950f3d7c81c02b8ce86c2e).
> 
> Of course, anyone using qemu-img resize will find that behavior useless.
> So we should check the actual size of the image after the supposedly
> successful truncation took place, emit an error if nothing changed and
> emit a warning if the target size was not met.
> 
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1523065
> Signed-off-by: Max Reitz 
> ---
> Testing this is not quite trivial.  Or, well, it is, but you need either
> an iscsi test server or root access.

Or, you need NBD to document and implement NBD_CMD_RESIZE, and then the
nbd driver will support .bdrv_truncate() but fail when talking to a
server that doesn't actually resize after all.

> 
> Because in my opinion iotests that require root access are never run, I
> decided against writing such a test case.

So maybe when I get around to adding NBD resize support, I should add
such a test ;)


> +if (new_size != total_size && new_size == current_size) {
> +error_report("Image was not resized. Resizing may not be supported "
> + "for this image.");

error_report() generally does not have trailing dot, and generally has a
single sentence.  Would this be better as:

Image was not resized; resizing may not be supported for this image

> +ret = -1;
> +goto out;
> +}
> +
> +if (new_size != total_size) {
> +warn_report("Image should have been resized to %" PRIi64
> +" bytes, but was resized to %" PRIi64 " bytes.",
> +total_size, new_size);

Trailing dot again.  Also, PRId64 is much more common than PRIi64, even
though the two are identical in behavior.

But the idea makes sense to me.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH] qemu-img: Check post-truncation size

2018-04-21 Thread Max Reitz
On 2018-04-21 17:35, Eric Blake wrote:
> On 04/20/2018 05:53 PM, Max Reitz wrote:
>> Some block drivers (iscsi and file-posix when dealing with device files)
>> do not actually support truncation, even though they provide a
>> .bdrv_truncate() method and will happily return success when providing a
>> new size that does not exceed the current size.  This is because these
>> drivers expect the user to resize the image outside of qemu and then
>> provide qemu with that information through the block_resize command
>> (compare cb1b83e740384b4e0d950f3d7c81c02b8ce86c2e).
>>
>> Of course, anyone using qemu-img resize will find that behavior useless.
>> So we should check the actual size of the image after the supposedly
>> successful truncation took place, emit an error if nothing changed and
>> emit a warning if the target size was not met.
>>
>> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1523065
>> Signed-off-by: Max Reitz 
>> ---
>> Testing this is not quite trivial.  Or, well, it is, but you need either
>> an iscsi test server or root access.
> 
> Or, you need NBD to document and implement NBD_CMD_RESIZE, and then the
> nbd driver will support .bdrv_truncate() but fail when talking to a
> server that doesn't actually resize after all.

I suppose the NBD client would recognize that, though, and return an
error code (and set *errp).  The issue in this case is that the drivers
in question pretend that everything went according to plan (they return
success) when actually nothing was resized at all.

>>
>> Because in my opinion iotests that require root access are never run, I
>> decided against writing such a test case.
> 
> So maybe when I get around to adding NBD resize support, I should add
> such a test ;)
> 
> 
>> +if (new_size != total_size && new_size == current_size) {
>> +error_report("Image was not resized. Resizing may not be supported "
>> + "for this image.");
> 
> error_report() generally does not have trailing dot, and generally has a
> single sentence.  Would this be better as:
> 
> Image was not resized; resizing may not be supported for this image

Yes, it would.  I just made this a qprintf() in the first version and
forgot to change it when making it an error_report().

>> +ret = -1;
>> +goto out;
>> +}
>> +
>> +if (new_size != total_size) {
>> +warn_report("Image should have been resized to %" PRIi64
>> +" bytes, but was resized to %" PRIi64 " bytes.",
>> +total_size, new_size);
> 
> Trailing dot again.

Same here, yes.

>  Also, PRId64 is much more common than PRIi64, even
> though the two are identical in behavior.

:-(

But I like my %i!

> But the idea makes sense to me.

OK, thanks.

Max



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [PATCH v2] qemu-img: Check post-truncation size

2018-04-21 Thread Max Reitz
Some block drivers (iscsi and file-posix when dealing with device files)
do not actually support truncation, even though they provide a
.bdrv_truncate() method and will happily return success when providing a
new size that does not exceed the current size.  This is because these
drivers expect the user to resize the image outside of qemu and then
provide qemu with that information through the block_resize command
(compare cb1b83e740384b4e0d950f3d7c81c02b8ce86c2e).

Of course, anyone using qemu-img resize will find that behavior useless.
So we should check the actual size of the image after the supposedly
successful truncation took place, emit an error if nothing changed and
emit a warning if the target size was not met.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1523065
Signed-off-by: Max Reitz 
---
v2: Drop dots in {error,warn}_report() messages [Eric]

v1: http://lists.nongnu.org/archive/html/qemu-block/2018-04/msg00441.html
---
 qemu-img.c | 39 +++
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 855fa52514..83208873a5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3381,7 +3381,7 @@ static int img_resize(int argc, char **argv)
 Error *err = NULL;
 int c, ret, relative;
 const char *filename, *fmt, *size;
-int64_t n, total_size, current_size;
+int64_t n, total_size, current_size, new_size;
 bool quiet = false;
 BlockBackend *blk = NULL;
 PreallocMode prealloc = PREALLOC_MODE_OFF;
@@ -3557,11 +3557,42 @@ static int img_resize(int argc, char **argv)
 }
 
 ret = blk_truncate(blk, total_size, prealloc, &err);
-if (!ret) {
-qprintf(quiet, "Image resized.\n");
-} else {
+if (ret < 0) {
 error_report_err(err);
+goto out;
+}
+
+new_size = blk_getlength(blk);
+if (new_size < 0) {
+error_report("Failed to verify truncated image length: %s",
+ strerror(-new_size));
+ret = -1;
+goto out;
 }
+
+/* Some block drivers implement a truncation method, but only so
+ * the user can cause qemu to refresh the image's size from disk.
+ * The idea is that the user resizes the image outside of qemu and
+ * then invokes block_resize to inform qemu about it.
+ * (This includes iscsi and file-posix for device files.)
+ * Of course, that is not the behavior someone invoking
+ * qemu-img resize would find useful, so we catch that behavior
+ * here and tell the user. */
+if (new_size != total_size && new_size == current_size) {
+error_report("Image was not resized; resizing may not be supported "
+ "for this image");
+ret = -1;
+goto out;
+}
+
+if (new_size != total_size) {
+warn_report("Image should have been resized to %" PRIi64
+" bytes, but was resized to %" PRIi64 " bytes",
+total_size, new_size);
+}
+
+qprintf(quiet, "Image resized.\n");
+
 out:
 blk_unref(blk);
 if (ret) {
-- 
2.14.3




[Qemu-devel] [PATCH 2/7] block: Add Error parameter to bdrv_amend_options

2018-04-21 Thread Max Reitz
Looking at the qcow2 code that is riddled with error_report() calls,
this is really how it should have been from the start.

Signed-off-by: Max Reitz 
---
 include/block/block.h  |  3 +-
 include/block/block_int.h  |  3 +-
 block.c|  8 --
 block/qcow2.c  | 72 ++
 qemu-img.c |  4 +--
 tests/qemu-iotests/060.out |  4 +--
 tests/qemu-iotests/061.out |  7 -
 tests/qemu-iotests/080.out |  4 +--
 tests/qemu-iotests/112.out |  3 --
 9 files changed, 64 insertions(+), 44 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index cdec3639a3..ba9cfec384 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -336,7 +336,8 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, 
BdrvCheckMode fix);
 typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
   int64_t total_work_size, void *opaque);
 int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
-   BlockDriverAmendStatusCB *status_cb, void *cb_opaque);
+   BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+   Error **errp);
 
 /* external snapshots */
 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c4dd1d4bb8..d913ed1ea1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -324,7 +324,8 @@ struct BlockDriver {
 
 int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
   BlockDriverAmendStatusCB *status_cb,
-  void *cb_opaque);
+  void *cb_opaque,
+  Error **errp);
 
 void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
 
diff --git a/block.c b/block.c
index a2caadf0a0..fefe1109eb 100644
--- a/block.c
+++ b/block.c
@@ -4996,15 +4996,19 @@ void bdrv_remove_aio_context_notifier(BlockDriverState 
*bs,
 }
 
 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
-   BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
+   BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+   Error **errp)
 {
 if (!bs->drv) {
+error_setg(errp, "Node is ejected");
 return -ENOMEDIUM;
 }
 if (!bs->drv->bdrv_amend_options) {
+error_setg(errp, "Block driver '%s' does not support option amendment",
+   bs->drv->format_name);
 return -ENOTSUP;
 }
-return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
+return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque, errp);
 }
 
 /* This function will be called by the bdrv_recurse_is_first_non_filter method
diff --git a/block/qcow2.c b/block/qcow2.c
index 091088e09e..eb86ba0a2a 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4049,7 +4049,8 @@ static int qcow2_load_vmstate(BlockDriverState *bs, 
QEMUIOVector *qiov,
  * have to be removed.
  */
 static int qcow2_downgrade(BlockDriverState *bs, int target_version,
-   BlockDriverAmendStatusCB *status_cb, void 
*cb_opaque)
+   BlockDriverAmendStatusCB *status_cb, void 
*cb_opaque,
+   Error **errp)
 {
 BDRVQcow2State *s = bs->opaque;
 int current_version = s->qcow_version;
@@ -4058,13 +4059,17 @@ static int qcow2_downgrade(BlockDriverState *bs, int 
target_version,
 if (target_version == current_version) {
 return 0;
 } else if (target_version > current_version) {
+error_setg(errp, "Cannot downgrade an image from version %i to %i",
+   current_version, target_version);
 return -EINVAL;
 } else if (target_version != 2) {
+error_setg(errp, "Cannot downgrade an image to version %i (only "
+   "target version 2 is supported)", target_version);
 return -EINVAL;
 }
 
 if (s->refcount_order != 4) {
-error_report("compat=0.10 requires refcount_bits=16");
+error_setg(errp, "compat=0.10 requires refcount_bits=16");
 return -ENOTSUP;
 }
 
@@ -4072,6 +4077,8 @@ static int qcow2_downgrade(BlockDriverState *bs, int 
target_version,
 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
 ret = qcow2_mark_clean(bs);
 if (ret < 0) {
+error_setg(errp, "Failed to make the image clean: %s",
+   strerror(-ret));
 return ret;
 }
 }
@@ -4081,6 +4088,8 @@ static int qcow2_downgrade(BlockDriverState *bs, int 
target_version,
  * best thing to do anyway */
 
 if (s->incompatible_features) {
+error_setg(errp, "Cannot downgrade an image with incompatible features 
"
+   "%#" PRIx64 " set", s->incompatible_features);
 return -ENOTSUP;
 }

[Qemu-devel] [PATCH 3/7] qemu-option: Pull out "Supported options" print

2018-04-21 Thread Max Reitz
It really is up to the caller to decide what this list of options means.

Signed-off-by: Max Reitz 
---
 qemu-img.c | 1 +
 util/qemu-option.c | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/qemu-img.c b/qemu-img.c
index 375fe852e0..6dd8e95bb2 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -261,6 +261,7 @@ static int print_block_option_help(const char *filename, 
const char *fmt)
 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
 }
 
+printf("Supported options:\n");
 qemu_opts_print_help(create_opts);
 qemu_opts_free(create_opts);
 return 0;
diff --git a/util/qemu-option.c b/util/qemu-option.c
index d0756fda58..95e6cf4e49 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -218,7 +218,6 @@ void qemu_opts_print_help(QemuOptsList *list)
 
 assert(list);
 desc = list->desc;
-printf("Supported options:\n");
 while (desc && desc->name) {
 printf("%-16s %s\n", desc->name,
desc->help ? desc->help : "No description available");
-- 
2.14.3




[Qemu-devel] [PATCH 5/7] qemu-img: Recognize no creation support in -o help

2018-04-21 Thread Max Reitz
The only users of print_block_option_help() are qemu-img create and
qemu-img convert for the output image, so this function is always used
for image creation (it used to be used for amendment also, but that is
no longer the case).

So if image creation is not supported by either the format or the
protocol, there is no need to print any option description, because the
user cannot create an image like this anyway.

This also fixes an assertion failure:

$ qemu-img create -f bochs -o help
Supported options:
qemu-img: util/qemu-option.c:219:
qemu_opts_print_help: Assertion `list' failed.
[1]24831 abort (core dumped)  qemu-img create -f bochs -o help

Signed-off-by: Max Reitz 
---
 qemu-img.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/qemu-img.c b/qemu-img.c
index 45e243cc80..29efd80c35 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -250,6 +250,11 @@ static int print_block_option_help(const char *filename, 
const char *fmt)
 return 1;
 }
 
+if (!drv->create_opts) {
+error_report("Format driver '%s' does not support image creation", 
fmt);
+return 1;
+}
+
 create_opts = qemu_opts_append(create_opts, drv->create_opts);
 if (filename) {
 proto_drv = bdrv_find_protocol(filename, true, &local_err);
@@ -258,6 +263,11 @@ static int print_block_option_help(const char *filename, 
const char *fmt)
 qemu_opts_free(create_opts);
 return 1;
 }
+if (!proto_drv->create_opts) {
+error_report("Protocal driver '%s' does not support image 
creation",
+ proto_drv->format_name);
+return 1;
+}
 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
 }
 
-- 
2.14.3




[Qemu-devel] [PATCH 1/7] qemu-img: Amendment support implies create_opts

2018-04-21 Thread Max Reitz
Instead of checking whether a driver has a non-NULL create_opts we
should check whether it supports image amendment in the first place.  If
it does, it must have create_opts.

On the other hand, if it does not have create_opts (so it does not
support amendment either), the error message "does not support any
options" is a bit useless.  Stating clearly that the driver has no
amendment support whatsoever is probably better.

Signed-off-by: Max Reitz 
---
 qemu-img.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 855fa52514..f60b22769e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3709,13 +3709,16 @@ static int img_amend(int argc, char **argv)
 goto out;
 }
 
-if (!bs->drv->create_opts) {
-error_report("Format driver '%s' does not support any options to 
amend",
+if (!bs->drv->bdrv_amend_options) {
+error_report("Format driver '%s' does not support option amendment",
  fmt);
 ret = -1;
 goto out;
 }
 
+/* Every driver supporting amendment must have create_opts */
+assert(bs->drv->create_opts);
+
 create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
 qemu_opts_do_parse(opts, options, NULL, &err);
-- 
2.14.3




[Qemu-devel] [PATCH 0/7] qemu-img: Improve option help for amend

2018-04-21 Thread Max Reitz
Currently, "qemu-img amend -f $format -o help" prints many things which
it claims to be supported, but most of the time it's wrong.  Usually
that starts with the format already: No format but qcow2 supports option
amendment, so we should never claim that a format supports any options
when it actually does not support amendment in the first place.

It goes on with the options themselves.  The qcow2 driver does not
support amendment of all creation options, so we should not claim it
does.  Actually knowing which formats are supported exactly would be a
bit difficult (this would probably involve adding a field to
QemuOptDesc, and I don't really want to do that), but what we can do
instead is to at least advise the user that the options we print are all
of the creation options and that we might not support amending all of
them.

There is a Bugzilla for this here:
https://bugzilla.redhat.com/show_bug.cgi?id=1537956

On the way to address these issues, there is more to be done, though.
bdrv_amend_options() does not have an Error parameter yet, and there is
no real excuse for that.

Also, "qemu-img create -o help" has its own little issue with formats
that do not support creation:

$ qemu-img create -f bochs -o help
Supported options:
qemu-img: util/qemu-option.c:219:
qemu_opts_print_help: Assertion `list' failed.
[1]24831 abort (core dumped)  qemu-img create -f bochs -o help

Let's fix that, too.


Max Reitz (7):
  qemu-img: Amendment support implies create_opts
  block: Add Error parameter to bdrv_amend_options
  qemu-option: Pull out "Supported options" print
  qemu-img: Add print_amend_option_help()
  qemu-img: Recognize no creation support in -o help
  iotests: Test help option for unsupporting formats
  iotests: Rework 113

 include/block/block.h  |  3 +-
 include/block/block_int.h  |  3 +-
 block.c|  8 --
 block/qcow2.c  | 72 ++
 qemu-img.c | 52 +
 util/qemu-option.c |  1 -
 tests/qemu-iotests/060.out |  4 +--
 tests/qemu-iotests/061.out |  7 -
 tests/qemu-iotests/080.out |  4 +--
 tests/qemu-iotests/082 |  9 ++
 tests/qemu-iotests/082.out | 53 +++---
 tests/qemu-iotests/112.out |  3 --
 tests/qemu-iotests/113 | 19 ++--
 tests/qemu-iotests/113.out |  7 +++--
 14 files changed, 166 insertions(+), 79 deletions(-)

-- 
2.14.3




[Qemu-devel] [PATCH 6/7] iotests: Test help option for unsupporting formats

2018-04-21 Thread Max Reitz
This adds test cases to 082 for qemu-img create/convert/amend "-o help"
on formats that do not support creation or amendment, respectively.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/082 | 9 +
 tests/qemu-iotests/082.out | 9 +
 2 files changed, 18 insertions(+)

diff --git a/tests/qemu-iotests/082 b/tests/qemu-iotests/082
index d5c83d45ed..a872f771a6 100755
--- a/tests/qemu-iotests/082
+++ b/tests/qemu-iotests/082
@@ -97,6 +97,9 @@ run_qemu_img create -f $IMGFMT -o backing_file="$TEST_IMG" -o 
,, -o help "$TEST_
 run_qemu_img create -f $IMGFMT -o help
 run_qemu_img create -o help
 
+# Try help option for a format that does not support creation
+run_qemu_img create -f bochs -o help
+
 echo
 echo === convert: Options specified more than once ===
 
@@ -151,6 +154,9 @@ run_qemu_img convert -O $IMGFMT -o backing_file="$TEST_IMG" 
-o ,, -o help "$TEST
 run_qemu_img convert -O $IMGFMT -o help
 run_qemu_img convert -o help
 
+# Try help option for a format that does not support creation
+run_qemu_img convert -O bochs -o help
+
 echo
 echo === amend: Options specified more than once ===
 
@@ -201,6 +207,9 @@ run_qemu_img amend -f $IMGFMT -o backing_file="$TEST_IMG" 
-o ,, -o help "$TEST_I
 run_qemu_img amend -f $IMGFMT -o help
 run_qemu_img convert -o help
 
+# Try help option for a format that does not support amendment
+run_qemu_img amend -f bochs -o help
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out
index 4e52dce8d6..60ef87c276 100644
--- a/tests/qemu-iotests/082.out
+++ b/tests/qemu-iotests/082.out
@@ -249,6 +249,9 @@ Testing: create -o help
 Supported options:
 size Virtual disk size
 
+Testing: create -f bochs -o help
+qemu-img: Format driver 'bochs' does not support image creation
+
 === convert: Options specified more than once ===
 
 Testing: create -f qcow2 TEST_DIR/t.qcow2 128M
@@ -502,6 +505,9 @@ Testing: convert -o help
 Supported options:
 size Virtual disk size
 
+Testing: convert -O bochs -o help
+qemu-img: Format driver 'bochs' does not support image creation
+
 === amend: Options specified more than once ===
 
 Testing: amend -f foo -f qcow2 -o lazy_refcounts=on TEST_DIR/t.qcow2
@@ -763,4 +769,7 @@ Note that not all of these options may be amendable.
 Testing: convert -o help
 Supported options:
 size Virtual disk size
+
+Testing: amend -f bochs -o help
+qemu-img: Format driver 'bochs' does not support option amendment
 *** done
-- 
2.14.3




[Qemu-devel] [PATCH 7/7] iotests: Rework 113

2018-04-21 Thread Max Reitz
This test case has been broken since 398e6ad014df261d (roughly half a
year).  qemu-img amend requires its output image to be R/W, so it opens
it as such; the node is then turned into an read-only node automatically
which is now accompanied by a warning, however.  This warning has not
been part of the reference output.

For one thing, this warning shows that we cannot keep the test case as
it is.  We would need a format that has no create_opts but that does
have write support -- we do not have such a format, though.

Another thing is that qemu now actually checks whether an image format
supports amendment instead of whether it has create_opts (since the
former always implies the latter).  So we can now use any format that
does not support amendment (even if it supports creation) and thus test
the same code path.

The reason nobody has noticed the breakage until now of course is the
fact that nobody runs the iotests for nbd+bochs.  There actually was
never any reason to set the protocol to "nbd" but because that was
technically correct; functionally it made no difference.  So that is the
first thing we are going to change: Make the protocol "file" instead so
that people might actually notice breakage here.

Secondly, now that bochs no longer works for the amend test case, we
have to change the format there anyway.  Set let us just bend the truth
a bit, declare this test a raw test.  In fact, that does not even
concern the bochs test cases, other than the output now reading 'bochs'
instead of 'IMGFMT'.

So with this test now being a raw test, we can rework the amend test
case to use raw instead.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/113 | 19 +--
 tests/qemu-iotests/113.out |  7 ---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/qemu-iotests/113 b/tests/qemu-iotests/113
index 19b68b2727..4e09810905 100755
--- a/tests/qemu-iotests/113
+++ b/tests/qemu-iotests/113
@@ -38,16 +38,17 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 . ./common.rc
 . ./common.filter
 
-# We can only test one format here because we need its sample file
-_supported_fmt bochs
-_supported_proto nbd
+# Some of these test cases use bochs, but others do use raw, so this
+# is only half a lie.
+_supported_fmt raw
+_supported_proto file
 _supported_os Linux
 
 echo
 echo '=== Unsupported image creation in qemu-img create ==='
 echo
 
-$QEMU_IMG create -f $IMGFMT nbd://example.com 2>&1 64M | _filter_imgfmt
+$QEMU_IMG create -f bochs nbd://example.com 2>&1 64M
 
 echo
 echo '=== Unsupported image creation in qemu-img convert ==='
@@ -56,17 +57,15 @@ echo
 # We could use any input image format here, but this is a bochs test, so just
 # use the bochs image
 _use_sample_img empty.bochs.bz2
-$QEMU_IMG convert -f $IMGFMT -O $IMGFMT "$TEST_IMG" nbd://example.com 2>&1 \
-| _filter_imgfmt
+$QEMU_IMG convert -f bochs -O bochs "$TEST_IMG" nbd://example.com
 
 echo
 echo '=== Unsupported format in qemu-img amend ==='
 echo
 
-# The protocol does not matter here
-_use_sample_img empty.bochs.bz2
-$QEMU_IMG amend -f $IMGFMT -o foo=bar "$TEST_IMG" 2>&1 | _filter_imgfmt
-
+TEST_IMG="$TEST_DIR/t.$IMGFMT"
+_make_test_img 1M
+$QEMU_IMG amend -f $IMGFMT -o size=2M "$TEST_IMG" 2>&1 | _filter_imgfmt
 
 # success, all done
 echo
diff --git a/tests/qemu-iotests/113.out b/tests/qemu-iotests/113.out
index 00bdfd6887..3557e2bbf0 100644
--- a/tests/qemu-iotests/113.out
+++ b/tests/qemu-iotests/113.out
@@ -2,14 +2,15 @@ QA output created by 113
 
 === Unsupported image creation in qemu-img create ===
 
-qemu-img: nbd://example.com: Format driver 'IMGFMT' does not support image 
creation
+qemu-img: nbd://example.com: Format driver 'bochs' does not support image 
creation
 
 === Unsupported image creation in qemu-img convert ===
 
-qemu-img: Format driver 'IMGFMT' does not support image creation
+qemu-img: Format driver 'bochs' does not support image creation
 
 === Unsupported format in qemu-img amend ===
 
-qemu-img: Format driver 'IMGFMT' does not support any options to amend
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
+qemu-img: Format driver 'IMGFMT' does not support option amendment
 
 *** done
-- 
2.14.3




[Qemu-devel] [PATCH 4/7] qemu-img: Add print_amend_option_help()

2018-04-21 Thread Max Reitz
The more generic print_block_option_help() function is not really
suitable for qemu-img amend, for a couple of reasons:
(1) We do not need to append the protocol-level options, as amendment
happens only on one node and does not descend downwards to its
children.
(2) print_block_option_help() says those options are "supported".  For
option amendment, we do not really know that.  So this new function
explicitly says that those options are the creation options, and not
all of them may be supported.
(3) If the driver does not support option amendment, we should not print
anything (except for an error message that amendment is not
supported).

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1537956
Signed-off-by: Max Reitz 
---
 qemu-img.c | 30 --
 tests/qemu-iotests/082.out | 44 +++-
 2 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 6dd8e95bb2..45e243cc80 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3578,6 +3578,32 @@ static void amend_status_cb(BlockDriverState *bs,
 qemu_progress_print(100.f * offset / total_work_size, 0);
 }
 
+static int print_amend_option_help(const char *format)
+{
+BlockDriver *drv;
+
+/* Find driver and parse its options */
+drv = bdrv_find_format(format);
+if (!drv) {
+error_report("Unknown file format '%s'", format);
+return 1;
+}
+
+if (!drv->bdrv_amend_options) {
+error_report("Format driver '%s' does not support option amendment",
+ format);
+return 1;
+}
+
+/* Every driver supporting amendment must have create_opts */
+assert(drv->create_opts);
+
+printf("Creation options for '%s':\n", format);
+qemu_opts_print_help(drv->create_opts);
+printf("\nNote that not all of these options may be amendable.\n");
+return 0;
+}
+
 static int img_amend(int argc, char **argv)
 {
 Error *err = NULL;
@@ -3677,7 +3703,7 @@ static int img_amend(int argc, char **argv)
 if (fmt && has_help_option(options)) {
 /* If a format is explicitly specified (and possibly no filename is
  * given), print option help here */
-ret = print_block_option_help(filename, fmt);
+ret = print_amend_option_help(fmt);
 goto out;
 }
 
@@ -3706,7 +3732,7 @@ static int img_amend(int argc, char **argv)
 
 if (has_help_option(options)) {
 /* If the format was auto-detected, print option help here */
-ret = print_block_option_help(filename, fmt);
+ret = print_amend_option_help(fmt);
 goto out;
 }
 
diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out
index 1527fbe1b7..4e52dce8d6 100644
--- a/tests/qemu-iotests/082.out
+++ b/tests/qemu-iotests/082.out
@@ -546,7 +546,7 @@ cluster_size: 65536
 === amend: help for -o ===
 
 Testing: amend -f qcow2 -o help TEST_DIR/t.qcow2
-Supported options:
+Creation options for 'qcow2':
 size Virtual disk size
 compat   Compatibility level (0.10 or 1.1)
 backing_file File name of a base image
@@ -564,10 +564,11 @@ cluster_size qcow2 cluster size
 preallocationPreallocation mode (allowed values: off, metadata, falloc, 
full)
 lazy_refcounts   Postpone refcount updates
 refcount_bitsWidth of a reference count entry in bits
-nocowTurn off copy-on-write (valid only on btrfs)
+
+Note that not all of these options may be amendable.
 
 Testing: amend -f qcow2 -o ? TEST_DIR/t.qcow2
-Supported options:
+Creation options for 'qcow2':
 size Virtual disk size
 compat   Compatibility level (0.10 or 1.1)
 backing_file File name of a base image
@@ -585,10 +586,11 @@ cluster_size qcow2 cluster size
 preallocationPreallocation mode (allowed values: off, metadata, falloc, 
full)
 lazy_refcounts   Postpone refcount updates
 refcount_bitsWidth of a reference count entry in bits
-nocowTurn off copy-on-write (valid only on btrfs)
+
+Note that not all of these options may be amendable.
 
 Testing: amend -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2
-Supported options:
+Creation options for 'qcow2':
 size Virtual disk size
 compat   Compatibility level (0.10 or 1.1)
 backing_file File name of a base image
@@ -606,10 +608,11 @@ cluster_size qcow2 cluster size
 preallocationPreallocation mode (allowed values: off, metadata, falloc, 
full)
 lazy_refcounts   Postpone refcount updates
 refcount_bitsWidth of a reference count entry in bits
-nocowTurn off copy-on-write (valid only on btrfs)
+
+Note that not all of these options may be amendable.
 
 Testing: amend -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2
-Supported options:
+Creation options for 'qcow2':
 size Virtual disk size
 compat   Compatibility level (0.10 or 1.1)
 backing_file File name of a base image
@@ -627,10 +630,11 @@

Re: [Qemu-devel] [PATCH V6 11/17] qapi: Add new command to query colo status

2018-04-21 Thread Zhang Chen
On Tue, Apr 17, 2018 at 8:46 PM, Markus Armbruster 
wrote:

> Zhang Chen  writes:
>
> > Libvirt or other high level software can use this command query colo
> status.
> > You can test this command like that:
> > {'execute':'query-colo-status'}
> >
> > Signed-off-by: Zhang Chen 
> > ---
> >  migration/colo.c| 31 +++
> >  qapi/migration.json | 33 +
> >  2 files changed, 64 insertions(+)
> >
> > diff --git a/migration/colo.c b/migration/colo.c
> > index 8ca6381..127db3c 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -237,6 +237,37 @@ void qmp_xen_colo_do_checkpoint(Error **errp)
> >  #endif
> >  }
> >
> > +COLOStatus *qmp_query_colo_status(Error **errp)
> > +{
> > +MigrationState *m;
> > +MigrationIncomingState *mis;
> > +COLOStatus *s = g_new0(COLOStatus, 1);
> > +
> > +if (get_colo_mode() == COLO_MODE_PRIMARY) {
> > +m = migrate_get_current();
> > +
> > +if (m->state == MIGRATION_STATUS_COLO) {
> > +s->colo_running = true;
> > +} else {
> > +s->colo_running = false;
> > +}
> > +s->mode = COLO_MODE_PRIMARY;
> > +} else {
>
> Can get_colo_mode() == COLO_MODE_UNKNOWN happen here?  If not, why not?
>

Good catch, I will check it in next version.


>
> > +mis = migration_incoming_get_current();
> > +
> > +if (mis->state == MIGRATION_STATUS_COLO) {
> > +s->colo_running = true;
> > +} else {
> > +s->colo_running = false;
> > +}
> > +s->mode = COLO_MODE_SECONDARY;
> > +}
>
> Simpler:
>
>if (get_colo_mode() == COLO_MODE_PRIMARY) {
>state = migrate_get_current()->state;
>} else {
>state = migration_incoming_get_current()->state;
>}
>s->colo_running = state == MIGRATION_STATUS_COLO;
>s->mode = get_colo_mode();
>

Good idea. I will use this codes in next version.
By the way, do I need add your SOB?


>
> > +
> > +s->reason = failover_get_state();
>
> COLOStatus member @reason is COLOExitReason, but failover_get_state()
> returns FailoverStatus, a different enum.  Am I confused?
>

No, here I want to reused the FailoverStatus, but missed something in
rebase progress.
I will make FailoverStatus's @none = COLOExitReason's @none,
  FailoverStatus's @requre = COLOExitReason's @request
  FailoverStatus's other status = COLOExitReason's @error.
in next version.



>
> > +
> > +return s;
> > +}
> > +
> >  static void colo_send_message(QEMUFile *f, COLOMessage msg,
> >Error **errp)
> >  {
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index 6c6c50e..8ccdd21 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -1201,3 +1201,36 @@
> >  # Since: 2.9
> >  ##
> >  { 'command': 'xen-colo-do-checkpoint' }
> > +
> > +##
> > +# @COLOStatus:
> > +#
> > +# The result format for 'query-colo-status'.
> > +#
> > +# @mode: which COLO mode the VM was in when it exited.
>
> This documents the meaning of @mode "when it exited".  Two questions:
>
> 1. What's "it"?  The VM?  COLO?
>

COLO.


>
> 2. What's the meaning of @mode when "it" hasn't exited, yet?
>

Just return the COLO running mode(primary, secondary or unknown) when COLO
hasn't exited.


>
> > +#
> > +# @colo-running: if true means COLO running well, otherwise COLO have
> done.
>
> Suggest
>
># @colo-running: true if COLO is running
>
> > +#
> > +# @reason: describes the reason for the COLO exit.
>
> If there has been no COLO exit, there can't be a reason for one.  What's
> the value of @reason then?  Hmm, peeking at FailoverStatus makes me
> guess its @none.  Correct?
>

Yes, you are right. I will fix it in next version.

Thanks your comments.
Zhang Chen



>
> > +#
> > +# Since: 2.12
> > +##
> > +{ 'struct': 'COLOStatus',
> > +  'data': { 'mode': 'COLOMode', 'colo-running': 'bool', 'reason':
> 'COLOExitReason' } }
> > +
> > +##
> > +# @query-colo-status:
> > +#
> > +# Query COLO status while the vm is running.
> > +#
> > +# Returns: A @COLOStatus object showing the status.
> > +#
> > +# Example:
> > +#
> > +# -> { "execute": "query-colo-status" }
> > +# <- { "return": { "mode": "primary", "colo-running": true, "reason":
> "request" } }
> > +#
> > +# Since: 2.12
> > +##
> > +{ 'command': 'query-colo-status',
> > +  'returns': 'COLOStatus' }
>


Re: [Qemu-devel] [PATCH v3 38/41] hw/rdma: Use the BYTE-based definitions

2018-04-21 Thread Yuval Shaia
On Sun, Apr 15, 2018 at 08:43:04PM -0300, Philippe Mathieu-Daudé wrote:
> It eases code review, unit is explicit.
> 
> Patch generated using:
> 
>   $ git grep -E '(1024|2048|4096|8192|(<<|>>).?(10|20|30))' hw/ include/hw/
> 
> and modified manually.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  hw/rdma/vmw/pvrdma.h | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
> index 8c173cb824..f17143f0e6 100644
> --- a/hw/rdma/vmw/pvrdma.h
> +++ b/hw/rdma/vmw/pvrdma.h
> @@ -16,6 +16,7 @@
>  #ifndef PVRDMA_PVRDMA_H
>  #define PVRDMA_PVRDMA_H
>  
> +#include "qemu/units.h"
>  #include "hw/pci/pci.h"
>  #include "hw/pci/msix.h"
>  
> @@ -30,7 +31,7 @@
>  #define RDMA_MSIX_BAR_IDX0
>  #define RDMA_REG_BAR_IDX 1
>  #define RDMA_UAR_BAR_IDX 2
> -#define RDMA_BAR0_MSIX_SIZE  (16 * 1024)
> +#define RDMA_BAR0_MSIX_SIZE  (16 * K_BYTE)
>  #define RDMA_BAR1_REGS_SIZE  256
>  #define RDMA_BAR2_UAR_SIZE   (0x1000 * MAX_UCS) /* each uc gets page */

Reviewed-by: Yuval Shaia 

>  
> -- 
> 2.17.0
> 



[Qemu-devel] [Bug 1765970] [NEW] qemu-arm (user mode) segfaults in uclibc-ng chroot after upgrade to 2.11.x

2018-04-21 Thread diddly
Public bug reported:

I use a qemu-user chroot + binfmt to build software targetting a
raspberry pi.  After upgrading from qemu-2.10.1 to 2.11.1 (Gentoo host),
I noticed that on my uclibc-ng chroot qemu-arm will segfault when
running python and importing the portage module.

I have bisected qemu down to this commit:

https://github.com/qemu/qemu/commit/18e80c55bb6ec17c05ec0ba717ec83933c2bfc07

If I recompile and change MAX_RESERVED_VA (from the above commit) back
to 0x7700 the problem goes away.  NB I have no idea what that does,
I just thought I'd see.


Other arm chroots (glibc, musl) do not segfault with qemu-2.11, only the 
uclibc-ng one.  Not sure why.


The following backtrace was generated from running qemu-arm in gdb and 
recreating the segfault:

(gdb) where
#0  0x60726046 in static_code_gen_buffer ()
#1  0x60048789 in cpu_tb_exec (cpu=0x6278e310, 
itb=0x60725f80 )
at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:167
#2  0x6004937f in cpu_loop_exec_tb (cpu=0x6278e310, 
tb=0x60725f80 , last_tb=0x7fffd138, 
tb_exit=0x7fffd130)
at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:627
#3  0x60049600 in cpu_exec (cpu=0x6278e310)
at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:736
#4  0x600511c3 in cpu_loop (env=0x627965b0)
at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:585
#5  0x600534eb in main (argc=4, argv=0x7fffd9b8, 
envp=0x7fffd9e0)
at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:4882


(gdb) info reg
rax0x627965b0   1652123056
rbx0x62717870   1651603568
rcx0x606da000   1617797120
rdx0x60726000   1618108416
rsi0x60726000   1618108416
rdi0x627965b0   1652123056
rbp0x7fffd0c0   0x7fffd0c0
rsp0x7fffd080   0x7fffd080
r8 0x0  0
r9 0x2  2
r100x0  0
r110x629280a0   1653768352
r120x60260e40   1613106752
r130x0  0
r140x606a5018   1617580056
r150x0  0
rip0x60048789   0x60048789 
eflags 0x10282  [ SF IF RF ]
cs 0x33 51
ss 0x2b 43
ds 0x0  0
es 0x0  0
fs 0x0  0
gs 0x0  0
(gdb)

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1765970

Title:
  qemu-arm (user mode) segfaults in uclibc-ng chroot after upgrade to
  2.11.x

Status in QEMU:
  New

Bug description:
  I use a qemu-user chroot + binfmt to build software targetting a
  raspberry pi.  After upgrading from qemu-2.10.1 to 2.11.1 (Gentoo
  host), I noticed that on my uclibc-ng chroot qemu-arm will segfault
  when running python and importing the portage module.

  I have bisected qemu down to this commit:

  https://github.com/qemu/qemu/commit/18e80c55bb6ec17c05ec0ba717ec83933c2bfc07

  If I recompile and change MAX_RESERVED_VA (from the above commit) back
  to 0x7700 the problem goes away.  NB I have no idea what that
  does, I just thought I'd see.

  
  Other arm chroots (glibc, musl) do not segfault with qemu-2.11, only the 
uclibc-ng one.  Not sure why.

  
  The following backtrace was generated from running qemu-arm in gdb and 
recreating the segfault:

  (gdb) where
  #0  0x60726046 in static_code_gen_buffer ()
  #1  0x60048789 in cpu_tb_exec (cpu=0x6278e310, 
  itb=0x60725f80 )
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:167
  #2  0x6004937f in cpu_loop_exec_tb (cpu=0x6278e310, 
  tb=0x60725f80 , last_tb=0x7fffd138, 
  tb_exit=0x7fffd130)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:627
  #3  0x60049600 in cpu_exec (cpu=0x6278e310)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:736
  #4  0x600511c3 in cpu_loop (env=0x627965b0)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:585
  #5  0x600534eb in main (argc=4, argv=0x7fffd9b8, 
  envp=0x7fffd9e0)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:4882


  (gdb) info reg
  rax0x627965b0   1652123056
  rbx0x62717870   1651603568
  rcx0x606da000   1617797120
  rdx0x60726000   1618108416
  rsi0x60726000   1618108416
  rdi0x627965b0   1652123056
  rbp0x7fffd0c0   0x7fffd0c0
  rsp0x7fffd080   0x7fffd080
  r8

Re: [Qemu-devel] [PATCH v2 1/1] migration: calculate expected_downtime with ram_bytes_remaining()

2018-04-21 Thread Balamuruhan S
On Thu, Apr 19, 2018 at 09:48:17PM +1000, David Gibson wrote:
> On Thu, Apr 19, 2018 at 10:14:52AM +0530, Balamuruhan S wrote:
> > On Wed, Apr 18, 2018 at 09:36:33AM +0100, Dr. David Alan Gilbert wrote:
> > > * Balamuruhan S (bal...@linux.vnet.ibm.com) wrote:
> > > > On Wed, Apr 18, 2018 at 10:57:26AM +1000, David Gibson wrote:
> > > > > On Wed, Apr 18, 2018 at 10:55:50AM +1000, David Gibson wrote:
> > > > > > On Tue, Apr 17, 2018 at 06:53:17PM +0530, Balamuruhan S wrote:
> > > > > > > expected_downtime value is not accurate with dirty_pages_rate * 
> > > > > > > page_size,
> > > > > > > using ram_bytes_remaining would yeild it correct.
> > > > > > 
> > > > > > This commit message hasn't been changed since v1, but the patch is
> > > > > > doing something completely different.  I think most of the info from
> > > > > > your cover letter needs to be in here.
> > > > > > 
> > > > > > > 
> > > > > > > Signed-off-by: Balamuruhan S 
> > > > > > > ---
> > > > > > >  migration/migration.c | 6 +++---
> > > > > > >  migration/migration.h | 1 +
> > > > > > >  2 files changed, 4 insertions(+), 3 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > > > index 52a5092add..4d866bb920 100644
> > > > > > > --- a/migration/migration.c
> > > > > > > +++ b/migration/migration.c
> > > > > > > @@ -614,7 +614,7 @@ static void populate_ram_info(MigrationInfo 
> > > > > > > *info, MigrationState *s)
> > > > > > >  }
> > > > > > >  
> > > > > > >  if (s->state != MIGRATION_STATUS_COMPLETED) {
> > > > > > > -info->ram->remaining = ram_bytes_remaining();
> > > > > > > +info->ram->remaining = s->ram_bytes_remaining;
> > > > > > >  info->ram->dirty_pages_rate = 
> > > > > > > ram_counters.dirty_pages_rate;
> > > > > > >  }
> > > > > > >  }
> > > > > > > @@ -2227,6 +2227,7 @@ static void 
> > > > > > > migration_update_counters(MigrationState *s,
> > > > > > >  transferred = qemu_ftell(s->to_dst_file) - 
> > > > > > > s->iteration_initial_bytes;
> > > > > > >  time_spent = current_time - s->iteration_start_time;
> > > > > > >  bandwidth = (double)transferred / time_spent;
> > > > > > > +s->ram_bytes_remaining = ram_bytes_remaining();
> > > > > > >  s->threshold_size = bandwidth * s->parameters.downtime_limit;
> > > > > > >  
> > > > > > >  s->mbps = (((double) transferred * 8.0) /
> > > > > > > @@ -2237,8 +2238,7 @@ static void 
> > > > > > > migration_update_counters(MigrationState *s,
> > > > > > >   * recalculate. 1 is a small enough number for our 
> > > > > > > purposes
> > > > > > >   */
> > > > > > >  if (ram_counters.dirty_pages_rate && transferred > 1) {
> > > > > > > -s->expected_downtime = ram_counters.dirty_pages_rate *
> > > > > > > -qemu_target_page_size() / bandwidth;
> > > > > > > +s->expected_downtime = s->ram_bytes_remaining / 
> > > > > > > bandwidth;
> > > > > > >  }
> > > > > 
> > > > > ..but more importantly, I still think this change is bogus.  expected
> > > > > downtime is not the same thing as remaining ram / bandwidth.
> > > > 
> > > > I tested precopy migration of 16M HP backed P8 guest from P8 to 1G P9 
> > > > host
> > > > and observed precopy migration was infinite with expected_downtime set 
> > > > as
> > > > downtime-limit.
> > > 
> > > Did you debug why it was infinite? Which component of the calculation
> > > had gone wrong and why?
> > > 
> > > > During the discussion for Bug RH1560562, Michael Roth quoted that
> > > > 
> > > > One thing to note: in my testing I found that the "expected downtime" 
> > > > value
> > > > seems inaccurate in this scenario. To find a max downtime that allowed
> > > > migration to complete I had to divide "remaining ram" by "throughput" 
> > > > from
> > > > "info migrate" (after the initial pre-copy pass through ram, i.e. once
> > > > "dirty pages" value starts getting reported and we're just sending 
> > > > dirtied
> > > > pages).
> > > > 
> > > > Later by trying it precopy migration could able to complete with this
> > > > approach.
> > > > 
> > > > adding Michael Roth in cc.
> > > 
> > > We should try and _understand_ the rational for the change, not just go
> > > with it.  Now, remember that whatever we do is just an estimate and
> > 
> > I have made the change based on my understanding,
> > 
> > Currently the calculation is,
> > 
> > expected_downtime = (dirty_pages_rate * qemu_target_page_size) / bandwidth
> > 
> > dirty_pages_rate = No of dirty pages / time =>  its unit (1 / seconds)
> > qemu_target_page_size => its unit (bytes)
> > 
> > dirty_pages_rate * qemu_target_page_size => bytes/seconds
> > 
> > bandwidth = bytes transferred / time => bytes/seconds
> > 
> > dividing this would not be a measurement of time.
> 
> Hm, that's a good point, the units are not right here.  And thinking
> about it more, it doesn't really make sense for it to be linear
you are right.

> either.  After all if the pa

Re: [Qemu-devel] [PATCH v2 1/1] migration: calculate expected_downtime with ram_bytes_remaining()

2018-04-21 Thread Balamuruhan S
On Fri, Apr 20, 2018 at 11:28:04AM +0100, Dr. David Alan Gilbert wrote:
> * David Gibson (da...@gibson.dropbear.id.au) wrote:
> > On Thu, Apr 19, 2018 at 12:24:04PM +0100, Dr. David Alan Gilbert wrote:
> > > * Balamuruhan S (bal...@linux.vnet.ibm.com) wrote:
> > > > On Wed, Apr 18, 2018 at 09:36:33AM +0100, Dr. David Alan Gilbert wrote:
> > > > > * Balamuruhan S (bal...@linux.vnet.ibm.com) wrote:
> > > > > > On Wed, Apr 18, 2018 at 10:57:26AM +1000, David Gibson wrote:
> > > > > > > On Wed, Apr 18, 2018 at 10:55:50AM +1000, David Gibson wrote:
> > > > > > > > On Tue, Apr 17, 2018 at 06:53:17PM +0530, Balamuruhan S wrote:
> > > > > > > > > expected_downtime value is not accurate with dirty_pages_rate 
> > > > > > > > > * page_size,
> > > > > > > > > using ram_bytes_remaining would yeild it correct.
> > > > > > > > 
> > > > > > > > This commit message hasn't been changed since v1, but the patch 
> > > > > > > > is
> > > > > > > > doing something completely different.  I think most of the info 
> > > > > > > > from
> > > > > > > > your cover letter needs to be in here.
> > > > > > > > 
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Balamuruhan S 
> > > > > > > > > ---
> > > > > > > > >  migration/migration.c | 6 +++---
> > > > > > > > >  migration/migration.h | 1 +
> > > > > > > > >  2 files changed, 4 insertions(+), 3 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > > > > > index 52a5092add..4d866bb920 100644
> > > > > > > > > --- a/migration/migration.c
> > > > > > > > > +++ b/migration/migration.c
> > > > > > > > > @@ -614,7 +614,7 @@ static void 
> > > > > > > > > populate_ram_info(MigrationInfo *info, MigrationState *s)
> > > > > > > > >  }
> > > > > > > > >  
> > > > > > > > >  if (s->state != MIGRATION_STATUS_COMPLETED) {
> > > > > > > > > -info->ram->remaining = ram_bytes_remaining();
> > > > > > > > > +info->ram->remaining = s->ram_bytes_remaining;
> > > > > > > > >  info->ram->dirty_pages_rate = 
> > > > > > > > > ram_counters.dirty_pages_rate;
> > > > > > > > >  }
> > > > > > > > >  }
> > > > > > > > > @@ -2227,6 +2227,7 @@ static void 
> > > > > > > > > migration_update_counters(MigrationState *s,
> > > > > > > > >  transferred = qemu_ftell(s->to_dst_file) - 
> > > > > > > > > s->iteration_initial_bytes;
> > > > > > > > >  time_spent = current_time - s->iteration_start_time;
> > > > > > > > >  bandwidth = (double)transferred / time_spent;
> > > > > > > > > +s->ram_bytes_remaining = ram_bytes_remaining();
> > > > > > > > >  s->threshold_size = bandwidth * 
> > > > > > > > > s->parameters.downtime_limit;
> > > > > > > > >  
> > > > > > > > >  s->mbps = (((double) transferred * 8.0) /
> > > > > > > > > @@ -2237,8 +2238,7 @@ static void 
> > > > > > > > > migration_update_counters(MigrationState *s,
> > > > > > > > >   * recalculate. 1 is a small enough number for our 
> > > > > > > > > purposes
> > > > > > > > >   */
> > > > > > > > >  if (ram_counters.dirty_pages_rate && transferred > 
> > > > > > > > > 1) {
> > > > > > > > > -s->expected_downtime = ram_counters.dirty_pages_rate 
> > > > > > > > > *
> > > > > > > > > -qemu_target_page_size() / bandwidth;
> > > > > > > > > +s->expected_downtime = s->ram_bytes_remaining / 
> > > > > > > > > bandwidth;
> > > > > > > > >  }
> > > > > > > 
> > > > > > > ..but more importantly, I still think this change is bogus.  
> > > > > > > expected
> > > > > > > downtime is not the same thing as remaining ram / bandwidth.
> > > > > > 
> > > > > > I tested precopy migration of 16M HP backed P8 guest from P8 to 1G 
> > > > > > P9 host
> > > > > > and observed precopy migration was infinite with expected_downtime 
> > > > > > set as
> > > > > > downtime-limit.
> > > > > 
> > > > > Did you debug why it was infinite? Which component of the calculation
> > > > > had gone wrong and why?
> > > > > 
> > > > > > During the discussion for Bug RH1560562, Michael Roth quoted that
> > > > > > 
> > > > > > One thing to note: in my testing I found that the "expected 
> > > > > > downtime" value
> > > > > > seems inaccurate in this scenario. To find a max downtime that 
> > > > > > allowed
> > > > > > migration to complete I had to divide "remaining ram" by 
> > > > > > "throughput" from
> > > > > > "info migrate" (after the initial pre-copy pass through ram, i.e. 
> > > > > > once
> > > > > > "dirty pages" value starts getting reported and we're just sending 
> > > > > > dirtied
> > > > > > pages).
> > > > > > 
> > > > > > Later by trying it precopy migration could able to complete with 
> > > > > > this
> > > > > > approach.
> > > > > > 
> > > > > > adding Michael Roth in cc.
> > > > > 
> > > > > We should try and _understand_ the rational for the change, not just 
> > > > > go
> > > > > with it.  Now, remember that whatever we do is just an estimate and
> > > > 
> > > > I have made the change bas

[Qemu-devel] [Bug 1765970] Re: qemu-arm (user mode) segfaults in uclibc-ng chroot after upgrade to 2.11.x

2018-04-21 Thread Peter Maydell
Could you try with current head-of-git (the 2.12 rc4)? We adjusted our
logic for setting up the initial reserved space since 2.11 -- it may
well not fix this bug, but maybe we'll be lucky...

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1765970

Title:
  qemu-arm (user mode) segfaults in uclibc-ng chroot after upgrade to
  2.11.x

Status in QEMU:
  New

Bug description:
  I use a qemu-user chroot + binfmt to build software targetting a
  raspberry pi.  After upgrading from qemu-2.10.1 to 2.11.1 (Gentoo
  host), I noticed that on my uclibc-ng chroot qemu-arm will segfault
  when running python and importing the portage module.

  I have bisected qemu down to this commit:

  https://github.com/qemu/qemu/commit/18e80c55bb6ec17c05ec0ba717ec83933c2bfc07

  If I recompile and change MAX_RESERVED_VA (from the above commit) back
  to 0x7700 the problem goes away.  NB I have no idea what that
  does, I just thought I'd see.

  
  Other arm chroots (glibc, musl) do not segfault with qemu-2.11, only the 
uclibc-ng one.  Not sure why.

  
  The following backtrace was generated from running qemu-arm in gdb and 
recreating the segfault:

  (gdb) where
  #0  0x60726046 in static_code_gen_buffer ()
  #1  0x60048789 in cpu_tb_exec (cpu=0x6278e310, 
  itb=0x60725f80 )
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:167
  #2  0x6004937f in cpu_loop_exec_tb (cpu=0x6278e310, 
  tb=0x60725f80 , last_tb=0x7fffd138, 
  tb_exit=0x7fffd130)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:627
  #3  0x60049600 in cpu_exec (cpu=0x6278e310)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:736
  #4  0x600511c3 in cpu_loop (env=0x627965b0)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:585
  #5  0x600534eb in main (argc=4, argv=0x7fffd9b8, 
  envp=0x7fffd9e0)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:4882


  (gdb) info reg
  rax0x627965b0   1652123056
  rbx0x62717870   1651603568
  rcx0x606da000   1617797120
  rdx0x60726000   1618108416
  rsi0x60726000   1618108416
  rdi0x627965b0   1652123056
  rbp0x7fffd0c0   0x7fffd0c0
  rsp0x7fffd080   0x7fffd080
  r8 0x0  0
  r9 0x2  2
  r100x0  0
  r110x629280a0   1653768352
  r120x60260e40   1613106752
  r130x0  0
  r140x606a5018   1617580056
  r150x0  0
  rip0x60048789   0x60048789 
  eflags 0x10282  [ SF IF RF ]
  cs 0x33 51
  ss 0x2b 43
  ds 0x0  0
  es 0x0  0
  fs 0x0  0
  gs 0x0  0
  (gdb)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1765970/+subscriptions



[Qemu-devel] [Bug 1765970] Re: qemu-arm (user mode) segfaults in uclibc-ng chroot after upgrade to 2.11.x

2018-04-21 Thread diddly
Unfortunately, no dice

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1765970

Title:
  qemu-arm (user mode) segfaults in uclibc-ng chroot after upgrade to
  2.11.x

Status in QEMU:
  New

Bug description:
  I use a qemu-user chroot + binfmt to build software targetting a
  raspberry pi.  After upgrading from qemu-2.10.1 to 2.11.1 (Gentoo
  host), I noticed that on my uclibc-ng chroot qemu-arm will segfault
  when running python and importing the portage module.

  I have bisected qemu down to this commit:

  https://github.com/qemu/qemu/commit/18e80c55bb6ec17c05ec0ba717ec83933c2bfc07

  If I recompile and change MAX_RESERVED_VA (from the above commit) back
  to 0x7700 the problem goes away.  NB I have no idea what that
  does, I just thought I'd see.

  
  Other arm chroots (glibc, musl) do not segfault with qemu-2.11, only the 
uclibc-ng one.  Not sure why.

  
  The following backtrace was generated from running qemu-arm in gdb and 
recreating the segfault:

  (gdb) where
  #0  0x60726046 in static_code_gen_buffer ()
  #1  0x60048789 in cpu_tb_exec (cpu=0x6278e310, 
  itb=0x60725f80 )
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:167
  #2  0x6004937f in cpu_loop_exec_tb (cpu=0x6278e310, 
  tb=0x60725f80 , last_tb=0x7fffd138, 
  tb_exit=0x7fffd130)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:627
  #3  0x60049600 in cpu_exec (cpu=0x6278e310)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/accel/tcg/cpu-exec.c:736
  #4  0x600511c3 in cpu_loop (env=0x627965b0)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:585
  #5  0x600534eb in main (argc=4, argv=0x7fffd9b8, 
  envp=0x7fffd9e0)
  at 
/usr/src/debug/app-emulation/qemu-2.11.1-r2/qemu-2.11.1/linux-user/main.c:4882


  (gdb) info reg
  rax0x627965b0   1652123056
  rbx0x62717870   1651603568
  rcx0x606da000   1617797120
  rdx0x60726000   1618108416
  rsi0x60726000   1618108416
  rdi0x627965b0   1652123056
  rbp0x7fffd0c0   0x7fffd0c0
  rsp0x7fffd080   0x7fffd080
  r8 0x0  0
  r9 0x2  2
  r100x0  0
  r110x629280a0   1653768352
  r120x60260e40   1613106752
  r130x0  0
  r140x606a5018   1617580056
  r150x0  0
  rip0x60048789   0x60048789 
  eflags 0x10282  [ SF IF RF ]
  cs 0x33 51
  ss 0x2b 43
  ds 0x0  0
  es 0x0  0
  fs 0x0  0
  gs 0x0  0
  (gdb)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1765970/+subscriptions



[Qemu-devel] [PATCH] loader: Fix misaligned member access

2018-04-21 Thread Philippe Mathieu-Daudé
This fixes the following ASan warning:

  $ mips64el-softmmu/qemu-system-mips64el -M boston -kernel vmlinux.gz.itb 
-nographic
  hw/core/loader-fit.c:108:17: runtime error: load of misaligned address 
0x7f95cd7e4264 for type 'fdt64_t', which requires 8 byte alignment
  0x7f95cd7e4264: note: pointer points here
00 00 00 3e ff ff ff ff  80 7d 2a c0 00 00 00 01  68 61 73 68 40 30 00 00  
00 00 00 03 00 00 00 14
^

Reported-by: AddressSanitizer
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/core/loader-fit.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/core/loader-fit.c b/hw/core/loader-fit.c
index 0c4a7207f4..1a69697f89 100644
--- a/hw/core/loader-fit.c
+++ b/hw/core/loader-fit.c
@@ -93,6 +93,8 @@ static int fit_image_addr(const void *itb, int img, const 
char *name,
   hwaddr *addr)
 {
 const void *prop;
+fdt32_t v32;
+fdt64_t v64;
 int len;
 
 prop = fdt_getprop(itb, img, name, &len);
@@ -102,10 +104,12 @@ static int fit_image_addr(const void *itb, int img, const 
char *name,
 
 switch (len) {
 case 4:
-*addr = fdt32_to_cpu(*(fdt32_t *)prop);
+memcpy(&v32, prop, sizeof(v32));
+*addr = fdt32_to_cpu(v32);
 return 0;
 case 8:
-*addr = fdt64_to_cpu(*(fdt64_t *)prop);
+memcpy(&v64, prop, sizeof(v64));
+*addr = fdt64_to_cpu(v64);
 return 0;
 default:
 error_printf("invalid %s address length %d\n", name, len);
-- 
2.17.0




Re: [Qemu-devel] [PATCH v3 00/35] ppc: support for the XIVE interrupt controller (POWER9)

2018-04-21 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180419124331.3915-1-...@kaod.org
Subject: [Qemu-devel] [PATCH v3 00/35] ppc: support for the XIVE interrupt 
controller (POWER9)

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]   patchew/20180419124331.3915-1-...@kaod.org -> 
patchew/20180419124331.3915-1-...@kaod.org
Switched to a new branch 'test'
c4469e7316 ppc/pnv: add a PSI bridge model for POWER9 processor
33f2dbc799 ppc/pnv: add XIVE support
403005364d ppc: externalize ppc_get_vcpu_by_pir()
aaccb0b820 ppc/pnv: introduce a pnv_icp_create() helper
238430eaf9 spapr/xive: raise migration priority of the machine
58147d3114 spapr/xive, xics: reset KVM at machine reset
c10eec0b36 spapr/xive, xics: use the CPU_INTC handlers to reset KVM
ac85306946 intc: introduce a CPUIntc interface
5db0587d7a migration: discard non-migratable RAMBlocks
58b1c5a84d spapr/xive: add a XIVE KVM device to the machine
d04e47a5ce spapr/xive: add KVM support
0dfc28a269 spapr/xive: add common realize routine for KVM
2ecb56c69d target/ppc/kvm: add Linux KVM definitions for XIVE
87b9d2e8b2 spapr: add classes for the XIVE models
e83377df2b spapr: advertise XIVE exploitation mode in CAS
48987801b2 spapr: add support to dump XIVE information
829e2cbf09 spapr: toggle the ICP depending on the selected interrupt mode
3e35956bbf spapr: introduce a spapr_icp_create() helper
519c790144 spapr: add XIVE support to spapr_qirq()
f4f30c31c4 spapr: introduce a helper to map the XIVE memory regions
70574a057b sysbus: add a sysbus_mmio_unmap() helper
54465abc2d spapr: add device tree support for the XIVE exploitation mode
879817b623 spapr: add hcalls support for the XIVE exploitation interrupt mode
4afdd938d5 spapr: add a sPAPRXive object to the machine
f67a9f05b9 spapr: introduce a 'xive_exploitation' option to enable XIVE
11464bf255 spapr: add support for the SET_OS_PENDING command (XIVE)
a619ca5b74 spapr: notify the CPU when the XIVE interrupt priority is more 
privileged
5fc2664cfe spapr: push the XIVE EQ data in OS event queue
f62e7dbede spapr/xive: introduce the XIVE Event Queues
329205b1dc spapr/xive: introduce a XIVE interrupt presenter model
876c681a62 spapr/xive: add a single source block to the sPAPR XIVE model
5feac5da74 spapr/xive: introduce a XIVE interrupt controller for sPAPR
4d6b2f074b ppc/xive: introduce the XiveFabric interface
59d738daa4 ppc/xive: add support for the LSI interrupt sources
084d850ee3 ppc/xive: introduce a XIVE interrupt source model

=== OUTPUT BEGIN ===
Checking PATCH 1/35: ppc/xive: introduce a XIVE interrupt source model...
Checking PATCH 2/35: ppc/xive: add support for the LSI interrupt sources...
Checking PATCH 3/35: ppc/xive: introduce the XiveFabric interface...
Checking PATCH 4/35: spapr/xive: introduce a XIVE interrupt controller for 
sPAPR...
Checking PATCH 5/35: spapr/xive: add a single source block to the sPAPR XIVE 
model...
Checking PATCH 6/35: spapr/xive: introduce a XIVE interrupt presenter model...
Checking PATCH 7/35: spapr/xive: introduce the XIVE Event Queues...
Checking PATCH 8/35: spapr: push the XIVE EQ data in OS event queue...
Checking PATCH 9/35: spapr: notify the CPU when the XIVE interrupt priority is 
more privileged...
Checking PATCH 10/35: spapr: add support for the SET_OS_PENDING command 
(XIVE)...
Checking PATCH 11/35: spapr: introduce a 'xive_exploitation' option to enable 
XIVE...
Checking PATCH 12/35: spapr: add a sPAPRXive object to the machine...
Checking PATCH 13/35: spapr: add hcalls support for the XIVE exploitation 
interrupt mode...
Checking PATCH 14/35: spapr: add device tree support for the XIVE exploitation 
mode...
Checking PATCH 15/35: sysbus: add a sysbus_mmio_unmap() helper...
Checking PATCH 16/35: spapr: introduce a helper to map the XIVE memory 
regions...
Checking PATCH 17/35: spapr: add XIVE support to spapr_qirq()...
Checking PATCH 18/35: spapr: introduce a spapr_icp_create() helper...
Checking PATCH 19/35: spapr: toggle the ICP depending on the selected interrupt 
mode...
Checking PATCH 20/35: spapr: add support to dump XIVE information...
Checking PATCH 21/35: spapr: advertise XIVE exploitation mode in CAS...
Checking PATCH 22/35: spapr: add classes for the XIVE models...
Checking PATCH 23/35: target/ppc/kvm: add Linux KVM definitions for XIVE...
Checking PATCH 24/35: spapr/xive

Re: [Qemu-devel] [PATCH 05/13] hw/xtensa/xtfpga.c: Don't create "null" chardevs for serial devices

2018-04-21 Thread Philippe Mathieu-Daudé
On 04/20/2018 11:52 AM, Peter Maydell wrote:
> Following commit 12051d82f004024, UART devices should handle
> being passed a NULL pointer chardev, so we don't need to
> create "null" backends in board code. Remove the code that
> does this and updates serial_hds[].
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 

> ---
>  hw/xtensa/xtfpga.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c
> index 70686a2eb1..9db99e1f7e 100644
> --- a/hw/xtensa/xtfpga.c
> +++ b/hw/xtensa/xtfpga.c
> @@ -278,10 +278,6 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, 
> MachineState *machine)
>  xtensa_get_extint(env, 1), nd_table);
>  }
>  
> -if (!serial_hds[0]) {
> -serial_hds[0] = qemu_chr_new("serial0", "null");
> -}
> -
>  serial_mm_init(system_io, 0x0d050020, 2, xtensa_get_extint(env, 0),
>  115200, serial_hds[0], DEVICE_NATIVE_ENDIAN);
>  
> 



Re: [Qemu-devel] [RFC 00/24] Avocado-based functional tests

2018-04-21 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180420181951.7252-1-ehabk...@redhat.com
Subject: [Qemu-devel] [RFC 00/24] Avocado-based functional tests

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]   patchew/20180420181951.7252-1-ehabk...@redhat.com -> 
patchew/20180420181951.7252-1-ehabk...@redhat.com
Switched to a new branch 'test'
ec49778cf4 avocado_qemu: Add a few VNC related tests
1f46f502ab avocado_qemu: Force vmimage distro
3c652684e4 avocado_qemu: Tests fixes
c4e2d71898 avocado_qemu: Introduce the add_image() VM API
35b9856b7d avocado_qemu: Set QMP log level to INFO
3d81ee79d0 avocado_qemu: Clean unneeded 'pass'
3fea592db2 avocado_qemu: Simplify the installation instructions
d1b9960835 avocado_qemu: Remove duplicate PortTracker implementation
030c3444fa avocado_qemu: Functional test for RHBZ1473203
4452f51949 avocado_qemu: Functional test for RHBZ#1436616
a791819b84 avocado_qemu: Functional test for RHBZ#1447027
1b525617c2 avocado_qemu: Functional test for RHBZ#1431939
8cb3174c7d avocado_qemu: Improve migration error message
800fdc0407 avocado_qemu: Fix exception name in caller
487b76eeea avocado_qemu: Add support to request image for testing
2349952ea0 avocado_qemu: Ignore kernel messages on get_console
ee1ba766ab avocado_qemu: Provide defaults for user and pass
2151fc0647 avocado_qemu: Store "arch" in VM
27d897fc64 avocado_qemu: Add " " after the default prompt regexp
1b31385780 avocado_qemu: Increase the login timeout to 60s
f964227931 avocado_qemu: Be lenient towards poluted serial console
903c5ccd71 avocado_qemu: Improve handle_prompts to allow login after booted vm
46e5bdc04f Introduce the basic framework to run Avocado tests
9a629a9861 qemu.py: Introduce _create_console() method

=== OUTPUT BEGIN ===
Checking PATCH 1/24: qemu.py: Introduce _create_console() method...
Checking PATCH 2/24: Introduce the basic framework to run Avocado tests...
WARNING: line over 80 characters
#311: FILE: tests/avocado/avocado_qemu/test.py:118:
+:raise QEMULoginProcessTerminatedError: If the client terminates during 
login

WARNING: line over 80 characters
#399: FILE: tests/avocado/avocado_qemu/test.py:206:
+raise QEMULoginProcessTerminatedError(details.status, 
details.output)

total: 0 errors, 2 warnings, 661 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 3/24: avocado_qemu: Improve handle_prompts to allow login after 
booted vm...
Checking PATCH 4/24: avocado_qemu: Be lenient towards poluted serial console...
Checking PATCH 5/24: avocado_qemu: Increase the login timeout to 60s...
Checking PATCH 6/24: avocado_qemu: Add " " after the default prompt regexp...
Checking PATCH 7/24: avocado_qemu: Store "arch" in VM...
Checking PATCH 8/24: avocado_qemu: Provide defaults for user and pass...
Checking PATCH 9/24: avocado_qemu: Ignore kernel messages on get_console...
Checking PATCH 10/24: avocado_qemu: Add support to request image for testing...
Checking PATCH 11/24: avocado_qemu: Fix exception name in caller...
Checking PATCH 12/24: avocado_qemu: Improve migration error message...
Checking PATCH 13/24: avocado_qemu: Functional test for RHBZ#1431939...
WARNING: line over 80 characters
#73: FILE: tests/avocado/test_info_memdev_host_nodes.py:49:
+cmd = 'object_add 
memory-backend-ram,id=mem1,host-nodes=0,size=2G,policy=bind'

total: 0 errors, 1 warnings, 66 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
Checking PATCH 14/24: avocado_qemu: Functional test for RHBZ#1447027...
ERROR: line over 90 characters
#51: FILE: tests/avocado/test_ovmf_with_240_vcpus.py:22:
+ 
default='/usr/share/edk2/ovmf/OVMF_CODE.secboot.fd')

WARNING: line over 80 characters
#53: FILE: tests/avocado/test_ovmf_with_240_vcpus.py:24:
+ 
default='/usr/share/edk2/ovmf/OVMF_VARS.fd')

total: 1 errors, 1 warnings, 72 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 15/24: avocado_qe

[Qemu-devel] [PATCH] elf-loader: Avoid calling qsort(NULL, 0, ...) call

2018-04-21 Thread Philippe Mathieu-Daudé
This fixes the following ASan warning:

  $ qemu-system-xtensa -M kc705 -m 128M -semihosting -nographic -monitor null 
-kernel Image.elf
  include/hw/elf_ops.h:179:5: runtime error: null pointer passed as argument 1, 
which is declared to never be null

Reported-by: AddressSanitizer
Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/elf_ops.h | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h
index b6e19e35d0..f0ac7c6c4e 100644
--- a/include/hw/elf_ops.h
+++ b/include/hw/elf_ops.h
@@ -110,7 +110,7 @@ static int glue(load_symbols, SZ)(struct elfhdr *ehdr, int 
fd, int must_swab,
 struct elf_shdr *symtab, *strtab, *shdr_table = NULL;
 struct elf_sym *syms = NULL;
 struct syminfo *s;
-int nsyms, i;
+int nsyms, i, ret = -1;
 char *str = NULL;
 
 shdr_table = load_at(fd, ehdr->e_shoff,
@@ -143,6 +143,7 @@ static int glue(load_symbols, SZ)(struct elfhdr *ehdr, int 
fd, int must_swab,
 if (!str) {
 goto fail;
 }
+ret = 0;
 
 i = 0;
 while (i < nsyms) {
@@ -170,30 +171,34 @@ static int glue(load_symbols, SZ)(struct elfhdr *ehdr, 
int fd, int must_swab,
 }
 i++;
 }
-syms = g_realloc(syms, nsyms * sizeof(*syms));
+if (nsyms) {
+syms = g_realloc(syms, nsyms * sizeof(*syms));
 
-qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ));
-for (i = 0; i < nsyms - 1; i++) {
-if (syms[i].st_size == 0) {
-syms[i].st_size = syms[i + 1].st_value - syms[i].st_value;
+qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ));
+for (i = 0; i < nsyms - 1; i++) {
+if (syms[i].st_size == 0) {
+syms[i].st_size = syms[i + 1].st_value - syms[i].st_value;
+}
 }
+
+/* Commit */
+s = g_malloc0(sizeof(*s));
+s->lookup_symbol = glue(lookup_symbol, SZ);
+glue(s->disas_symtab.elf, SZ) = syms;
+s->disas_num_syms = nsyms;
+s->disas_strtab = str;
+s->next = syminfos;
+syminfos = s;
+
+goto out;
 }
 
-/* Commit */
-s = g_malloc0(sizeof(*s));
-s->lookup_symbol = glue(lookup_symbol, SZ);
-glue(s->disas_symtab.elf, SZ) = syms;
-s->disas_num_syms = nsyms;
-s->disas_strtab = str;
-s->next = syminfos;
-syminfos = s;
-g_free(shdr_table);
-return 0;
  fail:
 g_free(syms);
 g_free(str);
+ out:
 g_free(shdr_table);
-return -1;
+return ret;
 }
 
 static int glue(elf_reloc, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
-- 
2.17.0




Re: [Qemu-devel] [PATCH v3 1/3] pc-dimm: factor out MemoryDevice interface

2018-04-21 Thread David Gibson
On Fri, Apr 20, 2018 at 02:34:54PM +0200, David Hildenbrand wrote:
> On the qmp level, we already have the concept of memory devices:
> "query-memory-devices"
> Right now, we only support NVDIMM and PCDIMM.
> 
> We want to map other devices later into the address space of the guest.
> Such device could e.g. be virtio devices. These devices will have a
> guest memory range assigned but won't be exposed via e.g. ACPI. We want
> to make them look like memory device, but not glued to pc-dimm.
> 
> Especially, it will not always be possible to have TYPE_PC_DIMM as a parent
> class (e.g. virtio devices). Let's use an interface instead. As a first
> part, convert handling of
> - qmp_pc_dimm_device_list
> - get_plugged_memory_size
> to our new model. plug/unplug stuff etc. will follow later.
> 
> A memory device will have to provide the following functions:
> - get_addr(): Necessary, as the property "addr" can e.g. not be used for
>   virtio devices (already defined).
> - get_plugged_size(): The amount this device offers to the guest as of
>   now.
> - get_region_size(): Because this can later on be bigger than the
>  plugged size.
> - fill_device_info(): Fill MemoryDeviceInfo, e.g. for qmp.
> 
> Signed-off-by: David Hildenbrand 

Reviewed-by: David Gibson 

with the exception of some tiny nits..

[snip]
> +static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
> +{
> +MemoryDeviceState *md_a = MEMORY_DEVICE(a);
> +MemoryDeviceState *md_b = MEMORY_DEVICE(b);

These probably should be const MemoryDeviceState *.

> +MemoryDeviceClass *mdc_a = MEMORY_DEVICE_GET_CLASS(a);
> +MemoryDeviceClass *mdc_b = MEMORY_DEVICE_GET_CLASS(b);
> +const uint64_t addr_a = mdc_a->get_addr(md_a);
> +const uint64_t addr_b = mdc_b->get_addr(md_b);
> +
> +if (addr_a > addr_b) {
> +return 1;
> +} else if (addr_a < addr_b) {
> +return -1;
> +}
> +return 0;
> +}
> +
> +static int memory_device_built_list(Object *obj, void *opaque)

s/built/build/ will read a bit more clearly I think.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Qemu-devel] [Bug 1187529] Re: Devices on PCI bridge stop working when live-migrated

2018-04-21 Thread Launchpad Bug Tracker
[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1187529

Title:
  Devices on PCI bridge stop working when live-migrated

Status in QEMU:
  Expired

Bug description:
  qemu version: 1.4.50 (0ca5aa4f4c4a8bcc73988dd52a536241d35e5223)
  host: x86_64, Linux 3.6.10 (Fedora 17)
  client: x86_64 Centos 6.3 (doesn't matter, really)

  If a device, e.g. an lsi53c895a, is on a pci-bridge, after migration, the 
device stops working (e.g., commands like "poweroff"
  get an Input/Output error. Fails under either xen or kvm. If "top" was 
running, some cpus go to ~100% wait.

  Sample KVM invocation line:
  qemu-system-x86_64 -machine type=pc,accel=kvm -m 4096 -device 
pci-bridgemsi=on,chassis_nr=1,id=pciBridge1.0,addr=0x11.0 -device 
lsi53c895a,id=sas,bus=pciBridge1.0,addr=0x1.0x0 -drive 
if=none,id=disk0,file=/path/to/disk/image -device 
scsi-disk,bus=sas.0,scsi-id=0,drive=disk0 -vnc 127.0.0.1:0,to=99 -serial pty 
-boot order=cda -smp 4,maxcpus=4 -monitor vc

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1187529/+subscriptions



Re: [Qemu-devel] [PATCH v3 0/3] pc-dimm: factor out MemoryDevice

2018-04-21 Thread Pankaj Gupta

Hello,

This is good re-factoring and needed for 'virtio-pmem' as well to
reserve memory region in system address space.

I have tested this code with virtio-pmem and its working fine. Thank you
for the work.

I just have a small suggestion : when functions like(get_addr(), 
get_plugged_size etc) 
in the interface are not provided by derived class, Qemu crashes.

I think having a contract for must override functions with NULL check and error
at the calling sites would be better?

Thanks,
Pankaj  

> Right now we can only map PCDIMM/NVDIMM into guest address space. In the
> future, we might want to do the same for virtio devices - e.g.
> virtio-pmem or virtio-mem. Especially, they should be able to live side
> by side to each other.
> 
> E.g. the virto based memory devices regions will not be exposed via ACPI
> and friends. They will be detected just like other virtio devices and
> indicate the applicable memory region. This makes it possible to also use
> them on architectures without memory device detection support (e.g. s390x).
> 
> Let's factor out the memory device code into a MemoryDevice interface.
> 
> 
> v2 -> v3:
> - "pc-dimm: factor out MemoryDevice interface"
> --> Lookup both classes when comparing (David Gibson)
> 
> v1 -> v2:
> - Fix compile issues on ppc (still untested  )
> 
> 
> David Hildenbrand (3):
>   pc-dimm: factor out MemoryDevice interface
>   machine: make MemoryHotplugState accessible via the machine
>   pc-dimm: factor out address space logic into MemoryDevice code
> 
>  hw/i386/acpi-build.c |   3 +-
>  hw/i386/pc.c |  24 ++-
>  hw/mem/Makefile.objs |   1 +
>  hw/mem/memory-device.c   | 282 +
>  hw/mem/pc-dimm.c | 304
>  +++
>  hw/ppc/spapr.c   |  24 ++-
>  hw/ppc/spapr_hcall.c |   1 +
>  include/hw/boards.h  |  16 ++
>  include/hw/mem/memory-device.h   |  48 +
>  include/hw/mem/pc-dimm.h |  26 +--
>  numa.c   |   3 +-
>  qmp.c|   4 +-
>  stubs/Makefile.objs  |   2 +-
>  stubs/{qmp_pc_dimm.c => qmp_memory_device.c} |   4 +-
>  14 files changed, 465 insertions(+), 277 deletions(-)
>  create mode 100644 hw/mem/memory-device.c
>  create mode 100644 include/hw/mem/memory-device.h
>  rename stubs/{qmp_pc_dimm.c => qmp_memory_device.c} (61%)
> 
> --
> 2.14.3
> 
> 
> 



Re: [Qemu-devel] [PATCH v3 1/3] pc-dimm: factor out MemoryDevice interface

2018-04-21 Thread Pankaj Gupta

> 
> On the qmp level, we already have the concept of memory devices:
> "query-memory-devices"
> Right now, we only support NVDIMM and PCDIMM.
> 
> We want to map other devices later into the address space of the guest.
> Such device could e.g. be virtio devices. These devices will have a
> guest memory range assigned but won't be exposed via e.g. ACPI. We want
> to make them look like memory device, but not glued to pc-dimm.
> 
> Especially, it will not always be possible to have TYPE_PC_DIMM as a parent
> class (e.g. virtio devices). Let's use an interface instead. As a first
> part, convert handling of
> - qmp_pc_dimm_device_list
> - get_plugged_memory_size
> to our new model. plug/unplug stuff etc. will follow later.
> 
> A memory device will have to provide the following functions:
> - get_addr(): Necessary, as the property "addr" can e.g. not be used for
>   virtio devices (already defined).
> - get_plugged_size(): The amount this device offers to the guest as of
>   now.
> - get_region_size(): Because this can later on be bigger than the
>  plugged size.
> - fill_device_info(): Fill MemoryDeviceInfo, e.g. for qmp.
> 
> Signed-off-by: David Hildenbrand 
> ---
>  hw/i386/acpi-build.c |   3 +-
>  hw/mem/Makefile.objs |   1 +
>  hw/mem/memory-device.c   | 120
>  +++
>  hw/mem/pc-dimm.c | 119
>  +-
>  hw/ppc/spapr.c   |   3 +-
>  hw/ppc/spapr_hcall.c |   1 +
>  include/hw/mem/memory-device.h   |  44 ++
>  include/hw/mem/pc-dimm.h |   2 -
>  numa.c   |   3 +-
>  qmp.c|   4 +-
>  stubs/Makefile.objs  |   2 +-
>  stubs/{qmp_pc_dimm.c => qmp_memory_device.c} |   4 +-
>  12 files changed, 240 insertions(+), 66 deletions(-)
>  create mode 100644 hw/mem/memory-device.c
>  create mode 100644 include/hw/mem/memory-device.h
>  rename stubs/{qmp_pc_dimm.c => qmp_memory_device.c} (61%)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 3cf2a1679c..ca3645d57b 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -46,6 +46,7 @@
>  #include "hw/acpi/vmgenid.h"
>  #include "sysemu/tpm_backend.h"
>  #include "hw/timer/mc146818rtc_regs.h"
> +#include "hw/mem/memory-device.h"
>  #include "sysemu/numa.h"
>  
>  /* Supported chipsets: */
> @@ -2253,7 +2254,7 @@ build_tpm2(GArray *table_data, BIOSLinker *linker,
> GArray *tcpalog)
>  static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t
>  base,
> uint64_t len, int default_node)
>  {
> -MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list();
> +MemoryDeviceInfoList *info_list = qmp_memory_device_list();
>  MemoryDeviceInfoList *info;
>  MemoryDeviceInfo *mi;
>  PCDIMMDeviceInfo *di;
> diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs
> index f12f8b97a2..10be4df2a2 100644
> --- a/hw/mem/Makefile.objs
> +++ b/hw/mem/Makefile.objs
> @@ -1,2 +1,3 @@
>  common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o
> +common-obj-$(CONFIG_MEM_HOTPLUG) += memory-device.o
>  common-obj-$(CONFIG_NVDIMM) += nvdimm.o
> diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
> new file mode 100644
> index 00..b860c9c582
> --- /dev/null
> +++ b/hw/mem/memory-device.c
> @@ -0,0 +1,120 @@
> +/*
> + * Memory Device Interface
> + *
> + * Copyright ProfitBricks GmbH 2012
> + * Copyright (C) 2014 Red Hat Inc
> + * Copyright (c) 2018 Red Hat Inc
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/mem/memory-device.h"
> +#include "hw/qdev.h"
> +#include "qapi/error.h"
> +#include "hw/boards.h"
> +#include "qemu/range.h"
> +
> +static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
> +{
> +MemoryDeviceState *md_a = MEMORY_DEVICE(a);
> +MemoryDeviceState *md_b = MEMORY_DEVICE(b);
> +MemoryDeviceClass *mdc_a = MEMORY_DEVICE_GET_CLASS(a);
> +MemoryDeviceClass *mdc_b = MEMORY_DEVICE_GET_CLASS(b);
> +const uint64_t addr_a = mdc_a->get_addr(md_a);
> +const uint64_t addr_b = mdc_b->get_addr(md_b);
> +
> +if (addr_a > addr_b) {
> +return 1;
> +} else if (addr_a < addr_b) {
> +return -1;
> +}
> +return 0;
> +}
> +
> +static int memory_device_built_list(Object *obj, void *opaque)
> +{
> +GSList **list = opaque;
> +
> +if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) {
> +DeviceState *dev = DEVICE(obj);
> +if (dev->realized) { /* only realized memory devices matter */
> +*list = g_slist_insert_sorted(*list, dev,
> memory_device_addr_sort);
> +}
>