[Qemu-devel] [PATCH 0/2] mirror: Allow detection of zeroes on source sectors

2015-06-08 Thread Fam Zheng
[These patches go on top of the "block: Mirror discarded sectors" series]

Some protocols don't have an easy way to query sparseness, (e.g. block/nfs.c,
block/nbd.c), for which block layer always reports block status as "allocated
data".

This will let mirror job do full provisioning even if data is actually sparse
under the hood.

With the new "detect-zeroes" option, we can let mirror job detect zeroes before
sending the data to target, and use zero write when it is more efficient.

Fam


Fam Zheng (2):
  qapi: Add "detect-zeroes" option to drive-mirror
  iotests: Add test cases for drive-mirror "detect-zeroes" option

 block/mirror.c| 21 +++--
 blockdev.c|  6 +-
 hmp.c |  2 +-
 include/block/block_int.h |  3 ++-
 qapi/block-core.json  |  4 +++-
 qmp-commands.hx   |  4 +++-
 tests/qemu-iotests/132| 26 +++---
 tests/qemu-iotests/132.out|  4 ++--
 tests/qemu-iotests/iotests.py |  7 +++
 9 files changed, 61 insertions(+), 16 deletions(-)

-- 
2.4.2




[Qemu-devel] [PATCH 2/2] iotests: Add test cases for drive-mirror "detect-zeroes" option

2015-06-08 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/132| 26 +++---
 tests/qemu-iotests/132.out|  4 ++--
 tests/qemu-iotests/iotests.py |  7 +++
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/132 b/tests/qemu-iotests/132
index f53ef6e..e506490 100644
--- a/tests/qemu-iotests/132
+++ b/tests/qemu-iotests/132
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Test mirror with unmap
+# Test mirror with unmap and zero source clusters
 #
 # Copyright (C) 2015 Red Hat, Inc.
 #
@@ -21,7 +21,7 @@
 import time
 import os
 import iotests
-from iotests import qemu_img, qemu_io
+from iotests import qemu_img, qemu_io, qemu_img_map
 
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
@@ -55,5 +55,25 @@ class TestSingleDrive(iotests.QMPTestCase):
 self.assertTrue(iotests.compare_images(test_img, target_img),
 'target image does not match source after mirroring')
 
+def test_detect_zeroes(self):
+self.vm.hmp_qemu_io('drive0', 'write -P 0 0 2M')
+result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ target=target_img, detect_zeroes=True)
+self.assert_qmp(result, 'return', {})
+self.complete_and_wait('drive0')
+self.vm.shutdown()
+m = qemu_img_map(target_img)
+self.assertTrue(m[0]["zero"])
+
+def test_no_detect_zeroes(self):
+self.vm.hmp_qemu_io('drive0', 'write -P 0 0 2M')
+result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ target=target_img, detect_zeroes=False)
+self.assert_qmp(result, 'return', {})
+self.complete_and_wait('drive0')
+self.vm.shutdown()
+m = qemu_img_map(target_img)
+self.assertFalse(m[0]["zero"])
+
 if __name__ == '__main__':
-iotests.main(supported_fmts=['raw', 'qcow2'])
+iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/132.out b/tests/qemu-iotests/132.out
index ae1213e..8d7e996 100644
--- a/tests/qemu-iotests/132.out
+++ b/tests/qemu-iotests/132.out
@@ -1,5 +1,5 @@
-.
+...
 --
-Ran 1 tests
+Ran 3 tests
 
 OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 8615b10..2ddc735 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -27,6 +27,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 
'..', 'scripts', '
 import qmp
 import qtest
 import struct
+import json
 
 __all__ = ['imgfmt', 'imgproto', 'test_dir' 'qemu_img', 'qemu_io',
'VM', 'QMPTestCase', 'notrun', 'main']
@@ -58,6 +59,12 @@ def qemu_img_pipe(*args):
 '''Run qemu-img and return its output'''
 return subprocess.Popen(qemu_img_args + list(args), 
stdout=subprocess.PIPE).communicate()[0]
 
+def qemu_img_map(*args):
+'''Run qemu-img map and return the result parsed from the json formated
+output '''
+output = qemu_img_pipe(*(['map', '--output=json'] + list(args)))
+return json.loads(output)
+
 def qemu_io(*args):
 '''Run qemu-io and return the stdout data'''
 args = qemu_io_args + list(args)
-- 
2.4.2




[Qemu-devel] [PATCH 1/2] qapi: Add "detect-zeroes" option to drive-mirror

2015-06-08 Thread Fam Zheng
The new optional flag defaults to true, in which case, mirror job would
check the read sectors and use sparse write if they are zero.  Otherwise
data will be fully copied.

Signed-off-by: Fam Zheng 
---
 block/mirror.c| 21 +++--
 blockdev.c|  6 +-
 hmp.c |  2 +-
 include/block/block_int.h |  3 ++-
 qapi/block-core.json  |  4 +++-
 qmp-commands.hx   |  4 +++-
 6 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 3c38695..261373c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -58,6 +58,7 @@ typedef struct MirrorBlockJob {
 int sectors_in_flight;
 int ret;
 bool unmap;
+bool detect_zeroes;
 } MirrorBlockJob;
 
 typedef struct MirrorOp {
@@ -153,8 +154,14 @@ static void mirror_read_complete(void *opaque, int ret)
 mirror_iteration_done(op, ret);
 return;
 }
-bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
-mirror_write_complete, op);
+if (s->detect_zeroes && qemu_iovec_is_zero(&op->qiov)) {
+bdrv_aio_write_zeroes(s->target, op->sector_num, op->nb_sectors,
+  s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
+  mirror_write_complete, op);
+} else {
+bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
+mirror_write_complete, op);
+}
 }
 
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
@@ -668,7 +675,7 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
  int64_t buf_size,
  BlockdevOnError on_source_error,
  BlockdevOnError on_target_error,
- bool unmap,
+ bool unmap, bool detect_zeroes,
  BlockCompletionFunc *cb,
  void *opaque, Error **errp,
  const BlockJobDriver *driver,
@@ -704,6 +711,7 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 s->granularity = granularity;
 s->buf_size = MAX(buf_size, granularity);
 s->unmap = unmap;
+s->detect_zeroes = detect_zeroes;
 
 s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
 if (!s->dirty_bitmap) {
@@ -722,7 +730,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState 
*target,
   int64_t speed, uint32_t granularity, int64_t buf_size,
   MirrorSyncMode mode, BlockdevOnError on_source_error,
   BlockdevOnError on_target_error,
-  bool unmap,
+  bool unmap, bool detect_zeroes,
   BlockCompletionFunc *cb,
   void *opaque, Error **errp)
 {
@@ -737,7 +745,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState 
*target,
 base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
 mirror_start_job(bs, target, replaces,
  speed, granularity, buf_size,
- on_source_error, on_target_error, unmap, cb, opaque, errp,
+ on_source_error, on_target_error, unmap,
+ detect_zeroes, cb, opaque, errp,
  &mirror_job_driver, is_none_mode, base);
 }
 
@@ -785,7 +794,7 @@ void commit_active_start(BlockDriverState *bs, 
BlockDriverState *base,
 
 bdrv_ref(base);
 mirror_start_job(bs, base, NULL, speed, 0, 0,
- on_error, on_error, false, cb, opaque, &local_err,
+ on_error, on_error, false, false, cb, opaque, &local_err,
  &commit_active_job_driver, false, base);
 if (local_err) {
 error_propagate(errp, local_err);
diff --git a/blockdev.c b/blockdev.c
index 4387e14..d86ec1c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2633,6 +2633,7 @@ void qmp_drive_mirror(const char *device, const char 
*target,
   bool has_on_source_error, BlockdevOnError 
on_source_error,
   bool has_on_target_error, BlockdevOnError 
on_target_error,
   bool has_unmap, bool unmap,
+  bool has_detect_zeroes, bool detect_zeroes,
   Error **errp)
 {
 BlockBackend *blk;
@@ -2667,6 +2668,9 @@ void qmp_drive_mirror(const char *device, const char 
*target,
 if (!has_unmap) {
 unmap = true;
 }
+if (!has_detect_zeroes) {
+detect_zeroes = true;
+}
 
 if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) 
{
 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -2806,7 +2810,7 @@ void qmp_drive_mirror(const char *device, const char 
*target,
  has_replaces ? replaces : NULL,
  speed, granularity, buf_size, sync,
  on_source_error, on_target_

Re: [Qemu-devel] [PATCH] xen/pass-through: ROM BAR handling adjustments

2015-06-08 Thread Jan Beulich
>>> On 05.06.15 at 18:41,  wrote:
> On Fri, 5 Jun 2015, Jan Beulich wrote:
>> >>> On 05.06.15 at 13:32,  wrote:
>> >> --- a/hw/xen/xen_pt.c
>> >> +++ b/hw/xen/xen_pt.c
>> >> @@ -248,7 +248,9 @@ static void xen_pt_pci_write_config(PCID
>> >>  
>> >>  /* check unused BAR register */
>> >>  index = xen_pt_bar_offset_to_index(addr);
>> >> -if ((index >= 0) && (val > 0 && val < XEN_PT_BAR_ALLF) &&
>> >> +if ((index >= 0) && (val != 0) &&
>> >> +(((index != PCI_ROM_SLOT) ?
>> >> +  val : (val | (uint32_t)~PCI_ROM_ADDRESS_MASK)) != 
> XEN_PT_BAR_ALLF) &&
>> > 
>> > The change seems looks good and in line with the commit message. But
>> > this if statement looks like acrobatic circus to me now.
>> 
>> I think the alternative of splitting it up into multiple if()-s would not
>> be any better readable.
> 
> Would you be OK if I rewrote the statement as follows?
> 
> if ((index >= 0) && (val != 0)) {
> uint32_t vu;
> 
> if (index == PCI_ROM_SLOT) {
> vu = val | (uint32_t)~PCI_ROM_ADDRESS_MASK;
> } else {
> vu = val;
> }
> 
> if ((vu != XEN_PT_BAR_ALLF) &&
> (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) {
> XEN_PT_WARN(d, "Guest attempt to set address to unused Base 
> Address "
> "Register. (addr: 0x%02x, len: %d)\n", addr, len);
> }
> }

Actually I agree that this indeed looks better. If I were to make the
change, I'd do

if ((index >= 0) && (val != 0)) {
uint32_t vu = val;

if (index == PCI_ROM_SLOT) {
vu |= (uint32_t)~PCI_ROM_ADDRESS_MASK;
}

if ((vu != XEN_PT_BAR_ALLF) &&
...

though. But if you're going to do the edit without wanting me to
re-submit, I'll certainly leave this to you. Just let me know which
way you prefer it to be handled.

Jan




Re: [Qemu-devel] [RFC] extensions to the -m memory option

2015-06-08 Thread Markus Armbruster
Copying Andreas in case he has further advice.

Peter Crosthwaite  writes:

> On Wed, Jun 3, 2015 at 5:31 AM, Liviu Ionescu  wrote:
>>
>>> On 01 Jun 2015, at 23:36, Liviu Ionescu  wrote:
>>>
>>> ... I just pushed a preliminary version where all STM32 MCUs and
>>> STM32 related boards use the new framework.
>>
>> please disregard the current version, I'm already re-working the
>> code to be more compliant with the (quite complicated) object
>> infrastructure.
>>
>> btw, except from various other pieces of code, more or less
>> up-to-date, is there any document explaining what is the recommended
>> way of using these objects?
>>
>> at least the confusing 'realize' mechanism is it explained somewhere?
>>
>
> http://wiki.qemu.org/QOMConventions
>
> but that doesn't really cover your question.
>
> Some of my rules of thumb.
>
> 1: For container objects, realize is often just a recursive call of
> realize for the contained. It may set properties on the way. see
> hw/arm/xlnx-zynqmp for a recent example.
> 2: Data structure creation should generally go in the init. If it
> depends on a property (e.g. to size an array) then you can delay to
> realize or do it in a property setter callback.
> 3: If it has a user visible failure path it goes in realize
> 4: When in doubt and if it is possible. do things in init, not realize.
> 5: External interface connection is usually realize (e.g. connecting a
> uart or network)
> 6: Do not have any device state modifications in realize. A common
> mistake is to put hardware reset behavior in realize (e.g. initing
> registers to their reset value). Use the DeviceClass::reset callback
> for this.
> 7: When I say init, I mean the object instance_init callback, do not
> use the depracated SysBusDevice::init callback.



Re: [Qemu-devel] fw cfg files cross-version migration races

2015-06-08 Thread Gerd Hoffmann
  Hi,

> > So, sorting entries (and the index assigned too) should fix this, right?
> > That looks easiest to me.
> 
> Presumably, anything happening before (and after) user-provided blobs
> are inserted will continue happening in the same order everywhere.

Which might change in the future though, in case the initialization
order changes for some reason.  It's not a problem we have at the
moment, so this stuff isn't urgent.  But we might face it in the future.

> So we really only have to sort the user provided blobs, so that if
> the same *set* is provided on both ends of the migration, things would
> work out OK.

I would simply sort everything (and do it for new machine types only,
for backward compatibility reasons).  Sorting user-provided blobs only
adds complexity for no reason.

> If a *different* set of blobs is specified on the migration origin vs.
> the migration destination, we lose by default and worrying about it is
> pointless -- did I get that right ?

Yes.  For migration to succeed you have to supply the same configuration
on both ends.  That includes user-provided fw_cfg blobs of course.

cheers,
  Gerd





Re: [Qemu-devel] [PATCH 4/4] acpi: unify rsdp generation

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 09:52:22AM +0800, Shannon Zhao wrote:
> > But how does one test ARM ACPI code?
> > Could you upload an image with instructions to the wiki?
> > http://wiki.qemu.org/System_Images
> > 
> 
> Ok, no problem. But I don't have an account of the wiki.

Stefan, you have admin rights there, right?

-- 
MST



Re: [Qemu-devel] [PATCH 1/2] block-backend: Introduce blk_drain() and replace blk_drain_all()

2015-06-08 Thread Christian Borntraeger
Am 03.06.2015 um 15:46 schrieb Alexander Yarygin:
> Each call of the virtio_blk_reset() function calls blk_drain_all(),
> which works for all existing BlockDriverStates, while only one
> BlockDriverState needs to be drained.
> 
> This patch introduces the blk_drain() function and replaces
> blk_drain_all() on it in virtio_blk_reset().
> 
> Cc: Christian Borntraeger 
> Cc: Cornelia Huck 
> Cc: Kevin Wolf 
> Cc: Paolo Bonzini 
> Cc: Stefan Hajnoczi 
> Signed-off-by: Alexander Yarygin 

Stefan/Kevin,

any opinion on these 2 patches? I would like to have this issue fixed
for s390 as having many disks is quite common on s390. 


> ---
>  block/block-backend.c  | 5 +
>  hw/block/virtio-blk.c  | 2 +-
>  include/sysemu/block-backend.h | 1 +
>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 93e46f3..aee8a12 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -700,6 +700,11 @@ int blk_flush_all(void)
>  return bdrv_flush_all();
>  }
> 
> +void blk_drain(BlockBackend *blk)
> +{
> +bdrv_drain(blk->bs);
> +}
> +
>  void blk_drain_all(void)
>  {
>  bdrv_drain_all();
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index e6afe97..abaca58 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -660,7 +660,7 @@ static void virtio_blk_reset(VirtIODevice *vdev)
>   * This should cancel pending requests, but can't do nicely until there
>   * are per-device request lists.
>   */
> -blk_drain_all();
> +blk_drain(s->blk);
>  blk_set_enable_write_cache(s->blk, s->original_wce);
>  }
> 
> diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
> index b4a4d5e..8fc960f 100644
> --- a/include/sysemu/block-backend.h
> +++ b/include/sysemu/block-backend.h
> @@ -118,6 +118,7 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, 
> int nb_sectors);
>  int blk_co_flush(BlockBackend *blk);
>  int blk_flush(BlockBackend *blk);
>  int blk_flush_all(void);
> +void blk_drain(BlockBackend *blk);
>  void blk_drain_all(void);
>  BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
>  BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
> 




Re: [Qemu-devel] [RFC PATCH v2] qapi for audio backends

2015-06-08 Thread Gerd Hoffmann
On Fr, 2015-06-05 at 15:54 +0200, Kővágó Zoltán wrote:
> Hi,
> 
> 2015-06-05 12:57 keltezéssel, Gerd Hoffmann írta:
> >> Yeah, I've already hit a problem. The opts_visitor doesn't really handle
> >> nested structs (it just flattens it into a single, non hierarchic
> >> namespace), which is a problem because of the input and output options.
> >> First I need to make them required (the in and out in Audiodev) if I
> >> want the current visitor to visit them at all, but it's still not enough.
> >
> > I think we should improve the visitor instead of making in & out
> > mandatory just because the current implementation (which simply
> > implements the stuff needed for Netdev) can't handle it.
> 
> It's not that simple I think. The visit_optional only receives a field 
> name, but no info about the type of the field, but it has to decide if 
> it wants the field using only this info. So sort of hacking an if 
> (strcmp(name, "in") == 0 || strcmp(name, "out") == 0) ... into the 
> option visitor code, the only way is probably to add a type parameter to 
> visit_optional (struct/union/uint32/whatever) and in the opts visitor if 
> type is struct or union, force visiting it. Is it ok?

Thinking about this a bit more, I suspect it becomes tricky when it
comes to allocation.  We want allocate *in and *out only in case there
are actually parameters for it on the command line.

But I think we have to pass something down to the struct visitor (given
how visitors are designed).  So allocate, pass down, then free again in
case nothing was filled?  How do we figure nothing was filled, in the
generic visit_optional function?

Of course we can avoid this complexity by simply allocating *in and *out
unconditionally, but then it is pointless to make it optional in the
first place.  So maybe it is better to go with your original idea to
just make them mandatory (and all elements inside optional).

cheers,
  Gerd





Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Jan Beulich
>>> On 07.06.15 at 08:23,  wrote:
> On Mon, Apr 20, 2015 at 04:32:12PM +0200, Michael S. Tsirkin wrote:
>> On Mon, Apr 20, 2015 at 03:08:09PM +0100, Jan Beulich wrote:
>> > >>> On 20.04.15 at 15:43,  wrote:
>> > > On Mon, Apr 13, 2015 at 01:51:06PM +0100, Jan Beulich wrote:
>> > >> >>> On 13.04.15 at 14:47,  wrote:
>> > >> > Can you check device capabilities register, offset 0x4 within
>> > >> > pci express capability structure?
>> > >> > Bit 15 is 15 Role-Based Error Reporting.
>> > >> > Is it set?
>> > >> > 
>> > >> > The spec says:
>> > >> > 
>> > >> >   15
>> > >> >   On platforms where robust error handling and PC-compatible 
>> > >> > Configuration 
>> > >> > Space probing is
>> > >> >   required, it is suggested that software or firmware have the 
>> > >> > Unsupported 
>> > >> > Request Reporting Enable
>> > >> >   bit Set for Role-Based Error Reporting Functions, but clear for 
>> > >> > 1.0a 
>> > >> > Functions. Software or
>> > >> >   firmware can distinguish the two classes of Functions by 
>> > >> > examining the 
>> > >> > Role-Based Error Reporting
>> > >> >   bit in the Device Capabilities register.
>> > >> 
>> > >> Yes, that bit is set.
>> > > 
>> > > curiouser and curiouser.
>> > > 
>> > > So with functions that do support Role-Based Error Reporting, we have
>> > > this:
>> > > 
>> > > 
>> > >  With device Functions implementing Role-Based Error Reporting, setting 
>> > > the 
>> > > Unsupported Request
>> > >  Reporting Enable bit will not interfere with PC-compatible 
>> > > Configuration 
>> > > Space probing, assuming
>> > >  that the severity for UR is left at its default of non-fatal. However, 
>> > > setting the Unsupported Request
>> > >  Reporting Enable bit will enable the Function to report UR errors 97 
>> > > detected with posted Requests,
>> > >  helping avoid this case for potential silent data corruption.
>> > 
>> > I still don't see what the PC-compatible config space probing has to
>> > do with our issue.
>> 
>> I'm not sure but I think it's listed here because it causes a ton of URs
>> when device scan probes unimplemented functions.
>> 
>> > > did firmware reconfigure this device to report URs as fatal errors then?
>> > 
>> > No, the Unsupported Request Error Serverity flag is zero.
>> 
>> OK, that's the correct configuration, so how come the box crashes when
>> there's a UR then?
> 
> Ping - any update on this?

Not really. All we concluded so far is that _maybe_ the bridge, upon
seeing the UR, generates a Master Abort, rendering the whole thing
fatal. Otoh the respective root port also has
- Received Master Abort set in its Secondary Status register (but
  that's also already the case in the log that we have before the UR
  occurs, i.e. that doesn't mean all that much),
- Received System Error set in its Secondary Status register (and
  after the UR the sibling endpoint [UR originating from 83:00.0,
  sibling being 83:00.1] also shows Signaled System Error set).

> Do we can chalk this up to hardware bugs on a specific box?

I have to admit that I'm still very uncertain whether to consider all
this correct behavior, a firmware flaw, or a hardware bug.

Jan




Re: [Qemu-devel] [PATCH RFC V3 1/4] Use Aff1 with mpidr This is an improved and KVM-aware alternative to

2015-06-08 Thread Pavel Fedin
> I think it would be less confusing to store the entire MPIDR
> here, and then mask it out if there are places that only
> want the affinity bits.

 I thought about it when i developed this, but in my implementation affinity 
bits are assigned during CPU instantiation, while feature bits can be added 
later, during realize. I could tweak set_feature() to sync up MPIDR value but 
perhaps it isn't the best thing to do.
 Additionally, currently we support only ARM_FEATURE_V7MP, which is 32-bit 
only. I believe this would be a more complex modification which would touch 
more of 32-bit code.
 So would it be acceptable to leave mpidr handling as it is?

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia





Re: [Qemu-devel] ELF loader?

2015-06-08 Thread Peter Maydell
On 7 June 2015 at 17:03, Liviu Ionescu  wrote:
>
>> On 07 Jun 2015, at 13:46, Peter Maydell  wrote:
>> ... Some ELF files intended for embedded use have
>> a little self-initializing bit on the front that manually clears
>> their own .bss section, but that's not part of the ELF spec, and
>> QEMU will handle the other kind too.
>
> as I said, in the embedded world things are a bit different, there
> is no ELF loader, or you can consider the debugger as an ELF loader
> since it is reading the ELF file and programming the flash.
>
> but during this step it makes no sense to write/clear the RAM

That depends on what the ELF file is. If I were writing a debugger's
ELF loader code I would make it clear the bss area, because a
debugger is supposed to handle any ELF file you throw at it,
and following the ELF spec is trivial.

-- PMM



Re: [Qemu-devel] [PATCH 2/2] virtio-gpu/2d: add virtio gpu core code

2015-06-08 Thread Gerd Hoffmann
  Hi,

> > +static uint32_t virtio_gpu_get_features(VirtIODevice *vdev, uint32_t 
> > features)
> > +{
> > +return features;
> > +}
> 
> Does this series rely on some other patches? Because in master, 
> VirtioDeviceClass::get_features() is still uint64_t (*)(VirtioDevice *, 
> uint64_t) (which results in "hw/display/virtio-gpu.c:886:23: error: 
> assignment from incompatible pointer type" for me).

master was 32bit until recently (a few days back), now 64bit features
are finally merged and this mess should come to an end ;)

> (in an earlier series, this function was uint64_t ()(VirtIODevice *, 
> uint64_t))

Earlier versions used to depend on unmerged virtio patches, this one was
for master ...

> > +if (ab->nr_entries > 1024) {
> > +qemu_log_mask(LOG_GUEST_ERROR,
> > +  "%s: nr_entries is too big (%d > 1024)\n",
> > +  __func__, ab->nr_entries);
> > +return -1;
> > +}
> 
> Same question I had in said earlier series: Do you want to change this 
> to 16384, because 1024 may be too small?

Oops, fixed.

> > +#define VIRTIO_GPU_MAX_RES 16
> 
> Still unused. Still intentional? :-)

Fixed too.

cheers,
  Gerd





Re: [Qemu-devel] [PATCH] configure: Unify arm and aarch64 disas configury

2015-06-08 Thread Peter Maydell
On 7 June 2015 at 20:19, Peter Crosthwaite  wrote:
> On Sun, Jun 7, 2015 at 3:51 AM, Peter Maydell  
> wrote:
>> worked when I looked at it :-)  We'd get duplicate lines in
>> the config files, but we already have those when host==target.
>>
>
> Brute force it and sort -u? sort is considered a coreutil by at least
> my manpages so I wonder if we can assume it is universally available.

It's only a minor cosmetic thing in a file nobody looks at, so I'm
not sure it's worth the effort.

(We already use 'sort -u' in our Makefiles, incidentally.)

-- PMM



Re: [Qemu-devel] [PATCH 4/4] i386/acpi-build: build_crs(): fetch BAR from PCI config space directly

2015-06-08 Thread Laszlo Ersek
On 06/07/15 11:23, Michael S. Tsirkin wrote:
> On Sat, Jun 06, 2015 at 01:46:29AM +0200, Laszlo Ersek wrote:
>> OVMF downloads the ACPI linker/loader script from QEMU when the edk2 PCI
>> Bus driver globally signals the firmware that PCI enumeration and resource
>> allocation have completed. At this point QEMU regenerates the ACPI payload
>> in an fw_cfg read callback, and this is when the PXB's _CRS gets
>> populated.
>>
>> Unfortunately, when this happens, the PCI_COMMAND_MEMORY bit is clear in
>> the root bus's command register, *unlike* under SeaBIOS. The consequences
>> unfold as follows:
>>
>> - When build_crs() fetches dev->io_regions[i].addr, it is all-bits-one,
>>   because pci_update_mappings() --> pci_bar_address() calculated it as
>>   PCI_BAR_UNMAPPED, due to the PCI_COMMAND_MEMORY bit being clear.
>>
>> - Consequently, the SHPC MMIO BAR (bar 0) of the bridge is not added to
>>   the _CRS, *despite* having been programmed in PCI config space.
>>
>> - Similarly, the SHPC MMIO BAR of the PXB is not removed from the main
>>   root bus's DWordMemory descriptor.
>>
>> - Guest OSes (Linux and Windows alike) notice the pre-programmed SHPC BAR
>>   within the PXB's config space, and notice that it conflicts with the
>>   main root bus's memory resource descriptors. Linux reports
>>
>>   pci :04:00.0: BAR 0: can't assign mem (size 0x100)
>>   pci :04:00.0: BAR 0: trying firmware assignment [mem
>>0x8820-0x882000ff 64bit]
>>   pci :04:00.0: BAR 0: [mem 0x8820-0x882000ff 64bit] conflicts
>>with PCI Bus :00 [mem
>>0x8820-0xfebf]
>>
>>   While Windows Server 2012 R2 reports
>>
>> https://technet.microsoft.com/en-us/library/cc732199%28v=ws.10%29.aspx
>>
>> This device cannot find enough free resources that it can use. If you
>> want to use this device, you will need to disable one of the other
>> devices on this system. (Code 12)
>>
>> (This issue was apparently encountered earlier, see:
>>
>>   https://lists.nongnu.org/archive/html/qemu-devel/2015-01/msg02983.html
>>
>> and the current hole-punching logic in build_crs() and build_ssdt() is
>> probably supposed to remedy exactly that problem -- however, for OVMF they
>> don't work, because at the end of the PCI enumeration and resource
>> allocation, which cues the ACPI linker/loader client, the command register
>> is clear.)
>>
>> The solution is to fetch the BAR addresses from PCI config space directly,
>> for the purposes of build_crs(), regardless of the PCI command register
>> and any MemoryRegion state.
>>
>> Example MMIO maps for a 2GB guest:
>>
>> SeaBIOS:
>>
>>   main: 0x8000..0xFC00 / 0x7C00
>>   pxb:  0xFC00..0xFC20 / 0x0020
>>   main: 0xFC20..0xFC213000 / 0x00013000
>>   pxb:  0xFC213000..0xFC213100 / 0x0100 <- SHPC BAR
>>   main: 0xFC213100..0xFEA0 / 0x027ECF00
>>   pxb:  0xFEA0..0xFEC0 / 0x0020
>>
>> OVMF, without the fix:
>>
>>   main: 0x8000..0x8810 / 0x0810
>>   pxb:  0x8810..0x8820 / 0x0010
>>   main: 0x8820..0xFEC0 / 0x76A0
>>
>> OVMF, with the fix:
>>
>>   main: 0x8000..0x8810 / 0x0810
>>   pxb:  0x8810..0x8820 / 0x0010
>>   pxb:  0x8820..0x88200100 / 0x0100 <- SHPC BAR
>>   main: 0x88200100..0xFEC0 / 0x769FFF00
>>
>> (Note that under OVMF the PCI enumerator includes requests for
>> prefetchable memory in the nonprefetchable memory pool -- separate windows
>> for nonprefetchable and prefetchable memory are not supported (yet) --
>> that's why there's one fewer pxb range in the corrected OVMF case than in
>> the SeaBIOS case.)
>>
>> Cc: Marcel Apfelbaum 
>> Cc: Michael S. Tsirkin 
>> Signed-off-by: Laszlo Ersek 
> 
> This is problematic - disabled BAR values have no meaning according to
> the PCI spec.
> 
> It might be best to add a property to just disable shpc in the bridge so
> no devices reside directly behind the pxb?

If that solves this problem (and I do think it should), then I don't
have anything against the idea. I guess I could look at other property
code for examples. I think I'd have the main problem with implementing
the "policy" part: when exactly do you flip the new property to false,
and where. (Also, I'm not really sure what the SHPC bar is good for, and
if it won't be missed by someone who wanted to use PXB otherwise.)

Thanks
Laszlo

>> ---
>>  hw/i386/acpi-build.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index b71e942..60d4f75 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -784,8 +784,8 @@ static Aml *build_crs(PCIHostState *host,
>>  for (i = 0; i < PCI_NUM_REGIONS; i++) {
>>  PCIIORegion *r = &dev->io_regions[i];
>>  
>> -range_base = r->addr;
>> -range_limit = r->addr + r->size - 1;
>> +range_base = pci

Re: [Qemu-devel] [PATCH RFC V3 1/4] Use Aff1 with mpidr This is an improved and KVM-aware alternative to

2015-06-08 Thread Peter Maydell
On 8 June 2015 at 08:50, Pavel Fedin  wrote:
>> I think it would be less confusing to store the entire MPIDR
>> here, and then mask it out if there are places that only
>> want the affinity bits.
>
>  I thought about it when i developed this, but in my implementation
> affinity bits are assigned during CPU instantiation, while feature
> bits can be added later, during realize. I could tweak set_feature()
> to sync up MPIDR value but perhaps it isn't the best thing to do.

Can we not just assign the whole thing at realize?

>  Additionally, currently we support only ARM_FEATURE_V7MP, which is
> 32-bit only. I believe this would be a more complex modification
> which would touch more of 32-bit code.

It's only 32-bit in the sense that all 64-bit systems (indeed, all v8
systems) have the feature and so the bit is effectively RES1.

You need to fix the 32-bit side anyway.

>  So would it be acceptable to leave mpidr handling as it is?

I still think that having the whole mpidr in the struct will
be less confusing.

thanks
-- PMM



Re: [Qemu-devel] [PATCH 0/4] blockdev: Defer creation of implicit PCI devices for IF_VIRTIO drives

2015-06-08 Thread Christian Borntraeger
Am 04.06.2015 um 18:20 schrieb Peter Maydell:
> blockdev.c will create implicit virtio-blk-* devices for IF_VIRTIO
> drives. I want to turn this on for the ARM virt board (now it has PCI),
> so that users can use shorter and more comprehensible command lines.
> 
> Unfortunately, the code as it stands will always create an implicit
> device, which means that setting the virt block_default_type to IF_VIRTIO
> would break existing command lines like:
> 
>   -drive file=arm-wheezy.qcow2,id=foo
>   -device virtio-blk-pci,drive=foo
> 
> because the creation of the drive causes us to create an implied
> virtio-blk-pci which steals the drive, and then the explicit
> device creation fails because 'foo' is already in use.
> 
> This patchset fixes this problem by deferring the creation of the
> implicit devices to drive_check_orphaned(), which means we can do
> it only for the drives which haven't been explicitly connected to
> a device by the user.
> 
> The slight downside of deferral is that it is effectively rearranging
> the order in which the devices are created, which means they will
> end up in different PCI slots, etc. We can get away with this for PCI,
> because at the moment the only machines which set their block_default_type
> to IF_VIRTIO are the S390X ones. I have special-cased S390X to retain
> the old behaviour, which is a bit ugly but functional. If S390X doesn't
> care about cross-version migration we can probably drop that and
> just have everything defer. S390X people?

Actually we will care about cross-version migration, the thing is that 
2.4 will be the the first version that migrates all necessary pieces
like local interrupts for current Linux guests. 2.3 and before did work
from time to time but not reliable under load (1)

So migration between 2.3 and 2.4 was broken anyway and we intend to 
care about migration compatiblity with 2.4 and later.

Now: The implicit defintion does only work for the non-ccw machine, due
to the limited aliasing capabilities 

e.g.
# qemu-system-s390x -nographic -enable-kvm -machine s390-ccw .. -drive 
file=image,format=raw,id=d1 -device virtio-blk-ccw,drive=d1
fails
qemu-system-s390x: -drive file=image,format=raw,id=d1: No 's390-virtio-bus' bus 
found for device 'virtio-blk-s390'

but 

# qemu-system-s390x -nographic -enable-kvm -machine s390-ccw .. -drive 
file=image,format=raw,id=d1,if=none -device virtio-blk-ccw,drive=d1
^^^
works


So I would prefer to not have this workaround and doing 

index c480f64..7627d57 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -976,17 +976,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, 
BlockInterfaceType block_default_type)
 goto fail;
 }
 
-if (type == IF_VIRTIO && !done_orphan_check
-&& arch_type == QEMU_ARCH_S390X) {
-/* Virtio drives created on the command line get an implicit device
- * created for them. For non-s390x command line drives, the creation
- * of the implicit device is deferred to drive_check_orphaned. (S390x
- * is special-cased purely for backwards compatibility.)
- * Drives created via the monitor (hotplugged) do not get the
- * magic implicit device created for them.
- */
-create_implicit_virtio_device(qdict_get_str(bs_opts, "id"), devaddr);
-}
 
 filename = qemu_opt_get(legacy_opts, "file");

actually enables the first command line to work as well.
So lets get rid of the s390 special handling (but I really appreciate that
you cared about s390)


(1) storage key migration is still missing but newer Linux guests do
not care. Lets see if we can provide this before 2.4
> 
> The code in master didn't seem to take much account of the possibility
> of hotplug -- if the user created a drive via the monitor we would
> apparently try to create the implicit drive, but in fact not do so
> because vl.c had already done device creation long before. I've included
> a patch that makes it more explicit that hotplug does not get you the
> magic implicit devices.
> 
> The last patch is the oneliner to enable the default for virt once
> the underlying stuff lets us do this without breaking existing user
> command lines.
> 
> 
> Peter Maydell (4):
>   blockdev: Factor out create_implicit_virtio_device
>   blockdev: Don't call create_implicit_virtio_device() when it has no
> effect
>   blockdev: Defer creation of implicit PCI devices for IF_VIRTIO drives
>   hw/arm/virt: Make block devices default to virtio
> 
>  blockdev.c| 72 
> +++
>  hw/arm/virt.c |  1 +
>  2 files changed, 59 insertions(+), 14 deletions(-)
> 




Re: [Qemu-devel] [Xen-devel] qemu mainline regression with xen-unstable: unable to start QMP

2015-06-08 Thread Markus Armbruster
Eric Blake  writes:

> [adding Markus, as author of the regression]
>
> On 06/04/2015 03:59 PM, Don Slutz wrote:
>> On 06/04/15 11:04, Fabio Fantoni wrote:
>>> Today after trying xen-unstable build (tested many hours) of some days
>>> ago I tried update qemu to latest development version (from git master
>>> commit 6fa6b312765f698dc81b2c30e7eeb9683804a05b) and seems that there is
>>> a regression:
 xl create /etc/xen/W7.cfg
 Parsing config from /etc/xen/W7.cfg
 libxl: error: libxl_qmp.c:287:qmp_handle_error_response: received an
 error message from QMP server: QMP input object member 'id' is unexpected
 libxl: error: libxl_qmp.c:715:libxl__qmp_initialize: Failed to connect
 to QMP
>>>
>> 
>> This is caused by:
>> 
>> commit 65207c59d99f2260c5f1d3b9c491146616a522aa
>> Author: Markus Armbruster 
>> Date:   Thu Mar 5 14:35:26 2015 +0100
>> 
>> monitor: Drop broken, unused asynchronous command interface

Yes.  I screwed up.

>> The patch:
>> 
>>>From 1b0221078353870fe530e49de158cae205f9bce5 Mon Sep 17 00:00:00 2001
>> From: Don Slutz 
>> Date: Thu, 4 Jun 2015 17:04:42 -0400
>> Subject: [PATCH 01/14] monitor: Allow Xen's (broken) usage of asynchronous
>>  command interface.
>> 
>> commit 65207c59d99f2260c5f1d3b9c491146616a522aa
>> Author: Markus Armbruster 
>> Date:   Thu Mar 5 14:35:26 2015 +0100
>> 
>> monitor: Drop broken, unused asynchronous command interface
>> 
>> Breaks Xen.  Add a hack un unbreak it.
>
> s/un /to /
>
>> 
>> Xen is only doing synchronous commands, but is including an id.
>
> QMP also uses id, but apparently removes it up front before calling into
> this function; so another fix would be having xen remove it up front.

I don't think so:

{"QMP": {"version": {"qemu": {"micro": 50, "minor": 3, "major": 2}, 
"package": ""}, "capabilities": []}}
{ "execute": "qmp_capabilities" }
{"return": {}}
{"execute": "system_reset", "id": "1"}
{"error": {"class": "GenericError", "desc": "QMP input object member 'id' 
is unexpected"}}

>> Signed-off-by: Don Slutz 
>> ---
>>  monitor.c | 9 +
>>  1 file changed, 9 insertions(+)
>> 
>> diff --git a/monitor.c b/monitor.c
>> index c7baa91..e9a0747 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -4955,6 +4955,15 @@ static QDict *qmp_check_input_obj(QObject
>> *input_obj, Error **errp)
>>"arguments", "object");
>>  return NULL;
>>  }
>> +} else if (!strcmp(arg_name, "id")) {
>> +/*
>> + * Fixup Xen's usage. Just ignore the "id". See point #5
>> + * above.  This was an attempt at an asynchronous
>> + * command interface.  However commit
>> + * 65207c59d99f2260c5f1d3b9c491146616a522aa is
>> + * wrong. Xen does not expect an error when it passes in
>> + * "id":1, so just continue to ignore it.
>> + */
>
> The comment is a bit verbose, particularly since 'id' is a
> well-established usage pattern in QMP.  Also, we don't need to call out
> why it changed in the comment here, the commit message is sufficient for
> that.

Yes.  I'll post a patch with a more suitable comment and commit message.

>>  } else {
>>  error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
>>  return NULL;
>> 

I apologize for the mess I made, and my slow reply (offline since
Wednesday night).



Re: [Qemu-devel] [Bug 1462131] [NEW] qemu mainline regression with xen-unstable: unable to start QMP

2015-06-08 Thread Markus Armbruster
Wen Congyang  writes:

> On 06/05/2015 06:09 AM, Don Slutz wrote:
>> Public bug reported:
>> 
>> On 06/04/15 11:04, Fabio Fantoni wrote:
>>> Today after trying xen-unstable build (tested many hours) of some days
>>> ago I tried update qemu to latest development version (from git master
>>> commit 6fa6b312765f698dc81b2c30e7eeb9683804a05b) and seems that there is
>>> a regression:
 xl create /etc/xen/W7.cfg
 Parsing config from /etc/xen/W7.cfg
 libxl: error: libxl_qmp.c:287:qmp_handle_error_response: received an
 error message from QMP server: QMP input object member 'id' is unexpected
 libxl: error: libxl_qmp.c:715:libxl__qmp_initialize: Failed to connect
 to QMP
>>>
>> 
>> This is caused by:
>> 
>> commit 65207c59d99f2260c5f1d3b9c491146616a522aa
>> Author: Markus Armbruster 
>> Date:   Thu Mar 5 14:35:26 2015 +0100
>> 
>> monitor: Drop broken, unused asynchronous command interface
>> 
>> 
>>> DomU is working but operations that require QMP not (for example
>>> save/restore).
>>>
>>> Thanks for any reply and sorry for my bad english.
>> 
>> ** Affects: qemu
>>  Importance: Undecided
>>  Status: New
>> 
>> ** Patch added: 
>> "0001-monitor-Allow-Xen-s-broken-usage-of-asynchronous-com.patch"
>>
>> https://bugs.launchpad.net/bugs/1462131/+attachment/4410103/+files/0001-monitor-Allow-Xen-s-broken-usage-of-asynchronous-com.patch
>> 
>
> Please try this patch:
>
>>From 061dc4ed1012726fc0b8e1c6d0a784476f5c6d48 Mon Sep 17 00:00:00 2001
> From: Wen Congyang 
> Date: Fri, 5 Jun 2015 16:17:18 +0800
> Subject: [PATCH] monitor: Allow the "id" key
>
> The management application still needs the "id" key.
>
> Signed-off-by: Wen Congyang 
> ---
>  monitor.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/monitor.c b/monitor.c
> index c7baa91..307dc53 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -4955,6 +4955,8 @@ static QDict *qmp_check_input_obj(QObject *input_obj, 
> Error **errp)
>"arguments", "object");
>  return NULL;
>  }
> +} else if (!strcmp(arg_name, "id")) {
> +/* FIXME: check duplicated IDs for async commands */
>  } else {
>  error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
>  return NULL;

Commit 65207c5's removal of the code is wrong (and your patch fixes it),
but its removal of the comment is correct.  I'll post a revised patch
shortly.

Thanks!



Re: [Qemu-devel] [BUG] Monitor QMP is broken ?

2015-06-08 Thread Markus Armbruster
"Daniel P. Berrange"  writes:

> On Fri, Jun 05, 2015 at 04:58:46PM +0300, Pavel Fedin wrote:
>>  Hello! 
>> 
>> >  I have updated my qemu to the recent version and it seems to have
>> > lost compatibility
>> with
>> > libvirt. The error message is:
>> > --- cut ---
>> > internal error: unable to execute QEMU command 'qmp_capabilities':
>> > QMP input object
>> > member
>> > 'id' is unexpected
>> > --- cut ---
>> >  What does it mean? Is it intentional or not?
>> 
>>  I have found the problem. It is caused by commit
>> 65207c59d99f2260c5f1d3b9c491146616a522aa. libvirt does not seem to
>> use the removed
>> asynchronous interface but it still feeds in JSONs with 'id' field
>> set to something. So i
>> think the related fragment in qmp_check_input_obj() function should
>> be brought back
>
> If QMP is rejecting the 'id' parameter that is a regression bug.

It is definitely a regression, my fault, and I'll get it fixed a.s.a.p.

[...]



Re: [Qemu-devel] [PATCH] Do not fail if id field is present.

2015-06-08 Thread Markus Armbruster
Eric Blake  writes:

> On 06/05/2015 08:32 AM, Daniel P. Berrange wrote:
>> On Fri, Jun 05, 2015 at 05:17:29PM +0300, Pavel Fedin wrote:
>>> This fixes QMP regression:
>>> http://lists.gnu.org/archive/html/qemu-devel/2015-06/msg01795.html
>>>
>>> Signed-off-by: Pavel Fedin 
>>> ---
>>>  monitor.c | 2 ++
>>>  1 file changed, 2 insertions(+)
>>>
>>> diff --git a/monitor.c b/monitor.c
>>> index c7baa91..ef21bba 100644
>>> --- a/monitor.c
>>> +++ b/monitor.c
>>> @@ -4955,6 +4955,8 @@ static QDict *qmp_check_input_obj(QObject *input_obj, 
>>> Error **errp)
>>>"arguments", "object");
>>>  return NULL;
>>>  }
>>> +} else if (!strcmp(arg_name, "id")) {
>>> +/* Ignored, necessary for backwards compatibility */
>>>  } else {
>>>  error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
>>>  return NULL;
>> 
>> This should probably be accompanied by an update to docs/qmp/qmp-spec.txt
>> to say this is ignored and remove the bit about it being copied into the
>> replies
>
> Not necessary - the "id" string is once again preserved correctly on
> synchronous commands with this patch applied:
>
> $ ./x86_64-softmmu/qemu-system-x86_64 -qmp stdio
> {"QMP": {"version": {"qemu": {"micro": 50, "minor": 3, "major": 2},
> "package": ""}, "capabilities": []}}
> {"execute":"qmp_capabilities","id":1}
> {"return": {}, "id": 1}
>
> Remember, Markus' patch was about removing asynchronous commands,
> because _those_ commands were where "id" was mishandled (and if we DID
> want asynch commands, it would be even MORE important that they handle
> id corerctly).  But he accidentally removed 2 lines (the strcmp to "id"
> and a now stale FIXME comment about wanting to check for duplicate "id"s
> being tracked across parallel async commands) when it should have only
> removed one (the stale comment),

Exactly.

>  and I made the mistake of giving R-by
> based on code review and NOT an actual build-and-run of the applied
> patch (or I would have instantly spotted the side effect that "id" was
> broken on synchronous commands).  But an empty if clause looks
> suspicious, so we want a better comment in place of the stale comment,
> which is why I suggest:
>
> /* Specially handled elsewhere to be included in reply to user */
>
> At any rate, with the if statement restored, I can now state:
>
> Tested-by: Eric Blake 
>
> and since we now have no less than three threads pointing out the issue,
> I hope we can settle on a solution with a nice comment and get it in the
> tree soon.

Working on it.



Re: [Qemu-devel] [PATCH] Do not fail if id field is present.

2015-06-08 Thread Markus Armbruster
Paolo Bonzini  writes:

> On 05/06/2015 16:17, Pavel Fedin wrote:
>> This fixes QMP regression:
>> http://lists.gnu.org/archive/html/qemu-devel/2015-06/msg01795.html
>> 
>> Signed-off-by: Pavel Fedin 
>> ---
>>  monitor.c | 2 ++
>>  1 file changed, 2 insertions(+)
>> 
>> diff --git a/monitor.c b/monitor.c
>> index c7baa91..ef21bba 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -4955,6 +4955,8 @@ static QDict *qmp_check_input_obj(QObject *input_obj, 
>> Error **errp)
>>"arguments", "object");
>>  return NULL;
>>  }
>> +} else if (!strcmp(arg_name, "id")) {
>> +/* Ignored, necessary for backwards compatibility */
>>  } else {
>>  error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
>>  return NULL;
>> 
>
> Nope, this is not enough to fix virt-test.

Really?  Details?



Re: [Qemu-devel] Windows2012R2 virtio-win drivers and IPv6

2015-06-08 Thread Peter Lieven

Am 28.05.2015 um 16:24 schrieb Vadim Rozenfeld:

On Thu, 2015-05-28 at 16:07 +0200, Peter Lieven wrote:

Hi,

we observed problems with Windows Update Services and Uploading Files from a 
Windows System to other systems.
We tracked this down to IPv6 connections only. IPv4 and IPv6 in Receive 
direction seems to be not a problem.
It turned out that setting TCP/UDP Checksum Offloading for IPv6 from "RX/TX enabled" to 
RX "enabled" in the
Network Driver settings solved the issue. I remember similar problems existed 
with Linux some time ago.
Linux guests on the same host are also not an issue.

Is this a known issue? I tried latest and stable drivers from the Fedora 
Website.


Adding Yan.


Hi Yan,

have you had a chance to look at this or a hint where to start?

Thanks,
Peter




Re: [Qemu-devel] [PATCH] Do not fail if id field is present.

2015-06-08 Thread Paolo Bonzini


On 08/06/2015 10:10, Markus Armbruster wrote:
> > > +} else if (!strcmp(arg_name, "id")) {
> > > +/* Ignored, necessary for backwards compatibility */
> > >  } else {
> > >  error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
> > >  return NULL;
> > > 
> >
> > Nope, this is not enough to fix virt-test.
> Really?  Details?

It really wants the id in the reply to match the id in the request.

Paolo



Re: [Qemu-devel] [PATCH 0/4] blockdev: Defer creation of implicit PCI devices for IF_VIRTIO drives

2015-06-08 Thread Christian Borntraeger
Am 08.06.2015 um 10:02 schrieb Christian Borntraeger:
> So I would prefer to not have this workaround and doing 
> 
> index c480f64..7627d57 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -976,17 +976,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, 
> BlockInterfaceType block_default_type)
>  goto fail;
>  }
>  
> -if (type == IF_VIRTIO && !done_orphan_check
> -&& arch_type == QEMU_ARCH_S390X) {
> -/* Virtio drives created on the command line get an implicit device
> - * created for them. For non-s390x command line drives, the creation
> - * of the implicit device is deferred to drive_check_orphaned. (S390x
> - * is special-cased purely for backwards compatibility.)
> - * Drives created via the monitor (hotplugged) do not get the
> - * magic implicit device created for them.
> - */
> -create_implicit_virtio_device(qdict_get_str(bs_opts, "id"), devaddr);
> -}
>  
>  filename = qemu_opt_get(legacy_opts, "file");
> 
> actually enables the first command line to work as well.
> So lets get rid of the s390 special handling (but I really appreciate that
> you cared about s390)

As a side note, I cannot test this with libvirt right now, as current qemu broke
libvirts capability query
see
https://lists.gnu.org/archive/html/qemu-devel/2015-06/msg01806.html
https://lists.gnu.org/archive/html/qemu-devel/2015-06/msg01488.html


Christian




Re: [Qemu-devel] [PATCH v2] net: Add support of VIRTIO_NET_F_GUEST_ANNOUNCE for vhost-net/vhost-user

2015-06-08 Thread Thibaut Collet
Hi,

My understanding of gratuitous packet with virtio for any backend (vhost
user or other):
- When the VM is loaded (first start or migration) the virtio net
interfaces are loaded ( virtio_net_load_device function in
hw/net/virtio-net.c)
- If the guest has the VIRTIO_NET_F_GUEST_ANNOUNCE capability, request to
send gratuitous packet is done.

1. To enable gratuitous packet through this mechanism I have added
VIRTIO_NET_F_GUEST_ANNOUNCE
capability to hw/net/vhost_net.c. So host and guest can negotiate this
feature when vhost-user is used.

2. self announce occurs in case of live migration. During a live migration
a GARP is sent to all net backend through a queue dedicated to the net
backend.
   But for vhost-user:
   - this operation is not possible (vhost-user has no queue)
   - it is already done with the previous mechanism.
   Rather to define a queue to vhost user and notify twice the guest to
send gratuitous packet I have disable GARP from self announce and use only
the first mechanism for that.

I have tested my modifications with guest that supports
VIRTIO_NET_F_GUEST_ANNOUNCE and vhost-user on the host. After a live
migration I have the GARP from the guest.

Regards.

On Mon, Jun 8, 2015 at 7:55 AM, Jason Wang  wrote:

>
>
> On 06/05/2015 09:24 PM, Thibaut Collet wrote:
> > Add VIRTIO_NET_F_GUEST_ANNOUNCE capability to vhost-net when netdev
> backend is
> > vhost-user.
> >
> > For netdev backend using virtio-net NIC the self announce is managed
> directly
> > by the virtio-net NIC and not by the netdev backend itself.
> > To avoid duplication of announces (once from the guest and once from
> QEMU) a
> > bitfield is added in the NetClientState structure.
> > If this bit is set self announce does not send message to the guest to
> request
> > gratuitous ARP but let virtio-net NIC set the VIRTIO_NET_S_ANNOUNCE for
> > gratuitous ARP.
> >
> > Signed-off-by: Thibaut Collet 
> > ---
> > v2: do not discard anymore packets send to vhost-user: it is GARP
> request after
> > live migration.
> > As suggested by S. Hajnoczi qemu_announce_self skips virtio-net NIC
> that
> > already send GARP.
> >
> >  hw/net/vhost_net.c |2 ++
> >  include/net/net.h  |1 +
> >  net/vhost-user.c   |2 ++
> >  savevm.c   |   11 ---
> >  4 files changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 426b23e..a745f97 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -82,6 +82,8 @@ static const int user_feature_bits[] = {
> >  VIRTIO_NET_F_CTRL_MAC_ADDR,
> >  VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
> >
> > +VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +
> >  VIRTIO_NET_F_MQ,
> >
> >  VHOST_INVALID_FEATURE_BIT
> > diff --git a/include/net/net.h b/include/net/net.h
> > index e66ca03..a78e9df 100644
> > --- a/include/net/net.h
> > +++ b/include/net/net.h
> > @@ -85,6 +85,7 @@ struct NetClientState {
> >  char *name;
> >  char info_str[256];
> >  unsigned receive_disabled : 1;
> > +unsigned self_announce_disabled : 1;
> >  NetClientDestructor *destructor;
> >  unsigned int queue_index;
> >  unsigned rxfilter_notify_enabled:1;
> > diff --git a/net/vhost-user.c b/net/vhost-user.c
> > index 8d26728..b345446 100644
> > --- a/net/vhost-user.c
> > +++ b/net/vhost-user.c
> > @@ -147,6 +147,8 @@ static int net_vhost_user_init(NetClientState *peer,
> const char *device,
> >
> >  s = DO_UPCAST(VhostUserState, nc, nc);
> >
> > +/* Self announce is managed directly by virtio-net NIC */
> > +s->nc.self_announce_disabled = 1;
>
> Shouldn't we do this in set_features consider it needs guest support or
> you want to disable gratuitous packet for vhost-user at all?
>
> >  /* We don't provide a receive callback */
> >  s->nc.receive_disabled = 1;
> >  s->chr = chr;
> > diff --git a/savevm.c b/savevm.c
> > index 3b0e222..7a134b1 100644
> > --- a/savevm.c
> > +++ b/savevm.c
> > @@ -80,11 +80,16 @@ static void qemu_announce_self_iter(NICState *nic,
> void *opaque)
> >  {
> >  uint8_t buf[60];
> >  int len;
> > +NetClientState *nc;
> >
> > -trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
> > -len = announce_self_create(buf, nic->conf->macaddr.a);
> > +nc = qemu_get_queue(nic);
> >
> > -qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
> > +if (!nc->peer->self_announce_disabled) {
> > +
> trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
> > +len = announce_self_create(buf, nic->conf->macaddr.a);
> > +
> > +qemu_send_packet_raw(nc, buf, len);
> > +}
> >  }
> >
> >
>
>


Re: [Qemu-devel] [PATCH] Do not fail if id field is present.

2015-06-08 Thread Wen Congyang
On 06/08/2015 04:17 PM, Paolo Bonzini wrote:
> 
> 
> On 08/06/2015 10:10, Markus Armbruster wrote:
 +} else if (!strcmp(arg_name, "id")) {
 +/* Ignored, necessary for backwards compatibility */
  } else {
  error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
  return NULL;

>>>
>>> Nope, this is not enough to fix virt-test.
>> Really?  Details?
> 
> It really wants the id in the reply to match the id in the request.

In handle_qmp_command():
mon->qmp.id = qdict_get(input, "id");
qobject_incref(mon->qmp.id);
In monitor_protocol_emitter():
if (mon->qmp.id) {
qdict_put_obj(qmp, "id", mon->qmp.id);
mon->qmp.id = NULL;
}

So I think there is no problems for virt-test.

Thanks
Wen Congyang

> 
> Paolo
> 
> 




Re: [Qemu-devel] [PATCH] Do not fail if id field is present.

2015-06-08 Thread Paolo Bonzini


On 08/06/2015 10:08, Markus Armbruster wrote:
> > Remember, Markus' patch was about removing asynchronous commands,
> > because _those_ commands were where "id" was mishandled (and if we DID
> > want asynch commands, it would be even MORE important that they handle
> > id corerctly).  But he accidentally removed 2 lines (the strcmp to "id"
> > and a now stale FIXME comment about wanting to check for duplicate "id"s
> > being tracked across parallel async commands) when it should have only
> > removed one (the stale comment),
> 
> Exactly.

Oh, then it does fix virt-test too.

Paolo



Re: [Qemu-devel] ELF loader?

2015-06-08 Thread Liviu Ionescu

> On 08 Jun 2015, at 10:50, Peter Maydell  wrote:
> 
> On 7 June 2015 at 17:03, Liviu Ionescu  wrote:
>> 
>>> On 07 Jun 2015, at 13:46, Peter Maydell  wrote:
>>> ... Some ELF files intended for embedded use have
>>> a little self-initializing bit on the front that manually clears
>>> their own .bss section, but that's not part of the ELF spec, and
>>> QEMU will handle the other kind too.
>> 
>> as I said, in the embedded world things are a bit different, there
>> is no ELF loader, or you can consider the debugger as an ELF loader
>> since it is reading the ELF file and programming the flash.
>> 
>> but during this step it makes no sense to write/clear the RAM
> 
> That depends on what the ELF file is.

yes and no.

in this peculiar case, the problem is the simplistic approach in the QEMU ELF 
loader, that uses only the header defs, ignoring the detailed definitions in 
the separate sections.

if you look carefully to the section in my file, you'll notice that the .bss 
section is **not** marked as LOAD (so the linker honoured the NOLOAD request), 
but the memory range in the header is marked as LOAD (I don't know the header 
structures, and the specs, but I would not exclude a linker bug).

> If I were writing a debugger's
> ELF loader code I would make it clear the bss area, because a
> debugger is supposed to handle any ELF file you throw at it,
> and following the ELF spec is trivial.

for a debugger ELF loader, clearing the bss area would simply hide possible 
problems in the embedded startup code. 

for the startup code to clear the entire .bss area it requires correct linker 
scripts, which (unlike those used in the linux world, where you never deal with 
them), are part of each project and need to be manually maintained (a major 
problem for beginners), and it is usual to get linker scripts out of sync with 
the startup code requirement, and unstable embedded applications.

if the startup is buggy, and does not clear the .bss, or does not copy .data 
from flash, the program might not run properly when standalone if it assumes 
that all variables start as 0, although it runs fine when started by the 
debugger, since the debugger cleared those variables.

fortunately the ELF loader used in GDB does a good job in identifying the 
sections that need to be programmed (unlike the implementation used in QEMU).


regards,

Liviu




[Qemu-devel] [PATCH 0/1] monitor: Fix QMP ABI breakage around "id"

2015-06-08 Thread Markus Armbruster
First, my apologies for screwing this up, and for my slow reaction
(I've been offline since Wednesday night).

A number of patches for the regression have been posted.  They all
change the code the same way, but either restore a comment that's no
longer correct, or add a new one that isn't quite right.

This patch is again the same code change, but with a better comment.

I credited the authors of the prior patches by adding their S-o-B.
Hope that's alright.

A test case protecting us from such stupid mistakes would be useful.
Left for later, as we need to fix the regression right away.

Peter, can you apply this directly, or do you need me to post a pull
request.

Markus Armbruster (1):
  monitor: Fix QMP ABI breakage around "id"

 monitor.c | 2 ++
 1 file changed, 2 insertions(+)

-- 
1.9.3




[Qemu-devel] [PATCH 1/1] monitor: Fix QMP ABI breakage around "id"

2015-06-08 Thread Markus Armbruster
Commit 65207c5 accidentally dropped a line of code we need along with
a comment that became wrong then.  This made QMP reject "id":

{"execute": "system_reset", "id": "1"}
{"error": {"class": "GenericError", "desc": "QMP input object member 'id' 
is unexpected"}}

Put the lost line right back, so QMP again accepts and returns "id",
as promised by the ABI:

{"execute": "system_reset", "id": "1"}
{"return": {}, "id": "1"}

Reported-by: Fabio Fantoni 
Signed-off-by: Markus Armbruster 
Signed-off-by: Don Slutz 
Tested-by: Fabio Fantoni 
Signed-off-by: Wen Congyang 
Signed-off-by: Pavel Fedin 
Tested-by: Eric Blake 
Signed-off-by: Markus Armbruster 
---
 monitor.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/monitor.c b/monitor.c
index c7baa91..9afee7b 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4955,6 +4955,8 @@ static QDict *qmp_check_input_obj(QObject *input_obj, 
Error **errp)
   "arguments", "object");
 return NULL;
 }
+} else if (!strcmp(arg_name, "id")) {
+/* Any string is acceptable as "id", so nothing to check */
 } else {
 error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
 return NULL;
-- 
1.9.3




Re: [Qemu-devel] Trying to execute code outside RAM or ROM at 0x08000230

2015-06-08 Thread Liviu Ionescu

> On 07 Jun 2015, at 01:11, Peter Maydell  wrote:
> 
> One handy debugging tool for tracking down this kind of thing is to
> use the QEMU monitor's "info mtree" command,

I added "-monitor stdio" to the Eclipse configuration used to start the 
emulator, and I got access to the monitor, but apparently it uses some fancy 
terminal commands, that are not properly handled by the Eclipse console:

(qemu) info mtree
iininfinfoinfo info 
minfo mtinfo 
mtrinfo mtreinfo 
mtree
Execute 'mon info mtree'.

Q: is there any simple way to get rid of them?


as for memory map, I get:

memory
- (prio 0, RW): system
  -0001 (prio 0, R-): cortexm-mem-flash
  0800-0801 (prio 0, R-): alias stm32-mem-flash-alias 
@system -0001
  2000-20004fff (prio 0, RW): cortexm-mem-sram
  2200-23ff (prio 0, RW): bitband
  40021000-40021027 (prio 0, RW): stm32-rcc
  40022000-40022023 (prio 0, RW): stm32-flash
  e000-efff (prio 0, RW): armv7m-itm
  e000e000-e000efff (prio 0, RW): nvic
e000e000-e000efff (prio 0, RW): nvic_sysregs
e000e100-e000ecff (prio 1, RW): alias nvic-gic @gic_dist 
0100-0cff
  f000- (prio 0, RW): cortexm-mem-hack

except the NVIC range, which is a bit more complicated than needed, the rest 
seem fine to me.


I/O
- (prio 0, RW): io

apparently this does not seem to harm anything, but it looks like an old Intel 
thing (which, in my opinion, was harmful enough at its time...)


regards,

Liviu




Re: [Qemu-devel] [PATCH v5 0/4] monitor: suggest running "help" for command errors

2015-06-08 Thread Markus Armbruster
Copying HMP maintainer Luiz.

Series
Reviewed-by: Markus Armbruster 

Bandan, thanks for your patience.

Luiz, my monitor/QMP queue is currently empty, but if it fills up before
you get around to doing a monitor/HMP pull request, I'm happy to take
this series along, if it gets your Acked-by.



Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 09:09:15AM +0100, Malcolm Crossley wrote:
> On 08/06/15 08:42, Jan Beulich wrote:
>  On 07.06.15 at 08:23,  wrote:
> >> On Mon, Apr 20, 2015 at 04:32:12PM +0200, Michael S. Tsirkin wrote:
> >>> On Mon, Apr 20, 2015 at 03:08:09PM +0100, Jan Beulich wrote:
> >>> On 20.04.15 at 15:43,  wrote:
> > On Mon, Apr 13, 2015 at 01:51:06PM +0100, Jan Beulich wrote:
> > On 13.04.15 at 14:47,  wrote:
> >>> Can you check device capabilities register, offset 0x4 within
> >>> pci express capability structure?
> >>> Bit 15 is 15 Role-Based Error Reporting.
> >>> Is it set?
> >>>
> >>> The spec says:
> >>>
> >>>   15
> >>>   On platforms where robust error handling and PC-compatible 
> >>> Configuration 
> >>> Space probing is
> >>>   required, it is suggested that software or firmware have the 
> >>> Unsupported 
> >>> Request Reporting Enable
> >>>   bit Set for Role-Based Error Reporting Functions, but clear for 
> >>> 1.0a 
> >>> Functions. Software or
> >>>   firmware can distinguish the two classes of Functions by 
> >>> examining the 
> >>> Role-Based Error Reporting
> >>>   bit in the Device Capabilities register.
> >>
> >> Yes, that bit is set.
> >
> > curiouser and curiouser.
> >
> > So with functions that do support Role-Based Error Reporting, we have
> > this:
> >
> >
> > With device Functions implementing Role-Based Error Reporting, 
> > setting the 
> > Unsupported Request
> > Reporting Enable bit will not interfere with PC-compatible 
> > Configuration 
> > Space probing, assuming
> > that the severity for UR is left at its default of non-fatal. 
> > However, 
> > setting the Unsupported Request
> > Reporting Enable bit will enable the Function to report UR 
> > errors 97 
> > detected with posted Requests,
> > helping avoid this case for potential silent data corruption.
> 
>  I still don't see what the PC-compatible config space probing has to
>  do with our issue.
> >>>
> >>> I'm not sure but I think it's listed here because it causes a ton of URs
> >>> when device scan probes unimplemented functions.
> >>>
> > did firmware reconfigure this device to report URs as fatal errors then?
> 
>  No, the Unsupported Request Error Serverity flag is zero.
> >>>
> >>> OK, that's the correct configuration, so how come the box crashes when
> >>> there's a UR then?
> >>
> >> Ping - any update on this?
> > 
> > Not really. All we concluded so far is that _maybe_ the bridge, upon
> > seeing the UR, generates a Master Abort, rendering the whole thing
> > fatal. Otoh the respective root port also has
> > - Received Master Abort set in its Secondary Status register (but
> >   that's also already the case in the log that we have before the UR
> >   occurs, i.e. that doesn't mean all that much),
> > - Received System Error set in its Secondary Status register (and
> >   after the UR the sibling endpoint [UR originating from 83:00.0,
> >   sibling being 83:00.1] also shows Signaled System Error set).
> > 
> 
> Disabling the Memory decode in the command register could also result in a 
> completion timeout on the
> root port issuing a transaction towards the PCI device in question.

Can it really? Such device would violate the PCIE spec, which says:

If the request is not claimed, then it is handled as an
Unsupported Request, which is the
PCI Express equivalent of conventional PCI’s Master Abort termination.




> PCIE completion timeouts can be
> escalated to Fatal AER errors which trigger system firmware to inject NMI's 
> into the host.
> 
> Unsupported requests can also be escalated to be Fatal AER errors (which 
> would again trigger system
> firmware to inject an NMI).

Only if the system is misconfigured. We found out the system in question
is not configured to do this.


> Here is an example AER setup for a PCIE root port. You can see UnsupReq 
> errors are masked and so do
> not trigger errors. CmpltTO ( completion timeout) errors are not masked and 
> the errors are treated
> as Fatal because the corresponding bit in the Uncorrectable Severity register 
> is set.
> 
> Capabilities: [148 v1] Advanced Error Reporting
> UESta:DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- 
> MalfTLP- ECRC- UnsupReq- ACSViol-
> UEMsk:DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt+ UnxCmplt+ RxOF- 
> MalfTLP- ECRC- UnsupReq+ ACSViol+
> UESvrt:   DLP+ SDES+ TLP+ FCP+ CmpltTO+ CmpltAbrt- UnxCmplt- RxOF+ 
> MalfTLP+ ECRC- UnsupReq- ACSViol-
> CESta:RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
> CEMsk:RxErr+ BadTLP+ BadDLLP+ Rollover+ Timeout+ NonFatalErr+
> AERCap:   First Error Pointer: 00, GenCap- CGenEn- ChkCap- ChkEn-
> 
> A root port completion timeout will also re

Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Jan Beulich
>>> On 08.06.15 at 10:09,  wrote:
> On 08/06/15 08:42, Jan Beulich wrote:
>> Not really. All we concluded so far is that _maybe_ the bridge, upon
>> seeing the UR, generates a Master Abort, rendering the whole thing
>> fatal. Otoh the respective root port also has
>> - Received Master Abort set in its Secondary Status register (but
>>   that's also already the case in the log that we have before the UR
>>   occurs, i.e. that doesn't mean all that much),
>> - Received System Error set in its Secondary Status register (and
>>   after the UR the sibling endpoint [UR originating from 83:00.0,
>>   sibling being 83:00.1] also shows Signaled System Error set).
>> 
> 
> Disabling the Memory decode in the command register could also result in a 
> completion timeout on the
> root port issuing a transaction towards the PCI device in question. PCIE 
> completion timeouts can be
> escalated to Fatal AER errors which trigger system firmware to inject NMI's 
> into the host.

And how does all that play with PC compatibility (where writes into
no-where get dropped, and reads from no-where get all ones
returned)? Remember - we#re talking about CPU side accesses
here.

> Here is an example AER setup for a PCIE root port. You can see UnsupReq 
> errors are masked and so do
> not trigger errors. CmpltTO ( completion timeout) errors are not masked and 
> the errors are treated
> as Fatal because the corresponding bit in the Uncorrectable Severity 
> register is set.
> 
> Capabilities: [148 v1] Advanced Error Reporting
> UESta:DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- 
> MalfTLP- ECRC- UnsupReq- 
> ACSViol-
> UEMsk:DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt+ UnxCmplt+ RxOF- 
> MalfTLP- ECRC- 
> UnsupReq+ ACSViol+
> UESvrt:   DLP+ SDES+ TLP+ FCP+ CmpltTO+ CmpltAbrt- UnxCmplt- RxOF+ 
> MalfTLP+ ECRC- 
> UnsupReq- ACSViol-
> CESta:RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
> CEMsk:RxErr+ BadTLP+ BadDLLP+ Rollover+ Timeout+ NonFatalErr+
> AERCap:   First Error Pointer: 00, GenCap- CGenEn- ChkCap- ChkEn-
> 
> A root port completion timeout will also result in the master abort bit 
> being set.
> 
> Typically system firmware clears the error in the AER registers after it's 
> processed it. So the
> operating system may not be able to determine what error triggered the NMI 
> in the first place.

Right, but in the case at hand we have an ITP log available, which
increases the hope that we see a reasonably complete picture.

>>> Do we can chalk this up to hardware bugs on a specific box?
>> 
>> I have to admit that I'm still very uncertain whether to consider all
>> this correct behavior, a firmware flaw, or a hardware bug.
> I believe the correct behaviour is happening but a PCIE completion timeout 
> is occurring instead of a
> unsupported request.

Might it be that with the supposedly correct device returning UR
the root port reissues the request to the sibling device, which then
fails it in a more dramatic way (albeit the sibling's Uncorrectable
Error Status Register also has only Unsupported Request Error
Status set)?

Jan




Re: [Qemu-devel] [PATCH 1/1] monitor: Fix QMP ABI breakage around "id"

2015-06-08 Thread Peter Maydell
On 8 June 2015 at 09:44, Markus Armbruster  wrote:
> Commit 65207c5 accidentally dropped a line of code we need along with
> a comment that became wrong then.  This made QMP reject "id":
>
> {"execute": "system_reset", "id": "1"}
> {"error": {"class": "GenericError", "desc": "QMP input object member 'id' 
> is unexpected"}}
>
> Put the lost line right back, so QMP again accepts and returns "id",
> as promised by the ABI:
>
> {"execute": "system_reset", "id": "1"}
> {"return": {}, "id": "1"}
>
> Reported-by: Fabio Fantoni 
> Signed-off-by: Markus Armbruster 
> Signed-off-by: Don Slutz 
> Tested-by: Fabio Fantoni 
> Signed-off-by: Wen Congyang 
> Signed-off-by: Pavel Fedin 
> Tested-by: Eric Blake 
> Signed-off-by: Markus Armbruster 

That's a lot of signed-off-by lines for a two line patch :-)

> ---
>  monitor.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/monitor.c b/monitor.c
> index c7baa91..9afee7b 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -4955,6 +4955,8 @@ static QDict *qmp_check_input_obj(QObject *input_obj, 
> Error **errp)
>"arguments", "object");
>  return NULL;
>  }
> +} else if (!strcmp(arg_name, "id")) {
> +/* Any string is acceptable as "id", so nothing to check */
>  } else {
>  error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
>  return NULL;
> --
> 1.9.3

I'll apply this directly to master later today.

-- PMM



Re: [Qemu-devel] ELF loader?

2015-06-08 Thread Peter Maydell
On 8 June 2015 at 09:27, Liviu Ionescu  wrote:
>
>> On 08 Jun 2015, at 10:50, Peter Maydell  wrote:
>>
>> On 7 June 2015 at 17:03, Liviu Ionescu  wrote:
>>>
 On 07 Jun 2015, at 13:46, Peter Maydell  wrote:
 ... Some ELF files intended for embedded use have
 a little self-initializing bit on the front that manually clears
 their own .bss section, but that's not part of the ELF spec, and
 QEMU will handle the other kind too.
>>>
>>> as I said, in the embedded world things are a bit different, there
>>> is no ELF loader, or you can consider the debugger as an ELF loader
>>> since it is reading the ELF file and programming the flash.
>>>
>>> but during this step it makes no sense to write/clear the RAM
>>
>> That depends on what the ELF file is.
>
> yes and no.
>
> in this peculiar case, the problem is the simplistic approach
> in the QEMU ELF loader, that uses only the header defs, ignoring
>the detailed definitions in the separate sections.

No. As I explained, this is correct. An ELF loader is *supposed*
to load based on the segment definitions. Sections are only
for linking. If you don't like the segment descriptions in your
ELF file you should probably change your linker script.

-- PMM



Re: [Qemu-devel] [PATCH] OvmfPkg/PlatformPei: Initialise RCBA (B0:D31:F0 0xf0) register

2015-06-08 Thread Laszlo Ersek
On 06/06/15 21:10, Paulo Alcantara wrote:
> This patch initialises root complex register block BAR in order to
> support TCO watchdog emulation features (e.g. reboot upon NO_REBOOT bit
> not set) on QEMU.
> 
> Contributed-under: TianoCore Contribution Agreement 1.0
> Signed-off-by: Paulo Alcantara 
> ---
>  OvmfPkg/Include/IndustryStandard/Q35MchIch9.h | 6 ++
>  OvmfPkg/OvmfPkg.dec   | 4 
>  OvmfPkg/PlatformPei/Platform.c| 7 +++
>  OvmfPkg/PlatformPei/PlatformPei.inf   | 1 +
>  4 files changed, 18 insertions(+)
> 
> diff --git a/OvmfPkg/Include/IndustryStandard/Q35MchIch9.h 
> b/OvmfPkg/Include/IndustryStandard/Q35MchIch9.h
> index 4f59a7c..4d42dfa 100644
> --- a/OvmfPkg/Include/IndustryStandard/Q35MchIch9.h
> +++ b/OvmfPkg/Include/IndustryStandard/Q35MchIch9.h
> @@ -90,4 +90,10 @@
>  #define ICH9_SMI_EN_APMC_EN  BIT5
>  #define ICH9_SMI_EN_GBL_SMI_EN   BIT0
>  
> +//
> +// Root Complex Base Address register
> +//
> +#define ICH9_RCBA 0xf0
> +#define ICH9_RCBA_EN  BIT0
> +

The RCBA belongs to the same register block as PMBASE (I just checked in
the ich9 spec). Therefore, please move up this source code hunk, to the
end of the source code section that starts with:

//
// B/D/F/Type: 0/0x1f/0/PCI
//

As-is, the patch would add it to "IO ports relative to PMBASE", and
that's not a good place for it.

In addition, the 0xf0 replacement text should line up with 0x40, 0x44,
0xA0 visible in that part of the code. Then, the BIT0 replacement text
in the other macro should be indented two more positions relative to
that. (So that it lines up with BIT7, BIT4 etc. visible in that part of
the code.)

>  #endif
> diff --git a/OvmfPkg/OvmfPkg.dec b/OvmfPkg/OvmfPkg.dec
> index 4cb70dc..a6586f3 100644
> --- a/OvmfPkg/OvmfPkg.dec
> +++ b/OvmfPkg/OvmfPkg.dec
> @@ -78,6 +78,10 @@
>#  to PIIX4 function 3 offset 0x40-0x43 bits [15:6].
>gUefiOvmfPkgTokenSpaceGuid.PcdAcpiPmBaseAddress|0xB000|UINT16|5
>  
> +  ## This flag determines the Root Complex Register Block BAR, written to Q35
> +  #  function 31 offset 0xf0-0xf3 bits [31:14]
> +  gUefiOvmfPkgTokenSpaceGuid.PcdRootComplexBaseAddress|0xfed1c000|UINT32|0x1e
> +

I understand Jordan doesn't like the new PCD here, and proposes a fixed
macro for the same purpose, but I don't understand why we should follow
a different avenue for this base address when we opted for a PCD with
the PMBA.

>## When VirtioScsiDxe is instantiated for a HBA, the numbers of targets and
>#  LUNs are retrieved from the host during virtio-scsi setup.
>#  MdeModulePkg/Bus/Scsi/ScsiBusDxe then scans all MaxTarget * MaxLun
> diff --git a/OvmfPkg/PlatformPei/Platform.c b/OvmfPkg/PlatformPei/Platform.c
> index 1126c65..09c9a2c 100644
> --- a/OvmfPkg/PlatformPei/Platform.c
> +++ b/OvmfPkg/PlatformPei/Platform.c
> @@ -261,6 +261,13 @@ MiscInitialization (
>Pmba   = POWER_MGMT_REGISTER_Q35 (ICH9_PMBASE);
>AcpiCtlReg = POWER_MGMT_REGISTER_Q35 (ICH9_ACPI_CNTL);
>AcpiEnBit  = ICH9_ACPI_CNTL_ACPI_EN;
> +
> +  //
> +  // Set Root Complex Register Block BAR
> +  //
> +  PciWrite32 (POWER_MGMT_REGISTER_Q35 (ICH9_RCBA),
> +  PcdGet32 (PcdRootComplexBaseAddress) | (UINT32)ICH9_RCBA_EN
> + );
>break;
>  default:
>DEBUG ((EFI_D_ERROR, "%a: Unknown Host Bridge Device ID: 0x%04x\n",

This is not the right place for this code, I think.

If there's going to be a separate DXE driver for the feature, then the
enablement belongs there.

If not (and I guess there won't be?), then chipset register setting
indeed belongs here -- but not this particular block of code. The "case"
statement in question bears the comment "Query Host Bridge DID to
determine platform type and save to PCD". Let's not mix responsibilities.

If you'd like to add the RCBA setting to this function (and I do agree
MiscInitialization is a good place for it), then please add an explicit
"if" and the dependent code to the end of the function.

> diff --git a/OvmfPkg/PlatformPei/PlatformPei.inf 
> b/OvmfPkg/PlatformPei/PlatformPei.inf
> index 0307bca..4aa47cc 100644
> --- a/OvmfPkg/PlatformPei/PlatformPei.inf
> +++ b/OvmfPkg/PlatformPei/PlatformPei.inf
> @@ -74,6 +74,7 @@
>gUefiOvmfPkgTokenSpaceGuid.PcdOvmfLockBoxStorageSize
>gUefiOvmfPkgTokenSpaceGuid.PcdGuidedExtractHandlerTableSize
>gUefiOvmfPkgTokenSpaceGuid.PcdOvmfHostBridgePciDevId
> +  gUefiOvmfPkgTokenSpaceGuid.PcdRootComplexBaseAddress
>gEfiIntelFrameworkModulePkgTokenSpaceGuid.PcdS3AcpiReservedMemorySize
>gEfiMdePkgTokenSpaceGuid.PcdGuidedExtractHandlerTableAddress
>gEfiMdeModulePkgTokenSpaceGuid.PcdVariableStoreSize
> 

Finally, from your other email it looks like the RCBA will be set to the
fixed address 0xFED1C000. In order to keep the code & comments
up-to-date, please modify MemMapInitialization() as well: the MMIO
address 0xFED1C000 splits the "gap" that starts at 0xFED00400. Please
update the co

Re: [Qemu-devel] [PATCH 1/2] block-backend: Introduce blk_drain() and replace blk_drain_all()

2015-06-08 Thread Stefan Hajnoczi
On Wed, Jun 03, 2015 at 04:46:14PM +0300, Alexander Yarygin wrote:
> Each call of the virtio_blk_reset() function calls blk_drain_all(),
> which works for all existing BlockDriverStates, while only one
> BlockDriverState needs to be drained.
> 
> This patch introduces the blk_drain() function and replaces
> blk_drain_all() on it in virtio_blk_reset().
> 
> Cc: Christian Borntraeger 
> Cc: Cornelia Huck 
> Cc: Kevin Wolf 
> Cc: Paolo Bonzini 
> Cc: Stefan Hajnoczi 
> Signed-off-by: Alexander Yarygin 
> ---
>  block/block-backend.c  | 5 +
>  hw/block/virtio-blk.c  | 2 +-
>  include/sysemu/block-backend.h | 1 +
>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 93e46f3..aee8a12 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -700,6 +700,11 @@ int blk_flush_all(void)
>  return bdrv_flush_all();
>  }
>  
> +void blk_drain(BlockBackend *blk)
> +{
> +bdrv_drain(blk->bs);
> +}
> +
>  void blk_drain_all(void)
>  {
>  bdrv_drain_all();
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index e6afe97..abaca58 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -660,7 +660,7 @@ static void virtio_blk_reset(VirtIODevice *vdev)
>   * This should cancel pending requests, but can't do nicely until there
>   * are per-device request lists.
>   */
> -blk_drain_all();
> +blk_drain(s->blk);
>  blk_set_enable_write_cache(s->blk, s->original_wce);
>  }

I noticed a bug in the current code when reviewing this:

If the 'backup' block job is active when the virtio-blk device is reset,
bs is moved to the global AioContext while the target stays in the
iothread AioContext.  Accesses will be made to target without acquiring
its AioContext.

This is not your fault but calling blk_drain() increases the chance that
this will deadlock.  The bs request cannot make progress until target
completes the I/O request.  Since target doesn't pump events when we
call aio_poll() on bs, draining bs will result in a hang if there are
write requests pending on bs.

I'll write a patch to address this case.

Stefan


pgpZYjtq7hwtF.pgp
Description: PGP signature


Re: [Qemu-devel] [PATCH v2] net: Add support of VIRTIO_NET_F_GUEST_ANNOUNCE for vhost-net/vhost-user

2015-06-08 Thread Jason Wang


On 06/08/2015 04:21 PM, Thibaut Collet wrote:
> Hi,
>
> My understanding of gratuitous packet with virtio for any backend
> (vhost user or other):
> - When the VM is loaded (first start or migration) the virtio net
> interfaces are loaded ( virtio_net_load_device function in
> hw/net/virtio-net.c)
> - If the guest has the VIRTIO_NET_F_GUEST_ANNOUNCE capability, request
> to send gratuitous packet is done.
>
> 1. To enable gratuitous packet through this mechanism I have
> added VIRTIO_NET_F_GUEST_ANNOUNCE capability to hw/net/vhost_net.c. So
> host and guest can negotiate this feature when vhost-user is used.
>
> 2. self announce occurs in case of live migration. During a live
> migration a GARP is sent to all net backend through a queue dedicated
> to the net backend.
>But for vhost-user:
>- this operation is not possible (vhost-user has no queue)
>- it is already done with the previous mechanism.
>Rather to define a queue to vhost user and notify twice the guest
> to send gratuitous packet I have disable GARP from self announce and
> use only the first mechanism for that.
>
> I have tested my modifications with guest that supports
> VIRTIO_NET_F_GUEST_ANNOUNCE and vhost-user on the host. After a live
> migration I have the GARP from the guest.

Yes, your patch works well for recent drivers. But the problem is legacy
guest/driver without VIRTIO_NET_F_GUEST_ANNOUNCE. In this case there
will be no GARP sent after migration.

Thanks



Re: [Qemu-devel] [PATCH v2 04/10] crypto: introduce generic cipher API & built-in implementation

2015-06-08 Thread Gonglei
On 2015/6/2 21:45, Daniel P. Berrange wrote:
> +
> +static int qcrypto_cipher_encrypt_des_rfb(QCryptoCipher *cipher,
> +  const void *in,
> +  void *out,
> +  size_t len,
> +  Error **errp)
> +{
> +QCryptoCipherBuiltin *ctxt = cipher->opaque;
> +size_t i;
> +
> +deskey(ctxt->state.desrfb.key, EN0);
> +
Why not move the below check to top of deskey() ? and...

> +if (len % 8) {
> +error_setg(errp, _("Buffer size must be multiple of 8 not %zu"),
> +   len);
> +return -1;
> +}
> +
> +for (i = 0; i < len; i += 8) {
> +des((void *)in + i, out + i);
> +}
> +
> +return 0;
> +}
> +
> +
> +static int qcrypto_cipher_decrypt_des_rfb(QCryptoCipher *cipher,
> +  const void *in,
> +  void *out,
> +  size_t len,
> +  Error **errp)
> +{
> +QCryptoCipherBuiltin *ctxt = cipher->opaque;
> +size_t i;
> +
> +deskey(ctxt->state.desrfb.key, DE1);
> +

...the same.

> +if (len % 8) {
> +error_setg(errp, _("Buffer size must be multiple of 8 not %zu"),
> +   len);
> +return -1;
> +}
> +
> +for (i = 0; i < len; i += 8) {
> +des((void *)in + i, out + i);
> +}
> +
> +return 0;
> +}

Regards,
-Gonglei




Re: [Qemu-devel] Trying to execute code outside RAM or ROM at 0x08000230

2015-06-08 Thread Peter Maydell
On 8 June 2015 at 09:46, Liviu Ionescu  wrote:
>
>> On 07 Jun 2015, at 01:11, Peter Maydell  wrote:
>>
>> One handy debugging tool for tracking down this kind of thing is to
>> use the QEMU monitor's "info mtree" command,
>
> I added "-monitor stdio" to the Eclipse configuration used to start the 
> emulator, and I got access to the monitor, but apparently it uses some fancy 
> terminal commands, that are not properly handled by the Eclipse console:
>
> (qemu) info mtree
> i [K [Din [K [D [Dinf [K [D [D [Dinfo [K [D [D [D [Dinfo  [K [D [D [D [D 
> [Dinfo m [K [D [D [D [D [D [Dinfo mt [K [D [D [D [D [D [D [Dinfo mtr [K [D [D 
> [D [D [D [D [D [Dinfo mtre [K [D [D [D [D [D [D [D [D [Dinfo mtree [K
> Execute 'mon info mtree'.
>
> Q: is there any simple way to get rid of them?

This is probably the readline support (so you can do cursor
editing of command lines). You can turn that off, though I forget
the syntax -- it should be documented somewhere. (The cursor
sequences are just the usual VT100/etc ones for cursor movement
and character deletion, so it's a bit odd that you see them:
I would have expected that either the Eclipse console did all
its own editing and cursor movement and just sent the finished
line to QEMU, or that if it's sending the cursor escapes when
you do cursor movement that it doesn't get echoed back.)

What is printing the "Execute ..." line? A quick grep of the
sources suggests it's not QEMU.

> as for memory map, I get:
>
> memory
> - (prio 0, RW): system
>   -0001 (prio 0, R-): cortexm-mem-flash
>   0800-0801 (prio 0, R-): alias stm32-mem-flash-alias 
> @system -0001

This is still aliasing the whole system address space, rather
than just the flash device. The effects will be the same but
it's a conceptual error I think.

>   2000-20004fff (prio 0, RW): cortexm-mem-sram
>   2200-23ff (prio 0, RW): bitband
>   40021000-40021027 (prio 0, RW): stm32-rcc
>   40022000-40022023 (prio 0, RW): stm32-flash
>   e000-efff (prio 0, RW): armv7m-itm
>   e000e000-e000efff (prio 0, RW): nvic
> e000e000-e000efff (prio 0, RW): nvic_sysregs
> e000e100-e000ecff (prio 1, RW): alias nvic-gic @gic_dist 
> 0100-0cff
>   f000- (prio 0, RW): cortexm-mem-hack
>
> except the NVIC range, which is a bit more complicated than needed, the rest 
> seem fine to me.

What's the cortexm-mem-hack ?

> I/O
> - (prio 0, RW): io
>
> apparently this does not seem to harm anything, but it looks like an
> old Intel thing (which, in my opinion, was harmful enough at its time...)

The system I/O space always exists, but for ARM boards we don't
put anything in it, and we don't make it accessible to the guest.

-- PMM



Re: [Qemu-devel] [PATCH v2 05/10] crypto: add a gcrypt cipher implementation

2015-06-08 Thread Gonglei
On 2015/6/2 21:45, Daniel P. Berrange wrote:
> +
> +void qcrypto_cipher_free(QCryptoCipher *cipher)
> +{
> +gcry_cipher_hd_t handle = cipher->opaque;
> +if (!cipher) {
> +return;
> +}

The check is too late. :(

> +gcry_cipher_close(handle);
> +g_free(cipher);
> +}

Regards,
-Gonglei




Re: [Qemu-devel] Redundant redeclaration of 'gmtime_r' with mingw64

2015-06-08 Thread Stefan Hajnoczi
On Fri, Jun 05, 2015 at 07:18:53PM +0200, Stefan Weil wrote:
> Am 05.06.2015 um 16:38 schrieb Stefan Hajnoczi:
> >Hi Stefan,
> >I get the following compiler warning in Fedora 22
> >(mingw32-headers-4.0.2-1.fc22):
> >
> >In file included from qemu/include/qemu-common.h:47:0,
> >  from qemu/include/qemu/timer.h:5,
> >  from qemu/include/sysemu/sysemu.h:8,
> >  from os-win32.c:34:
> >qemu/include/sysemu/os-win32.h:77:12: warning: redundant redeclaration
> >of 'gmtime_r' [-Wredundant-decls]
> >  struct tm *gmtime_r(const time_t *timep, struct tm *result);
> > ^
> >In file included from os-win32.c:30:0:
> >/usr/i686-w64-mingw32/sys-root/mingw/include/time.h:272:107: note:
> >previous definition of 'gmtime_r' was here
> >
> >QEMU has its own (non-reentrant) gmtime_r() and localtime_r()
> >functions on Windows.  os-win32.h redefines the functions so the
> >compiler is right to complain.
> >
> >I thought about adding qemu_gmtime_r() and qemu_localtime_r()
> >functions to avoid the name clash.
> >
> >Do you have any new thoughts on this commit which introduced the
> >os-win32.h definitions?
> 
> The version provided by Debian Jessie (mingw-w64 3.2.0)
> still uses macros to implement those functions - that's why
> I don't see that compiler warnings.
> 
> I'd prefer a solution which conditionally includes the QEMU
> declaration (include/sysemu/os-win32.h) and the implementation
> (util/oslib-win32.c), either depending on the mingw-w64 version
> or on the result of a configuration check done while running
> configure.

Does that mean you want:

1. gmtime() is a macro - use QEMU implementation
2. gmtime() is a function - use mingw function
3. gmtime() is undefined - use QEMU implementation

?


pgpf1OsQuoei2.pgp
Description: PGP signature


Re: [Qemu-devel] QEMU's CVE Procedures

2015-06-08 Thread Stefan Hajnoczi
On Fri, Jun 05, 2015 at 06:16:30PM -0400, John Snow wrote:
> Anyway, my apologies for the wall of text. I wanted to take this
> opportunity post-venom to ask some questions to the list to see if the
> interest is there in revamping our CVE policy which is in need of, at
> the very least, some clarifications.

Can you post a draft SecurityProcess wiki page that includes your
suggested changes?

Stefan


pgpaBM8laXy14.pgp
Description: PGP signature


[Qemu-devel] [Bug 1462944] [NEW] vpc file causes qemu-img to consume lots of time and memory

2015-06-08 Thread Richard Jones
Public bug reported:

The attached vpc file causes 'qemu-img info' to consume 3 or 4 seconds
of CPU time and 1.3 GB of heap, causing a minor denial of service.

$ /usr/bin/time ~/d/qemu/qemu-img info afl12.img
block-vpc: The header checksum of 'afl12.img' is incorrect.
qemu-img: Could not open 'afl12.img': block-vpc: free_data_block_offset points 
after the end of file. The image has been truncated.
1.19user 3.15system 0:04.35elapsed 99%CPU (0avgtext+0avgdata 
1324504maxresident)k
0inputs+0outputs (0major+327314minor)pagefaults 0swaps

The file was found using american-fuzzy-lop.

** Affects: qemu
 Importance: Undecided
 Status: New

** Attachment added: "afl12.img"
   https://bugs.launchpad.net/bugs/1462944/+attachment/4411469/+files/afl12.img

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1462944

Title:
  vpc file causes qemu-img to consume lots of time and memory

Status in QEMU:
  New

Bug description:
  The attached vpc file causes 'qemu-img info' to consume 3 or 4 seconds
  of CPU time and 1.3 GB of heap, causing a minor denial of service.

  $ /usr/bin/time ~/d/qemu/qemu-img info afl12.img
  block-vpc: The header checksum of 'afl12.img' is incorrect.
  qemu-img: Could not open 'afl12.img': block-vpc: free_data_block_offset 
points after the end of file. The image has been truncated.
  1.19user 3.15system 0:04.35elapsed 99%CPU (0avgtext+0avgdata 
1324504maxresident)k
  0inputs+0outputs (0major+327314minor)pagefaults 0swaps

  The file was found using american-fuzzy-lop.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1462944/+subscriptions



Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 08:42:57AM +0100, Jan Beulich wrote:
> >>> On 07.06.15 at 08:23,  wrote:
> > On Mon, Apr 20, 2015 at 04:32:12PM +0200, Michael S. Tsirkin wrote:
> >> On Mon, Apr 20, 2015 at 03:08:09PM +0100, Jan Beulich wrote:
> >> > >>> On 20.04.15 at 15:43,  wrote:
> >> > > On Mon, Apr 13, 2015 at 01:51:06PM +0100, Jan Beulich wrote:
> >> > >> >>> On 13.04.15 at 14:47,  wrote:
> >> > >> > Can you check device capabilities register, offset 0x4 within
> >> > >> > pci express capability structure?
> >> > >> > Bit 15 is 15 Role-Based Error Reporting.
> >> > >> > Is it set?
> >> > >> > 
> >> > >> > The spec says:
> >> > >> > 
> >> > >> > 15
> >> > >> > On platforms where robust error handling and PC-compatible 
> >> > >> > Configuration 
> >> > >> > Space probing is
> >> > >> > required, it is suggested that software or firmware have the 
> >> > >> > Unsupported 
> >> > >> > Request Reporting Enable
> >> > >> > bit Set for Role-Based Error Reporting Functions, but clear for 
> >> > >> > 1.0a 
> >> > >> > Functions. Software or
> >> > >> > firmware can distinguish the two classes of Functions by 
> >> > >> > examining the 
> >> > >> > Role-Based Error Reporting
> >> > >> > bit in the Device Capabilities register.
> >> > >> 
> >> > >> Yes, that bit is set.
> >> > > 
> >> > > curiouser and curiouser.
> >> > > 
> >> > > So with functions that do support Role-Based Error Reporting, we have
> >> > > this:
> >> > > 
> >> > > 
> >> > >With device Functions implementing Role-Based Error Reporting, 
> >> > > setting the 
> >> > > Unsupported Request
> >> > >Reporting Enable bit will not interfere with PC-compatible 
> >> > > Configuration 
> >> > > Space probing, assuming
> >> > >that the severity for UR is left at its default of non-fatal. 
> >> > > However, 
> >> > > setting the Unsupported Request
> >> > >Reporting Enable bit will enable the Function to report UR 
> >> > > errors 97 
> >> > > detected with posted Requests,
> >> > >helping avoid this case for potential silent data corruption.
> >> > 
> >> > I still don't see what the PC-compatible config space probing has to
> >> > do with our issue.
> >> 
> >> I'm not sure but I think it's listed here because it causes a ton of URs
> >> when device scan probes unimplemented functions.
> >> 
> >> > > did firmware reconfigure this device to report URs as fatal errors 
> >> > > then?
> >> > 
> >> > No, the Unsupported Request Error Serverity flag is zero.
> >> 
> >> OK, that's the correct configuration, so how come the box crashes when
> >> there's a UR then?
> > 
> > Ping - any update on this?
> 
> Not really. All we concluded so far is that _maybe_ the bridge, upon
> seeing the UR, generates a Master Abort, rendering the whole thing
> fatal.

But Master Abort is the equivalent of the UR, so I think that a
reasonable system would not be configured to trigger a fatal error
in this case - and you previously said it's configured reasonably.

> Otoh the respective root port also has
> - Received Master Abort set in its Secondary Status register (but
>   that's also already the case in the log that we have before the UR
>   occurs, i.e. that doesn't mean all that much),
> - Received System Error set in its Secondary Status register (and
>   after the UR the sibling endpoint [UR originating from 83:00.0,
>   sibling being 83:00.1] also shows Signaled System Error set).

It's another function of the same physical device, correct?

So is this sibling the only function sending SERR?
What happens if you disable SERR# in the command register
of 83:00.1?



> > Do we can chalk this up to hardware bugs on a specific box?
> 
> I have to admit that I'm still very uncertain whether to consider all
> this correct behavior, a firmware flaw, or a hardware bug.
> 
> Jan

Questions:
1.  Does this only happen with a specific endpoint?
What if you add another endpoint to the same system?
2.  Has a driver initialized this endpoint? What if you don't
load a driver before sending the transaction resulting in the UR?


-- 
MST



Re: [Qemu-devel] segfault in memcmp

2015-06-08 Thread Stefan Hajnoczi
On Fri, Jun 05, 2015 at 05:19:53PM -0500, perrier vincent wrote:
> Using a very old guest (lenny) with spice and vga=cirrus, I have
> a segfault:
> 
> FILE:  ui/spice-display.c
> FUNCTION:  qemu_spice_create_update
> LINE:  if (memcmp(guest + yoff + xoff,
>mirror + yoff + xoff,
>bw * bpp) == 0)
> 
> The address of mirror + yoff + xoff is out of boundaries.
> 
> I use the following to avoid the crash:
> 
> ...
>   img_get_stride = pixman_image_get_stride(ssd->mirror);
>   img_height = pixman_image_get_height(ssd->mirror);
>   img_max = img_height * img_get_stride;
> ...
>   if (yoff > img_max)
> {
> if (dirty_top[blk] == -1)
>   dirty_top[blk] = y;
> }
>   else if (memcmp(guest + yoff + xoff,
>   mirror + yoff + xoff,
>   bw * bpp) == 0)
> {
> ...

Thanks for the report.  I have CCed Gerd Hoffmann who maintains the
graphics subsystem.


pgpEsnb3uaOkX.pgp
Description: PGP signature


Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 10:03:18AM +0100, Jan Beulich wrote:
> >>> On 08.06.15 at 10:09,  wrote:
> > On 08/06/15 08:42, Jan Beulich wrote:
> >> Not really. All we concluded so far is that _maybe_ the bridge, upon
> >> seeing the UR, generates a Master Abort, rendering the whole thing
> >> fatal. Otoh the respective root port also has
> >> - Received Master Abort set in its Secondary Status register (but
> >>   that's also already the case in the log that we have before the UR
> >>   occurs, i.e. that doesn't mean all that much),
> >> - Received System Error set in its Secondary Status register (and
> >>   after the UR the sibling endpoint [UR originating from 83:00.0,
> >>   sibling being 83:00.1] also shows Signaled System Error set).
> >> 
> > 
> > Disabling the Memory decode in the command register could also result in a 
> > completion timeout on the
> > root port issuing a transaction towards the PCI device in question. PCIE 
> > completion timeouts can be
> > escalated to Fatal AER errors which trigger system firmware to inject NMI's 
> > into the host.
> 
> And how does all that play with PC compatibility (where writes into
> no-where get dropped, and reads from no-where get all ones
> returned)? Remember - we#re talking about CPU side accesses
> here.
> 
> > Here is an example AER setup for a PCIE root port. You can see UnsupReq 
> > errors are masked and so do
> > not trigger errors. CmpltTO ( completion timeout) errors are not masked and 
> > the errors are treated
> > as Fatal because the corresponding bit in the Uncorrectable Severity 
> > register is set.
> > 
> > Capabilities: [148 v1] Advanced Error Reporting
> > UESta:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- 
> > MalfTLP- ECRC- UnsupReq- 
> > ACSViol-
> > UEMsk:  DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt+ UnxCmplt+ RxOF- 
> > MalfTLP- ECRC- 
> > UnsupReq+ ACSViol+
> > UESvrt: DLP+ SDES+ TLP+ FCP+ CmpltTO+ CmpltAbrt- UnxCmplt- RxOF+ 
> > MalfTLP+ ECRC- 
> > UnsupReq- ACSViol-
> > CESta:  RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
> > CEMsk:  RxErr+ BadTLP+ BadDLLP+ Rollover+ Timeout+ NonFatalErr+
> > AERCap: First Error Pointer: 00, GenCap- CGenEn- ChkCap- ChkEn-
> > 
> > A root port completion timeout will also result in the master abort bit 
> > being set.
> > 
> > Typically system firmware clears the error in the AER registers after it's 
> > processed it. So the
> > operating system may not be able to determine what error triggered the NMI 
> > in the first place.
> 
> Right, but in the case at hand we have an ITP log available, which
> increases the hope that we see a reasonably complete picture.
> 
> >>> Do we can chalk this up to hardware bugs on a specific box?
> >> 
> >> I have to admit that I'm still very uncertain whether to consider all
> >> this correct behavior, a firmware flaw, or a hardware bug.
> > I believe the correct behaviour is happening but a PCIE completion timeout 
> > is occurring instead of a
> > unsupported request.
> 
> Might it be that with the supposedly correct device returning UR
> the root port reissues the request to the sibling device, which then
> fails it in a more dramatic way (albeit the sibling's Uncorrectable
> Error Status Register also has only Unsupported Request Error
> Status set)?
> 
> Jan

Isn't the sibling a function on the same device?
And is the request causing the UR a memory read?
If so doesn't this use address routing?
What does it mean that the request is "to the sibling device" then?
Does the sibling device have a BAR overlapping the address?

-- 
MST



Re: [Qemu-devel] [PATCH RFC V3 1/4] Use Aff1 with mpidr This is an improved and KVM-aware alternative to

2015-06-08 Thread Pavel Fedin
 Hello!

> >  I thought about it when i developed this, but in my implementation
> > affinity bits are assigned during CPU instantiation, while feature
> > bits can be added later, during realize. I could tweak set_feature()
> > to sync up MPIDR value but perhaps it isn't the best thing to do.
> 
> Can we not just assign the whole thing at realize?

 kvm_arch_init_vcpu() is called before realize, therefore i would have to track 
down whether KVM is active or not.

> You need to fix the 32-bit side anyway.

 Actually already done in my working tree.

> I still think that having the whole mpidr in the struct will
> be less confusing.

 Ok, if you are really insisting on that, i can assign IDs where this is 
currently done and add feature bits in arm_cpu_realizefn().
 Last argument: the rest of qemu code (property assignment, lookup in PSCI, 
etc) actually needs IDs without feature bits. The only function which needs 
full version is mpidr_read(). So does it still worth of putting AND with 
bitmasks everywhere? May be just rename 'mpidr' to something like 'mp_id' ?

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia




Re: [Qemu-devel] [PATCH 4/4] i386/acpi-build: build_crs(): fetch BAR from PCI config space directly

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 09:56:15AM +0200, Laszlo Ersek wrote:
> On 06/07/15 11:23, Michael S. Tsirkin wrote:
> > On Sat, Jun 06, 2015 at 01:46:29AM +0200, Laszlo Ersek wrote:
> >> OVMF downloads the ACPI linker/loader script from QEMU when the edk2 PCI
> >> Bus driver globally signals the firmware that PCI enumeration and resource
> >> allocation have completed. At this point QEMU regenerates the ACPI payload
> >> in an fw_cfg read callback, and this is when the PXB's _CRS gets
> >> populated.
> >>
> >> Unfortunately, when this happens, the PCI_COMMAND_MEMORY bit is clear in
> >> the root bus's command register, *unlike* under SeaBIOS. The consequences
> >> unfold as follows:
> >>
> >> - When build_crs() fetches dev->io_regions[i].addr, it is all-bits-one,
> >>   because pci_update_mappings() --> pci_bar_address() calculated it as
> >>   PCI_BAR_UNMAPPED, due to the PCI_COMMAND_MEMORY bit being clear.
> >>
> >> - Consequently, the SHPC MMIO BAR (bar 0) of the bridge is not added to
> >>   the _CRS, *despite* having been programmed in PCI config space.
> >>
> >> - Similarly, the SHPC MMIO BAR of the PXB is not removed from the main
> >>   root bus's DWordMemory descriptor.
> >>
> >> - Guest OSes (Linux and Windows alike) notice the pre-programmed SHPC BAR
> >>   within the PXB's config space, and notice that it conflicts with the
> >>   main root bus's memory resource descriptors. Linux reports
> >>
> >>   pci :04:00.0: BAR 0: can't assign mem (size 0x100)
> >>   pci :04:00.0: BAR 0: trying firmware assignment [mem
> >>0x8820-0x882000ff 64bit]
> >>   pci :04:00.0: BAR 0: [mem 0x8820-0x882000ff 64bit] conflicts
> >>with PCI Bus :00 [mem
> >>0x8820-0xfebf]
> >>
> >>   While Windows Server 2012 R2 reports
> >>
> >> https://technet.microsoft.com/en-us/library/cc732199%28v=ws.10%29.aspx
> >>
> >> This device cannot find enough free resources that it can use. If you
> >> want to use this device, you will need to disable one of the other
> >> devices on this system. (Code 12)
> >>
> >> (This issue was apparently encountered earlier, see:
> >>
> >>   https://lists.nongnu.org/archive/html/qemu-devel/2015-01/msg02983.html
> >>
> >> and the current hole-punching logic in build_crs() and build_ssdt() is
> >> probably supposed to remedy exactly that problem -- however, for OVMF they
> >> don't work, because at the end of the PCI enumeration and resource
> >> allocation, which cues the ACPI linker/loader client, the command register
> >> is clear.)
> >>
> >> The solution is to fetch the BAR addresses from PCI config space directly,
> >> for the purposes of build_crs(), regardless of the PCI command register
> >> and any MemoryRegion state.
> >>
> >> Example MMIO maps for a 2GB guest:
> >>
> >> SeaBIOS:
> >>
> >>   main: 0x8000..0xFC00 / 0x7C00
> >>   pxb:  0xFC00..0xFC20 / 0x0020
> >>   main: 0xFC20..0xFC213000 / 0x00013000
> >>   pxb:  0xFC213000..0xFC213100 / 0x0100 <- SHPC BAR
> >>   main: 0xFC213100..0xFEA0 / 0x027ECF00
> >>   pxb:  0xFEA0..0xFEC0 / 0x0020
> >>
> >> OVMF, without the fix:
> >>
> >>   main: 0x8000..0x8810 / 0x0810
> >>   pxb:  0x8810..0x8820 / 0x0010
> >>   main: 0x8820..0xFEC0 / 0x76A0
> >>
> >> OVMF, with the fix:
> >>
> >>   main: 0x8000..0x8810 / 0x0810
> >>   pxb:  0x8810..0x8820 / 0x0010
> >>   pxb:  0x8820..0x88200100 / 0x0100 <- SHPC BAR
> >>   main: 0x88200100..0xFEC0 / 0x769FFF00
> >>
> >> (Note that under OVMF the PCI enumerator includes requests for
> >> prefetchable memory in the nonprefetchable memory pool -- separate windows
> >> for nonprefetchable and prefetchable memory are not supported (yet) --
> >> that's why there's one fewer pxb range in the corrected OVMF case than in
> >> the SeaBIOS case.)
> >>
> >> Cc: Marcel Apfelbaum 
> >> Cc: Michael S. Tsirkin 
> >> Signed-off-by: Laszlo Ersek 
> > 
> > This is problematic - disabled BAR values have no meaning according to
> > the PCI spec.
> > 
> > It might be best to add a property to just disable shpc in the bridge so
> > no devices reside directly behind the pxb?
> 
> If that solves this problem (and I do think it should), then I don't
> have anything against the idea. I guess I could look at other property
> code for examples. I think I'd have the main problem with implementing
> the "policy" part: when exactly do you flip the new property to false,
> and where.

Perhaps when the bridge is created. But if not, perhaps just ask
management to do this if bridge is connected to the PXB.

> (Also, I'm not really sure what the SHPC bar is good for, and
> if it won't be missed by someone who wanted to use PXB otherwise.)
> 
> Thanks
> Laszlo

At the moment it's only used on non-PC systems that lack ACPI.


> >> ---
> >>  hw/i386/acpi-build.c | 4 ++--
> >>  1 file changed, 2 insertions(+), 2 d

[Qemu-devel] [Bug 1462944] Re: vpc file causes qemu-img to consume lots of time and memory

2015-06-08 Thread Richard Jones
This slightly modified example takes about 7 seconds and 2 GB of heap:

$ /usr/bin/time ~/d/qemu/qemu-img info /mnt/scratch/afl13.img 
block-vpc: The header checksum of '/mnt/scratch/afl13.img' is incorrect.
qemu-img: Could not open '/mnt/scratch/afl13.img': block-vpc: 
free_data_block_offset points after the end of file. The image has been 
truncated.
1.84user 5.72system 0:07.59elapsed 99%CPU (0avgtext+0avgdata 
2045496maxresident)k
8inputs+0outputs (0major+507536minor)pagefaults 0swaps


** Attachment added: "afl13.img"
   
https://bugs.launchpad.net/qemu/+bug/1462944/+attachment/4411471/+files/afl13.img

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1462944

Title:
  vpc file causes qemu-img to consume lots of time and memory

Status in QEMU:
  New

Bug description:
  The attached vpc file causes 'qemu-img info' to consume 3 or 4 seconds
  of CPU time and 1.3 GB of heap, causing a minor denial of service.

  $ /usr/bin/time ~/d/qemu/qemu-img info afl12.img
  block-vpc: The header checksum of 'afl12.img' is incorrect.
  qemu-img: Could not open 'afl12.img': block-vpc: free_data_block_offset 
points after the end of file. The image has been truncated.
  1.19user 3.15system 0:04.35elapsed 99%CPU (0avgtext+0avgdata 
1324504maxresident)k
  0inputs+0outputs (0major+327314minor)pagefaults 0swaps

  The file was found using american-fuzzy-lop.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1462944/+subscriptions



Re: [Qemu-devel] fw cfg files cross-version migration races

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 09:21:45AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > > So, sorting entries (and the index assigned too) should fix this, right?
> > > That looks easiest to me.
> > 
> > Presumably, anything happening before (and after) user-provided blobs
> > are inserted will continue happening in the same order everywhere.
> 
> Which might change in the future though, in case the initialization
> order changes for some reason.  It's not a problem we have at the
> moment, so this stuff isn't urgent.  But we might face it in the future.
> 
> > So we really only have to sort the user provided blobs, so that if
> > the same *set* is provided on both ends of the migration, things would
> > work out OK.
> 
> I would simply sort everything (and do it for new machine types only,
> for backward compatibility reasons).  Sorting user-provided blobs only
> adds complexity for no reason.
> 
> > If a *different* set of blobs is specified on the migration origin vs.
> > the migration destination, we lose by default and worrying about it is
> > pointless -- did I get that right ?
> 
> Yes.  For migration to succeed you have to supply the same configuration
> on both ends.  That includes user-provided fw_cfg blobs of course.
> 
> cheers,
>   Gerd
> 

Do we want this for all machine types, or only for new ones?

-- 
MST



[Qemu-devel] [Bug 1462949] Re: vmdk files cause qemu-img to consume lots of time and memory

2015-06-08 Thread Richard Jones
Both files were found by using american-fuzzy-lop.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1462949

Title:
  vmdk files cause qemu-img to consume lots of time and memory

Status in QEMU:
  New

Bug description:
  The two attached files cause 'qemu-img info' to consume lots of time
  and memory.  Around 10-12 seconds of CPU time, and around 3-4 GB of
  heap.

  $ /usr/bin/time ~/d/qemu/qemu-img info afl10.img 
  qemu-img: Can't get size of device 'image': File too large
  0.40user 11.57system 0:12.03elapsed 99%CPU (0avgtext+0avgdata 
4197804maxresident)k
  56inputs+0outputs (0major+1045672minor)pagefaults 0swaps

  $ /usr/bin/time ~/d/qemu/qemu-img info afl11.img 
  image: afl11.img
  file format: vmdk
  virtual size: 12802T (14075741666803712 bytes)
  disk size: 4.0K
  cluster_size: 65536
  Format specific information:
  cid: 4294967295
  parent cid: 4294967295
  create type: monolithicSparse
  extents:
  [0]:
  virtual size: 14075741666803712
  filename: afl11.img
  cluster size: 65536
  format: 
  0.29user 9.10system 0:09.43elapsed 99%CPU (0avgtext+0avgdata 
3297360maxresident)k
  8inputs+0outputs (0major+820507minor)pagefaults 0swaps

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1462949/+subscriptions



[Qemu-devel] [Bug 1462949] Re: vmdk files cause qemu-img to consume lots of time and memory

2015-06-08 Thread Richard Jones
** Attachment added: "afl11.img"
   
https://bugs.launchpad.net/qemu/+bug/1462949/+attachment/4411476/+files/afl11.img

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1462949

Title:
  vmdk files cause qemu-img to consume lots of time and memory

Status in QEMU:
  New

Bug description:
  The two attached files cause 'qemu-img info' to consume lots of time
  and memory.  Around 10-12 seconds of CPU time, and around 3-4 GB of
  heap.

  $ /usr/bin/time ~/d/qemu/qemu-img info afl10.img 
  qemu-img: Can't get size of device 'image': File too large
  0.40user 11.57system 0:12.03elapsed 99%CPU (0avgtext+0avgdata 
4197804maxresident)k
  56inputs+0outputs (0major+1045672minor)pagefaults 0swaps

  $ /usr/bin/time ~/d/qemu/qemu-img info afl11.img 
  image: afl11.img
  file format: vmdk
  virtual size: 12802T (14075741666803712 bytes)
  disk size: 4.0K
  cluster_size: 65536
  Format specific information:
  cid: 4294967295
  parent cid: 4294967295
  create type: monolithicSparse
  extents:
  [0]:
  virtual size: 14075741666803712
  filename: afl11.img
  cluster size: 65536
  format: 
  0.29user 9.10system 0:09.43elapsed 99%CPU (0avgtext+0avgdata 
3297360maxresident)k
  8inputs+0outputs (0major+820507minor)pagefaults 0swaps

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1462949/+subscriptions



[Qemu-devel] [Bug 1462949] [NEW] vmdk files cause qemu-img to consume lots of time and memory

2015-06-08 Thread Richard Jones
Public bug reported:

The two attached files cause 'qemu-img info' to consume lots of time and
memory.  Around 10-12 seconds of CPU time, and around 3-4 GB of heap.

$ /usr/bin/time ~/d/qemu/qemu-img info afl10.img 
qemu-img: Can't get size of device 'image': File too large
0.40user 11.57system 0:12.03elapsed 99%CPU (0avgtext+0avgdata 
4197804maxresident)k
56inputs+0outputs (0major+1045672minor)pagefaults 0swaps

$ /usr/bin/time ~/d/qemu/qemu-img info afl11.img 
image: afl11.img
file format: vmdk
virtual size: 12802T (14075741666803712 bytes)
disk size: 4.0K
cluster_size: 65536
Format specific information:
cid: 4294967295
parent cid: 4294967295
create type: monolithicSparse
extents:
[0]:
virtual size: 14075741666803712
filename: afl11.img
cluster size: 65536
format: 
0.29user 9.10system 0:09.43elapsed 99%CPU (0avgtext+0avgdata 
3297360maxresident)k
8inputs+0outputs (0major+820507minor)pagefaults 0swaps

** Affects: qemu
 Importance: Undecided
 Status: New

** Attachment added: "afl10.img"
   https://bugs.launchpad.net/bugs/1462949/+attachment/4411475/+files/afl10.img

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1462949

Title:
  vmdk files cause qemu-img to consume lots of time and memory

Status in QEMU:
  New

Bug description:
  The two attached files cause 'qemu-img info' to consume lots of time
  and memory.  Around 10-12 seconds of CPU time, and around 3-4 GB of
  heap.

  $ /usr/bin/time ~/d/qemu/qemu-img info afl10.img 
  qemu-img: Can't get size of device 'image': File too large
  0.40user 11.57system 0:12.03elapsed 99%CPU (0avgtext+0avgdata 
4197804maxresident)k
  56inputs+0outputs (0major+1045672minor)pagefaults 0swaps

  $ /usr/bin/time ~/d/qemu/qemu-img info afl11.img 
  image: afl11.img
  file format: vmdk
  virtual size: 12802T (14075741666803712 bytes)
  disk size: 4.0K
  cluster_size: 65536
  Format specific information:
  cid: 4294967295
  parent cid: 4294967295
  create type: monolithicSparse
  extents:
  [0]:
  virtual size: 14075741666803712
  filename: afl11.img
  cluster size: 65536
  format: 
  0.29user 9.10system 0:09.43elapsed 99%CPU (0avgtext+0avgdata 
3297360maxresident)k
  8inputs+0outputs (0major+820507minor)pagefaults 0swaps

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1462949/+subscriptions



Re: [Qemu-devel] [RFC PATCH v0] numa: API to lookup NUMA node by address

2015-06-08 Thread Igor Mammedov
On Mon, 8 Jun 2015 11:28:18 +0530
Bharata B Rao  wrote:

> On Mon, May 25, 2015 at 02:42:40PM -0300, Eduardo Habkost wrote:
> > On Mon, May 25, 2015 at 01:17:57PM +0530, Bharata B Rao wrote:
> > > On Thu, May 14, 2015 at 11:39:06AM +0200, Paolo Bonzini wrote:
> > > > On 13/05/2015 20:06, Eduardo Habkost wrote:
> > > > > Also, this introduces a circular dependency between pc-dimm.c and
> > > > > numa.c. Instead of that, pc-dimm could simply notify us when a new
> > > > > device is realized (with just (addr, end, node) as arguments), so we 
> > > > > can
> > > > > save the list of memory ranges inside struct node_info.
> > > > > 
> > > > > I wonder if the memory API already provides something that would help
> > > > > us. Paolo, do you see a way we could simply use a MemoryRegion as 
> > > > > input
> > > > > to lookup the NUMA node?
> > > > 
> > > > No, but I guess you could add a numa_get/set_memory_region_node_id API
> > > > that uses a hash table.  That's a variant of the "pc-dimm could simply
> > > > notify" numa.c that you propose above.
> > > 
> > > While you say we can't use MemoryRegion as input to lookup the NUMA node,
> > > you suggest that we add numa_get/set_memory_region_node_id. Does this API
> > > get/set NUMA node id for the given MemoryRegion ? 
> > 
> > I was going to suggest that, but it would require changing the
> > non-memdev code path to create a MemoryRegion for each node, too. So
> > having a numa_set_mem_node_id(start_addr, end_addr, node_id) API would
> > be simpler.
> 
> In order to save the list of memory ranges inside node_info, I tried this
> approach where I call
> 
> numa_set_mem_node_id(dimm.addr, dimm.size, dimm.node) from
> 
> pc_dimm_realize(), but
> 
> the value of dimm.addr is finalized only later in ->plug().
> 
> So we would have to call this API from arch code like pc_dimm_plug().
> Is that acceptable ?
Could you query pc_dimms' numa property each time you need mapping
instead of additionally storing that mapping elsewhere?

> 
> Regards,
> Bharata.
> 
> 




Re: [Qemu-devel] [PATCH v2] net: Add support of VIRTIO_NET_F_GUEST_ANNOUNCE for vhost-net/vhost-user

2015-06-08 Thread Thibaut Collet
Hi,

I agree that my patch is OK only for recent drivers. To have a simple patch
with only few modifications I do not implement a solution for old driver
but I can be done later.

For legacy guest without VIRTIO_NET_F_GUEST_ANNOUNCE the problem is more
complex. The RARP must be sent by the vhost client/backend. This component
is outside QEMU (the reference implementation is snabbswitch:
http://www.virtualopensystems.com/en/solutions/guides/snabbswitch-qemu/).
To do that:
- a receive function must be defined for the vhost user.
- a message must be added between QEMU and vapp. This message will be sent
only for old guest driver to avoid GARP duplication.
- the added self_announce_disabled must be removed (decision to send or not
the RARP is done later by the backend and not by the generic migration
method)

Do you agree with this solution ?


Regards.

On Mon, Jun 8, 2015 at 11:14 AM, Jason Wang  wrote:

>
>
> On 06/08/2015 04:21 PM, Thibaut Collet wrote:
> > Hi,
> >
> > My understanding of gratuitous packet with virtio for any backend
> > (vhost user or other):
> > - When the VM is loaded (first start or migration) the virtio net
> > interfaces are loaded ( virtio_net_load_device function in
> > hw/net/virtio-net.c)
> > - If the guest has the VIRTIO_NET_F_GUEST_ANNOUNCE capability, request
> > to send gratuitous packet is done.
> >
> > 1. To enable gratuitous packet through this mechanism I have
> > added VIRTIO_NET_F_GUEST_ANNOUNCE capability to hw/net/vhost_net.c. So
> > host and guest can negotiate this feature when vhost-user is used.
> >
> > 2. self announce occurs in case of live migration. During a live
> > migration a GARP is sent to all net backend through a queue dedicated
> > to the net backend.
> >But for vhost-user:
> >- this operation is not possible (vhost-user has no queue)
> >- it is already done with the previous mechanism.
> >Rather to define a queue to vhost user and notify twice the guest
> > to send gratuitous packet I have disable GARP from self announce and
> > use only the first mechanism for that.
> >
> > I have tested my modifications with guest that supports
> > VIRTIO_NET_F_GUEST_ANNOUNCE and vhost-user on the host. After a live
> > migration I have the GARP from the guest.
>
> Yes, your patch works well for recent drivers. But the problem is legacy
> guest/driver without VIRTIO_NET_F_GUEST_ANNOUNCE. In this case there
> will be no GARP sent after migration.
>
> Thanks
>


Re: [Qemu-devel] [PATCH v2] net: Add support of VIRTIO_NET_F_GUEST_ANNOUNCE for vhost-net/vhost-user

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 12:01:38PM +0200, Thibaut Collet wrote:
> Hi,
> 
> I agree that my patch is OK only for recent drivers. To have a simple patch
> with only few modifications I do not implement a solution for old driver but I
> can be done later.
> 
> For legacy guest without VIRTIO_NET_F_GUEST_ANNOUNCE the problem is more
> complex. The RARP must be sent by the vhost client/backend. This component is
> outside QEMU (the reference implementation is snabbswitch: http://
> www.virtualopensystems.com/en/solutions/guides/snabbswitch-qemu/). To do that:
> - a receive function must be defined for the vhost user.
> - a message must be added between QEMU and vapp. This message will be sent 
> only
> for old guest driver to avoid GARP duplication.
> - the added self_announce_disabled must be removed (decision to send or not 
> the
> RARP is done later by the backend and not by the generic migration method)
> 
> Do you agree with this solution ?
> 
> 
> Regards.

I don't get it.  Why do you need any extra messages for old drivers?  To
detect old drivers, simply have backend check whether
VIRTIO_NET_F_GUEST_ANNOUNCE is set.

But I don't see this as a blocker for this patch,
this can be added separately as needed.


> On Mon, Jun 8, 2015 at 11:14 AM, Jason Wang  wrote:
> 
>
> 
> On 06/08/2015 04:21 PM, Thibaut Collet wrote:
> > Hi,
> >
> > My understanding of gratuitous packet with virtio for any backend
> > (vhost user or other):
> > - When the VM is loaded (first start or migration) the virtio net
> > interfaces are loaded ( virtio_net_load_device function in
> > hw/net/virtio-net.c)
> > - If the guest has the VIRTIO_NET_F_GUEST_ANNOUNCE capability, request
> > to send gratuitous packet is done.
> >
> > 1. To enable gratuitous packet through this mechanism I have
> > added VIRTIO_NET_F_GUEST_ANNOUNCE capability to hw/net/vhost_net.c. So
> > host and guest can negotiate this feature when vhost-user is used.
> >
> > 2. self announce occurs in case of live migration. During a live
> > migration a GARP is sent to all net backend through a queue dedicated
> > to the net backend.
> >    But for vhost-user:
> >        - this operation is not possible (vhost-user has no queue)
> >        - it is already done with the previous mechanism.
> >    Rather to define a queue to vhost user and notify twice the guest
> > to send gratuitous packet I have disable GARP from self announce and
> > use only the first mechanism for that.
> >
> > I have tested my modifications with guest that supports
> > VIRTIO_NET_F_GUEST_ANNOUNCE and vhost-user on the host. After a live
> > migration I have the GARP from the guest.
> 
> Yes, your patch works well for recent drivers. But the problem is legacy
> guest/driver without VIRTIO_NET_F_GUEST_ANNOUNCE. In this case there
> will be no GARP sent after migration.
> 
> Thanks
> 
> 



Re: [Qemu-devel] [PATCH v2] net: Add support of VIRTIO_NET_F_GUEST_ANNOUNCE for vhost-net/vhost-user

2015-06-08 Thread Michael S. Tsirkin
On Fri, Jun 05, 2015 at 03:24:12PM +0200, Thibaut Collet wrote:
> Add VIRTIO_NET_F_GUEST_ANNOUNCE capability to vhost-net when netdev backend is
> vhost-user.
> 
> For netdev backend using virtio-net NIC the self announce is managed directly
> by the virtio-net NIC and not by the netdev backend itself.
> To avoid duplication of announces (once from the guest and once from QEMU) a
> bitfield is added in the NetClientState structure.
> If this bit is set self announce does not send message to the guest to request
> gratuitous ARP but let virtio-net NIC set the VIRTIO_NET_S_ANNOUNCE for
> gratuitous ARP.
> 
> Signed-off-by: Thibaut Collet 
> ---
> v2: do not discard anymore packets send to vhost-user: it is GARP request 
> after
> live migration.
> As suggested by S. Hajnoczi qemu_announce_self skips virtio-net NIC that
> already send GARP.
> 
>  hw/net/vhost_net.c |2 ++
>  include/net/net.h  |1 +
>  net/vhost-user.c   |2 ++
>  savevm.c   |   11 ---
>  4 files changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 426b23e..a745f97 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -82,6 +82,8 @@ static const int user_feature_bits[] = {
>  VIRTIO_NET_F_CTRL_MAC_ADDR,
>  VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
>  
> +VIRTIO_NET_F_GUEST_ANNOUNCE,
> +
>  VIRTIO_NET_F_MQ,
>  
>  VHOST_INVALID_FEATURE_BIT
> diff --git a/include/net/net.h b/include/net/net.h
> index e66ca03..a78e9df 100644
> --- a/include/net/net.h
> +++ b/include/net/net.h
> @@ -85,6 +85,7 @@ struct NetClientState {
>  char *name;
>  char info_str[256];
>  unsigned receive_disabled : 1;
> +unsigned self_announce_disabled : 1;
>  NetClientDestructor *destructor;
>  unsigned int queue_index;
>  unsigned rxfilter_notify_enabled:1;
> diff --git a/net/vhost-user.c b/net/vhost-user.c
> index 8d26728..b345446 100644
> --- a/net/vhost-user.c
> +++ b/net/vhost-user.c
> @@ -147,6 +147,8 @@ static int net_vhost_user_init(NetClientState *peer, 
> const char *device,
>  
>  s = DO_UPCAST(VhostUserState, nc, nc);
>  
> +/* Self announce is managed directly by virtio-net NIC */
> +s->nc.self_announce_disabled = 1;
>  /* We don't provide a receive callback */
>  s->nc.receive_disabled = 1;
>  s->chr = chr;
> diff --git a/savevm.c b/savevm.c
> index 3b0e222..7a134b1 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -80,11 +80,16 @@ static void qemu_announce_self_iter(NICState *nic, void 
> *opaque)
>  {
>  uint8_t buf[60];
>  int len;
> +NetClientState *nc;
>  
> -trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
> -len = announce_self_create(buf, nic->conf->macaddr.a);
> +nc = qemu_get_queue(nic);
>  
> -qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
> +if (!nc->peer->self_announce_disabled) {
> +trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
> +len = announce_self_create(buf, nic->conf->macaddr.a);
> +
> +qemu_send_packet_raw(nc, buf, len);
> +}
>  }
>  

I don't think qemu_send_packet_raw can ever work for vhost user.
What happens if you merely add VIRTIO_NET_F_GUEST_ANNOUNCE
to the feature list, and drop the rest of the patch?


> -- 
> 1.7.10.4



[Qemu-devel] [PATCH] s390x/migration: add comment about floating point migration

2015-06-08 Thread Christian Borntraeger
commit 46c804def4bd ("s390x: move fpu regs into a subsection
of the vmstate") moved the fprs into a subsection and bumped
the version number. This will allow to not transfer fprs in
the future if necessary. Add a comment to mark the return true
as intentional.

CC: Juan Quintela 
CC: David Hildenbrand 
Signed-off-by: Christian Borntraeger 
---
 target-s390x/machine.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target-s390x/machine.c b/target-s390x/machine.c
index e52d760..0044749 100644
--- a/target-s390x/machine.c
+++ b/target-s390x/machine.c
@@ -70,6 +70,7 @@ const VMStateDescription vmstate_fpu = {
 
 static inline bool fpu_needed(void *opaque)
 {
+/* This looks odd, but we might want to NOT transfer fprs in the future */
 return true;
 }
 
-- 
2.3.0




Re: [Qemu-devel] [PATCH RFC V3 1/4] Use Aff1 with mpidr This is an improved and KVM-aware alternative to

2015-06-08 Thread Peter Maydell
On 8 June 2015 at 10:39, Pavel Fedin  wrote:
>  Last argument: the rest of qemu code (property assignment, lookup
> in PSCI, etc) actually needs IDs without feature bits. The only
> function which needs full version is mpidr_read(). So does it still
> worth of putting AND with bitmasks everywhere? May be just rename
> 'mpidr' to something like 'mp_id' ?

Mmm, OK, if we don't call it mpidr I think this will be OK.
I suggest "mp_affinity".

-- PMM



Re: [Qemu-devel] [PATCH v2] net: Add support of VIRTIO_NET_F_GUEST_ANNOUNCE for vhost-net/vhost-user

2015-06-08 Thread Jason Wang


On 06/08/2015 06:01 PM, Thibaut Collet wrote:
> Hi,
>
> I agree that my patch is OK only for recent drivers. To have a simple
> patch with only few modifications I do not implement a solution for
> old driver but I can be done later.
>

This makes sense.

> For legacy guest without VIRTIO_NET_F_GUEST_ANNOUNCE the problem is
> more complex. The RARP must be sent by the vhost client/backend. This
> component is outside QEMU (the reference implementation is
> snabbswitch: 
> http://www.virtualopensystems.com/en/solutions/guides/snabbswitch-qemu/).
> To do that:
> - a receive function must be defined for the vhost user.
> - a message must be added between QEMU and vapp. This message will be
> sent only for old guest driver to avoid GARP duplication.
> - the added self_announce_disabled must be removed (decision to send
> or not the RARP is done later by the backend and not by the generic
> migration method)
>
> Do you agree with this solution ?
>
>

Sounds ok.

> Regards.
>
> On Mon, Jun 8, 2015 at 11:14 AM, Jason Wang  > wrote:
>
>
>
> On 06/08/2015 04:21 PM, Thibaut Collet wrote:
> > Hi,
> >
> > My understanding of gratuitous packet with virtio for any backend
> > (vhost user or other):
> > - When the VM is loaded (first start or migration) the virtio net
> > interfaces are loaded ( virtio_net_load_device function in
> > hw/net/virtio-net.c)
> > - If the guest has the VIRTIO_NET_F_GUEST_ANNOUNCE capability,
> request
> > to send gratuitous packet is done.
> >
> > 1. To enable gratuitous packet through this mechanism I have
> > added VIRTIO_NET_F_GUEST_ANNOUNCE capability to
> hw/net/vhost_net.c. So
> > host and guest can negotiate this feature when vhost-user is used.
> >
> > 2. self announce occurs in case of live migration. During a live
> > migration a GARP is sent to all net backend through a queue
> dedicated
> > to the net backend.
> >But for vhost-user:
> >- this operation is not possible (vhost-user has no queue)
> >- it is already done with the previous mechanism.
> >Rather to define a queue to vhost user and notify twice the guest
> > to send gratuitous packet I have disable GARP from self announce and
> > use only the first mechanism for that.
> >
> > I have tested my modifications with guest that supports
> > VIRTIO_NET_F_GUEST_ANNOUNCE and vhost-user on the host. After a live
> > migration I have the GARP from the guest.
>
> Yes, your patch works well for recent drivers. But the problem is
> legacy
> guest/driver without VIRTIO_NET_F_GUEST_ANNOUNCE. In this case there
> will be no GARP sent after migration.
>
> Thanks
>
>




[Qemu-devel] poor virtio-scsi performance

2015-06-08 Thread Vasiliy Tolstov
Hi all!

I suspected poor performance of virtio-scsi driver.
I did a few tests:
   Host machine: linux 3.19.1, QEMU emulator version 2.3.0
   Guest machine: linux 4.0.4

   part of domain xml:
/usr/bin/kvm

  
  
  
  
  
  


/dev/ram0 I got by running `modprobe brd rd_size=$((5*1024*1024))` on
host machine.

fio conf:
  [readtest]
  blocksize=4k
  filename=/dev/sdb (/dev/ram0 whe test from host machine)
  rw=randread
  direct=1
  buffered=0
  ioengine=libaio
  iodepth=32


results:
  from host:
bw=1594.6MB/s, iops=408196, clat=76usec
  from guest:
bw=398MB/s, iops=99720, clat=316usec

Both host and guest system I boot with `scsi_mod.use_blk_mq=Y`.

Why difference in 4 times?!

-- 
Vasiliy Tolstov,
e-mail: v.tols...@selfip.ru



Re: [Qemu-devel] [PATCH RFC V3 1/4] Use Aff1 with mpidr This is an improved and KVM-aware alternative to

2015-06-08 Thread Igor Mammedov
On Thu, 4 Jun 2015 18:17:39 +0100
Peter Maydell  wrote:

> On 4 June 2015 at 17:40, Shlomo Pongratz  wrote:
> > From: Pavel Fedin 
> 
> Hi. I think this patch largely makes sense, but I have some comments
> below. If you want to fix these and resend as a standalone patch
> I'm happy to apply that.
> 
> > 1. MPIDR value (minus feature bits) is now directly stored in CPU instance.
> > 2. Default assignment is based on original rule (8 CPUs per cluster).
> > 3. If KVM is used, MPIDR values are overridden with those from KVM. This is
> > necessary for
> > proper KVM PSCI functioning.
> > 4. Some cosmetic changes which would make further expansion of this code 
> > easier.
> > 5. Added some debugging macros because CPU ID assignment is tricky part, 
> > and if
> > there are
> > some problems, i think there should be a quick way to make sure they are
> > correct.
> >  This does not have an RFC mark because it is perfectly okay to be committed
> > alone, it
> > does not break anything. Commit message follows. Cc'ed to all interested
> > parties because i
> > really hope to get things going somewhere this time.
> 
> This sort of commentary belongs below the '---' line, so it doesn't
> end up in the final git commit message.
> 
> > In order to support up to 128 cores with GIC-500 (GICv3 implementation)
> > affinity1 must be used. GIC-500 support up to 32 clusters with up to
> > 8 cores in a cluster. So for example, if one wishes to have 16 cores,
> > the options are: 2 clusters of 8 cores each, 4 clusters with 4 cores each
> > Currently only the first option is supported for TCG. However, KVM expects
> > to have the same clusters layout as host machine has. Therefore, if we use
> > 64-bit KVM we override CPU IDs with those provided by the KVM. For 32-bit
> > systems it is OK to leave the default because so far we do not have more
> > than 8 CPUs on any of them.
> > This implementation has a potential to be extended with some methods which
> > would define cluster layout instead of hardcoded CPUS_PER_CLUSTER
> > definition. This would allow to emulate real machines with different
> > layouts. However, in case of KVM we would still have to inherit IDs from
> > the host.
> 
> I agree that we want to be using the same idea of MPIDR as KVM
> when KVM is enabled, certainly. But this commit message has
> a number of inaccuracies:
>  * KVM doesn't inherit IDs from the host
>  * we don't need to have the same cluster layout as the host machine
>  * this isn't specific to 64-bit VMs so we should fix 32 bits at the same time
> 
> The bug we're actually trying to fix here is just that the
> kernel's mapping from VCPU ID to MPIDR value has changed
> from the simplistic implementation it originally had. So in
> userspace we need to read the MPIDR value back from the kernel
> rather than assuming we know the mapping. (It happens that the
> reason the kernel changed was to accommodate GICv3 and its
> restrictions on affinity values, but that's not relevant to why
> this patch is needed.)
> 
> > Signed-off-by: Shlomo Pongratz 
> > Signed-off-by: Pavel Fedin 
> > ---
> >  hw/arm/virt.c|  6 +-
> >  target-arm/cpu-qom.h |  3 +++
> >  target-arm/cpu.c | 17 +
> >  target-arm/helper.c  |  9 +++--
> >  target-arm/kvm64.c   | 25 +
> >  target-arm/psci.c| 20 ++--
> >  6 files changed, 71 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index 05db8cb..19c8c3a 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -294,7 +294,11 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
> >  "enable-method", "psci");
> >  }
> >
> > -qemu_fdt_setprop_cell(vbi->fdt, nodename, "reg", cpu);
> > +/*
> > + * If cpus node's #address-cells property is set to 1
> > + * The reg cell bits [23:0] must be set to bits [23:0] of 
> > MPIDR_EL1.
> > + */
> 
> This comment is either obvious or misleading depending on your
> point of view.
> 
> > +qemu_fdt_setprop_cell(vbi->fdt, nodename, "reg", armcpu->mpidr);
> >  g_free(nodename);
> >  }
> >  }
> > diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
> > index ed5a644..a382a09 100644
> > --- a/target-arm/cpu-qom.h
> > +++ b/target-arm/cpu-qom.h
> > @@ -159,6 +159,7 @@ typedef struct ARMCPU {
> >  uint64_t id_aa64mmfr1;
> >  uint32_t dbgdidr;
> >  uint32_t clidr;
> > +uint64_t mpidr; /* Without feature bits */
> 
> I think it would be less confusing to store the entire MPIDR
> here, and then mask it out if there are places that only
> want the affinity bits.
> 
> >  /* The elements of this array are the CCSIDR values for each cache,
> >   * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
> >   */
> > @@ -171,6 +172,8 @@ typedef struct ARMCPU {
> >  uint64_t rvbar;
> >  } ARMCPU;
> >
> > +#define CPUS_PER_CLUSTER 8
> 
> Changes I suggest

[Qemu-devel] [PATCH v2 1/3] block: Extrace bdrv_parse_detect_zeroes_flags

2015-06-08 Thread Fam Zheng
The logic will be shared with qmp_drive_mirror.

Signed-off-by: Fam Zheng 
---
 block.c   | 26 ++
 blockdev.c| 14 ++
 include/block/block.h |  3 +++
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/block.c b/block.c
index 9890707..d9686c5 100644
--- a/block.c
+++ b/block.c
@@ -33,6 +33,7 @@
 #include "qemu/notify.h"
 #include "block/coroutine.h"
 #include "block/qapi.h"
+#include "qapi/util.h"
 #include "qmp-commands.h"
 #include "qemu/timer.h"
 #include "qapi-event.h"
@@ -671,6 +672,31 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
 return 0;
 }
 
+BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes_flags(const char *mode,
+   int bdrv_flags,
+   Error **errp)
+{
+Error *local_err = NULL;
+BlockdevDetectZeroesOptions detect_zeroes;
+detect_zeroes =
+qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+mode,
+BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+&local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return detect_zeroes;
+}
+
+if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+!(bdrv_flags & BDRV_O_UNMAP)) {
+error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+ "without setting discard operation to unmap");
+}
+return detect_zeroes;
+}
+
 /*
  * Returns the flags that a temporary snapshot should get, based on the
  * originally requested flags (the originally requested image will have flags
diff --git a/blockdev.c b/blockdev.c
index 6a4..5ad6960 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -483,23 +483,13 @@ static BlockBackend *blockdev_init(const char *file, 
QDict *bs_opts,
 }
 
 detect_zeroes =
-qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
-qemu_opt_get(opts, "detect-zeroes"),
-BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
-BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
-&error);
+bdrv_parse_detect_zeroes_flags(qemu_opt_get(opts, "detect-zeroes"),
+   bdrv_flags, &error);
 if (error) {
 error_propagate(errp, error);
 goto early_err;
 }
 
-if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
-!(bdrv_flags & BDRV_O_UNMAP)) {
-error_setg(errp, "setting detect-zeroes to unmap is not allowed "
- "without setting discard operation to unmap");
-goto early_err;
-}
-
 /* init */
 if ((!file || !*file) && !has_driver_specific_opts) {
 blk = blk_new_with_bs(qemu_opts_id(opts), errp);
diff --git a/include/block/block.h b/include/block/block.h
index 4d9b555..b7905ab 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -194,6 +194,9 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState 
*bs_old);
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_parse_discard_flags(const char *mode, int *flags);
+BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes_flags(const char *mode,
+   int bdrv_flags,
+   Error **errp);
 int bdrv_open_image(BlockDriverState **pbs, const char *filename,
 QDict *options, const char *bdref_key, int flags,
 bool allow_none, Error **errp);
-- 
2.4.2




[Qemu-devel] [PATCH v2 3/3] iotests: Add test cases for drive-mirror "detect-zeroes" option

2015-06-08 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/132| 28 +---
 tests/qemu-iotests/132.out|  4 ++--
 tests/qemu-iotests/iotests.py |  7 +++
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/132 b/tests/qemu-iotests/132
index f53ef6e..a4a4f01 100644
--- a/tests/qemu-iotests/132
+++ b/tests/qemu-iotests/132
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Test mirror with unmap
+# Test mirror with unmap and zero source clusters
 #
 # Copyright (C) 2015 Red Hat, Inc.
 #
@@ -21,7 +21,7 @@
 import time
 import os
 import iotests
-from iotests import qemu_img, qemu_io
+from iotests import qemu_img, qemu_io, qemu_img_map
 
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
@@ -55,5 +55,27 @@ class TestSingleDrive(iotests.QMPTestCase):
 self.assertTrue(iotests.compare_images(test_img, target_img),
 'target image does not match source after mirroring')
 
+def do_detect_zeroes_test(self, detect_zeroes, unmap):
+self.vm.hmp_qemu_io('drive0', 'write -P 0 0 2M')
+result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ target=target_img, detect_zeroes=detect_zeroes,
+ unmap=unmap)
+self.assert_qmp(result, 'return', {})
+self.complete_and_wait('drive0')
+self.vm.shutdown()
+return qemu_img_map(target_img)
+
+def test_detect_zeroes(self):
+m = self.do_detect_zeroes_test(detect_zeroes=True, unmap=False);
+self.assertTrue(m[0]["zero"])
+
+def test_detect_zeroes_unmap(self):
+m = self.do_detect_zeroes_test(detect_zeroes=True, unmap=True);
+self.assertTrue(m[0]["zero"])
+
+def test_no_detect_zeroes(self):
+m = self.do_detect_zeroes_test(detect_zeroes=False, unmap=False);
+self.assertFalse(m[0]["zero"])
+
 if __name__ == '__main__':
-iotests.main(supported_fmts=['raw', 'qcow2'])
+iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/132.out b/tests/qemu-iotests/132.out
index ae1213e..89968f3 100644
--- a/tests/qemu-iotests/132.out
+++ b/tests/qemu-iotests/132.out
@@ -1,5 +1,5 @@
-.
+
 --
-Ran 1 tests
+Ran 4 tests
 
 OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 8615b10..2ddc735 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -27,6 +27,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 
'..', 'scripts', '
 import qmp
 import qtest
 import struct
+import json
 
 __all__ = ['imgfmt', 'imgproto', 'test_dir' 'qemu_img', 'qemu_io',
'VM', 'QMPTestCase', 'notrun', 'main']
@@ -58,6 +59,12 @@ def qemu_img_pipe(*args):
 '''Run qemu-img and return its output'''
 return subprocess.Popen(qemu_img_args + list(args), 
stdout=subprocess.PIPE).communicate()[0]
 
+def qemu_img_map(*args):
+'''Run qemu-img map and return the result parsed from the json formated
+output '''
+output = qemu_img_pipe(*(['map', '--output=json'] + list(args)))
+return json.loads(output)
+
 def qemu_io(*args):
 '''Run qemu-io and return the stdout data'''
 args = qemu_io_args + list(args)
-- 
2.4.2




[Qemu-devel] [PATCH v2 0/3] mirror: Allow detection of zeroes on source sectors

2015-06-08 Thread Fam Zheng
[These patches go on top of the "block: Mirror discarded sectors" series]

v2: Rely on block/io.c zero detection. [Paolo]

Some protocols don't have an easy way to query sparseness, (e.g. block/nfs.c,
block/nbd.c), for which block layer always reports block status as "allocated
data".

This will let mirror job do full provisioning even if data is actually sparse
under the hood.

With the new "detect-zeroes" option, we can let mirror job detect zeroes before
sending the data to target, and use zero write when it is more efficient.

Fam

Fam Zheng (3):
  block: Extrace bdrv_parse_detect_zeroes_flags
  qapi: Add "detect-zeroes" option to drive-mirror
  iotests: Add test cases for drive-mirror "detect-zeroes" option

 block.c   | 26 ++
 blockdev.c| 40 +++-
 hmp.c |  2 +-
 include/block/block.h |  3 +++
 qapi/block-core.json  |  4 +++-
 qmp-commands.hx   |  4 +++-
 tests/qemu-iotests/132| 28 +---
 tests/qemu-iotests/132.out|  4 ++--
 tests/qemu-iotests/iotests.py |  7 +++
 9 files changed, 97 insertions(+), 21 deletions(-)

-- 
2.4.2




[Qemu-devel] [PATCH v2 2/3] qapi: Add "detect-zeroes" option to drive-mirror

2015-06-08 Thread Fam Zheng
The new optional flag defaults to true, in which case, mirror job would
check the read sectors and use sparse write if they are zero.  Otherwise
data will be fully copied.

Signed-off-by: Fam Zheng 
---
 blockdev.c   | 26 +-
 hmp.c|  2 +-
 qapi/block-core.json |  4 +++-
 qmp-commands.hx  |  4 +++-
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 5ad6960..3d008a2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2622,6 +2622,7 @@ void qmp_drive_mirror(const char *device, const char 
*target,
   bool has_on_source_error, BlockdevOnError 
on_source_error,
   bool has_on_target_error, BlockdevOnError 
on_target_error,
   bool has_unmap, bool unmap,
+  bool has_detect_zeroes, bool detect_zeroes,
   Error **errp)
 {
 BlockBackend *blk;
@@ -2634,6 +2635,7 @@ void qmp_drive_mirror(const char *device, const char 
*target,
 int flags;
 int64_t size;
 int ret;
+const char *detect_zeroes_str;
 
 if (!has_speed) {
 speed = 0;
@@ -2656,6 +2658,9 @@ void qmp_drive_mirror(const char *device, const char 
*target,
 if (!has_unmap) {
 unmap = true;
 }
+if (!has_detect_zeroes) {
+detect_zeroes = true;
+}
 
 if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) 
{
 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
@@ -2770,11 +2775,21 @@ void qmp_drive_mirror(const char *device, const char 
*target,
 goto out;
 }
 
+options = qdict_new();
 if (has_node_name) {
-options = qdict_new();
 qdict_put(options, "node-name", qstring_from_str(node_name));
 }
 
+if (unmap) {
+flags |= BDRV_O_UNMAP;
+}
+
+if (detect_zeroes) {
+detect_zeroes_str = unmap ? "unmap" : "on";
+} else {
+detect_zeroes_str = "off";
+}
+
 /* Mirroring takes care of copy-on-write using the source's backing
  * file.
  */
@@ -2786,6 +2801,15 @@ void qmp_drive_mirror(const char *device, const char 
*target,
 goto out;
 }
 
+target_bs->detect_zeroes =
+bdrv_parse_detect_zeroes_flags(detect_zeroes_str,
+   flags,
+   &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+goto out;
+}
+
 bdrv_set_aio_context(target_bs, aio_context);
 
 /* pass the node name to replace to mirror start since it's loose coupling
diff --git a/hmp.c b/hmp.c
index d5b9ebb..2056b53 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1057,7 +1057,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
  false, NULL, false, NULL,
  full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
  true, mode, false, 0, false, 0, false, 0,
- false, 0, false, 0, false, true, &err);
+ false, 0, false, 0, false, true, false, true, &err);
 hmp_handle_error(mon, &err);
 }
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 71ed838..d157f0f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -978,6 +978,8 @@
 # target image sectors will be unmapped; otherwise, zeroes will be
 # written. Both will result in identical contents.
 # Default is true. (Since 2.4)
+# @detect-zeroes: #optional Whether to detect zero sectors in source and use
+# zero write on target. Default is true. (Since 2.4)
 #
 # Returns: nothing on success
 #  If @device is not a valid block device, DeviceNotFound
@@ -991,7 +993,7 @@
 '*speed': 'int', '*granularity': 'uint32',
 '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
 '*on-target-error': 'BlockdevOnError',
-'*unmap': 'bool' } }
+'*unmap': 'bool', '*detect-zeroes': 'bool' } }
 
 ##
 # @BlockDirtyBitmap
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 3b33496..efeb77f 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1503,7 +1503,7 @@ EQMP
 .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
   "node-name:s?,replaces:s?,"
   "on-source-error:s?,on-target-error:s?,"
-  "unmap:b?,"
+  "unmap:b?,detect-zeroes:b?"
   "granularity:i?,buf-size:i?",
 .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
 },
@@ -1545,6 +1545,8 @@ Arguments:
   (BlockdevOnError, default 'report')
 - "unmap": whether the target sectors should be discarded where source has only
   zeroes. (json-bool, optional, default true)
+- "detect-zeroes": if true, the source sectors that are zeroes will be written 
as
+  sparse on target. (json-bool, optional, default true)
 
 The default value of the granularity is the image cluster 

[Qemu-devel] [PATCH v8 0/4] remove icc bus/bridge

2015-06-08 Thread Zhu Guihua
ICC Bus was used for providing a hotpluggable bus for APIC and CPU,
but now we use HotplugHandler to make hotplug. So ICC Bus is
unnecessary.

This code has passed the new pc-cpu-test.
And I have tested with kvm along with kernel_irqchip=on/off,
it works fine.

This patch series is based on Eduardo's x86 tree.
https://github.com/ehabkost/qemu.git

v8:
 -add a wrapper to specify reset order

v7:
 -update to register reset handler for main_system_bus when created
 -register reset handler for apic after all devices are initialized

v6:
 -reword commit message
 -drop NULL check for APIC device
 -use C cast instead of QOM cast

v5:
 -convert DEVICE() casts to C casts
 -use a local variable instead of doing the cast inline twice
 -drop to set cpu's parent bus
 -rename patch 3's subject
 -fix a bug about setting cpu's apic base

v4:
 -add wrapper to get root memory region from address space
 -set cpu apic base's default value in x86_cpu_apic_create()
 -drop NULL check for cpu apic_state
 -put drop of the unused files about icc_bus into a seprate patch
 -put DEVICE() casts into a seprate patch

v3:
 -replace init apic by object_new()
 -add reset apic at the time of CPU reset

Chen Fan (2):
  apic: map APIC's MMIO region at each CPU's address space
  cpu/apic: drop icc bus/bridge

Zhu Guihua (2):
  hw: add a wrapper for registering reset handler
  icc_bus: drop the unused files

 default-configs/i386-softmmu.mak   |   1 -
 default-configs/x86_64-softmmu.mak |   1 -
 exec.c |   5 ++
 hw/cpu/Makefile.objs   |   1 -
 hw/cpu/icc_bus.c   | 118 -
 hw/i386/pc.c   |  31 +++---
 hw/i386/pc_piix.c  |   9 +--
 hw/i386/pc_q35.c   |   9 +--
 hw/intc/apic_common.c  |  19 +++---
 include/exec/memory.h  |   5 ++
 include/hw/cpu/icc_bus.h   |  82 --
 include/hw/hw.h|   4 ++
 include/hw/i386/apic_internal.h|   7 ++-
 include/hw/i386/pc.h   |   2 +-
 target-i386/cpu.c  |  25 +---
 target-i386/cpu.h  |   4 ++
 vl.c   |  18 +-
 17 files changed, 78 insertions(+), 263 deletions(-)
 delete mode 100644 hw/cpu/icc_bus.c
 delete mode 100644 include/hw/cpu/icc_bus.h

-- 
1.9.3




[Qemu-devel] [PATCH v8 2/4] hw: add a wrapper for registering reset handler

2015-06-08 Thread Zhu Guihua
Add a wrapper to specify reset order when registering reset handler,
instead of non-obvious initiazation code ordering.

Signed-off-by: Zhu Guihua 
---
 include/hw/hw.h |  4 
 vl.c| 18 +-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/hw/hw.h b/include/hw/hw.h
index c78adae..d9375e7 100644
--- a/include/hw/hw.h
+++ b/include/hw/hw.h
@@ -37,7 +37,11 @@
 #endif
 
 typedef void QEMUResetHandler(void *opaque);
+typedef uint64_t QEMUResetOrder;
+#define default_reset_order 0x0
 
+void qemu_register_reset_common(QEMUResetHandler *func, void *opaque,
+QEMUResetOrder reset_order);
 void qemu_register_reset(QEMUResetHandler *func, void *opaque);
 void qemu_unregister_reset(QEMUResetHandler *func, void *opaque);
 
diff --git a/vl.c b/vl.c
index cdd81b4..a0d3e5e 100644
--- a/vl.c
+++ b/vl.c
@@ -1492,6 +1492,7 @@ typedef struct QEMUResetEntry {
 QTAILQ_ENTRY(QEMUResetEntry) entry;
 QEMUResetHandler *func;
 void *opaque;
+QEMUResetOrder reset_order;
 } QEMUResetEntry;
 
 static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers =
@@ -1575,15 +1576,30 @@ static int qemu_debug_requested(void)
 return r;
 }
 
-void qemu_register_reset(QEMUResetHandler *func, void *opaque)
+void qemu_register_reset_common(QEMUResetHandler *func, void *opaque,
+QEMUResetOrder reset_order)
 {
+QEMUResetEntry *item;
 QEMUResetEntry *re = g_malloc0(sizeof(QEMUResetEntry));
 
 re->func = func;
 re->opaque = opaque;
+re->reset_order = reset_order;
+
+QTAILQ_FOREACH(item, &reset_handlers, entry) {
+if (re->reset_order >= item->reset_order)
+continue;
+QTAILQ_INSERT_BEFORE(item, re, entry);
+return;
+}
 QTAILQ_INSERT_TAIL(&reset_handlers, re, entry);
 }
 
+void qemu_register_reset(QEMUResetHandler *func, void *opaque)
+{
+qemu_register_reset_common(func, opaque, default_reset_order);
+}
+
 void qemu_unregister_reset(QEMUResetHandler *func, void *opaque)
 {
 QEMUResetEntry *re;
-- 
1.9.3




[Qemu-devel] [PATCH v8 1/4] apic: map APIC's MMIO region at each CPU's address space

2015-06-08 Thread Zhu Guihua
From: Chen Fan 

Replace mapping APIC at global system address space with
mapping it at per-CPU address spaces.

Signed-off-by: Chen Fan 
Signed-off-by: Zhu Guihua 
---
 exec.c|  5 +
 hw/i386/pc.c  |  7 ---
 hw/intc/apic_common.c | 14 --
 include/exec/memory.h |  5 +
 target-i386/cpu.c |  2 ++
 5 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/exec.c b/exec.c
index e19ab22..71c02ed 100644
--- a/exec.c
+++ b/exec.c
@@ -2712,6 +2712,11 @@ void address_space_unmap(AddressSpace *as, void *buffer, 
hwaddr len,
 cpu_notify_map_clients();
 }
 
+MemoryRegion *address_space_root_memory_region(AddressSpace *as)
+{
+return as->root;
+}
+
 void *cpu_physical_memory_map(hwaddr addr,
   hwaddr *plen,
   int is_write)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 2baff4a..0682b27 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1097,13 +1097,6 @@ void pc_cpus_init(const char *cpu_model, DeviceState 
*icc_bridge)
 object_unref(OBJECT(cpu));
 }
 
-/* map APIC MMIO area if CPU has APIC */
-if (cpu && cpu->apic_state) {
-/* XXX: what if the base changes? */
-sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0,
-APIC_DEFAULT_ADDRESS, 0x1000);
-}
-
 /* tell smbios about cpuid version and features */
 smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);
 }
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d595d63..f251787 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -296,7 +296,8 @@ static void apic_common_realize(DeviceState *dev, Error 
**errp)
 APICCommonClass *info;
 static DeviceState *vapic;
 static int apic_no;
-static bool mmio_registered;
+CPUState *cpu = CPU(s->cpu);
+MemoryRegion *root;
 
 if (apic_no >= MAX_APICS) {
 error_setg(errp, "%s initialization failed.",
@@ -307,11 +308,12 @@ static void apic_common_realize(DeviceState *dev, Error 
**errp)
 
 info = APIC_COMMON_GET_CLASS(s);
 info->realize(dev, errp);
-if (!mmio_registered) {
-ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev));
-memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory);
-mmio_registered = true;
-}
+
+root = address_space_root_memory_region(cpu->as);
+memory_region_add_subregion_overlap(root,
+s->apicbase & MSR_IA32_APICBASE_BASE,
+&s->io_memory,
+0x1000);
 
 /* Note: We need at least 1M to map the VAPIC option ROM */
 if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b61c84f..a16650f 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1295,6 +1295,11 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
  int is_write, hwaddr access_len);
 
+/* address_space_root_memory_region: get root memory region
+ *
+ * @as: #AddressSpace to be accessed
+ */
+MemoryRegion *address_space_root_memory_region(AddressSpace *as);
 
 #endif
 
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 99ad551..e38943e 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2740,6 +2740,8 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
 /* TODO: convert to link<> */
 apic = APIC_COMMON(cpu->apic_state);
 apic->cpu = cpu;
+cpu_set_apic_base(cpu->apic_state,
+  APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE);
 }
 
 static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp)
-- 
1.9.3




Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Jan Beulich
>>> On 08.06.15 at 11:30,  wrote:
> On Mon, Jun 08, 2015 at 08:42:57AM +0100, Jan Beulich wrote:
>> Otoh the respective root port also has
>> - Received Master Abort set in its Secondary Status register (but
>>   that's also already the case in the log that we have before the UR
>>   occurs, i.e. that doesn't mean all that much),
>> - Received System Error set in its Secondary Status register (and
>>   after the UR the sibling endpoint [UR originating from 83:00.0,
>>   sibling being 83:00.1] also shows Signaled System Error set).
> 
> It's another function of the same physical device, correct?

Yes (obviously with their BDF only differing in the function).

> So is this sibling the only function sending SERR?

Yes, albeit I can't tell whether the root port generated SERR on
its own or in response to the endpoint doing so.

> What happens if you disable SERR# in the command register
> of 83:00.1?

Any experiments with that system will be quite difficult, as they're
only accessible by partners or ours. But I'll ask to try this if you
think this can provide useful insight.

>> > Do we can chalk this up to hardware bugs on a specific box?
>> 
>> I have to admit that I'm still very uncertain whether to consider all
>> this correct behavior, a firmware flaw, or a hardware bug.
> 
> Questions:
> 1.  Does this only happen with a specific endpoint?
> What if you add another endpoint to the same system?

We've got reports of this for two systems (two different vendors)
using a Broadcomm NIC in one case and an Intel one in the other.

> 2.  Has a driver initialized this endpoint? What if you don't
> load a driver before sending the transaction resulting in the UR?

I'd have to ask for this to be tried too, and getting an answer
may take some time.

Jan




Re: [Qemu-devel] [PATCH RFC V3 1/4] Use Aff1 with mpidr This is an improved and KVM-aware alternative to

2015-06-08 Thread Igor Mammedov
On Mon, 08 Jun 2015 12:39:56 +0300
Pavel Fedin  wrote:

>  Hello!
> 
> > >  I thought about it when i developed this, but in my implementation
> > > affinity bits are assigned during CPU instantiation, while feature
> > > bits can be added later, during realize. I could tweak set_feature()
> > > to sync up MPIDR value but perhaps it isn't the best thing to do.
> > 
> > Can we not just assign the whole thing at realize?
> 
>  kvm_arch_init_vcpu() is called before realize, therefore i would have to 
> track down whether KVM is active or not.

that's because call flow looks wrong.
If KVM would be fixed to use QEMU's mpidr then you can like x86 target

1. create vcpu
2. set mpidr property on it from board code
3. call cpu.realize() -> which will push mpidr to KVM


> 
> > You need to fix the 32-bit side anyway.
> 
>  Actually already done in my working tree.
> 
> > I still think that having the whole mpidr in the struct will
> > be less confusing.
> 
>  Ok, if you are really insisting on that, i can assign IDs where this is 
> currently done and add feature bits in arm_cpu_realizefn().
>  Last argument: the rest of qemu code (property assignment, lookup in PSCI, 
> etc) actually needs IDs without feature bits. The only function which needs 
> full version is mpidr_read(). So does it still worth of putting AND with 
> bitmasks everywhere? May be just rename 'mpidr' to something like 'mp_id' ?
> 
> Kind regards,
> Pavel Fedin
> Expert Engineer
> Samsung Electronics Research center Russia
> 




[Qemu-devel] [PATCH v8 4/4] icc_bus: drop the unused files

2015-06-08 Thread Zhu Guihua
ICC bus impl has been droped, so all icc related files are not useful
any more; delete them.

Signed-off-by: Zhu Guihua 
---
 default-configs/i386-softmmu.mak   |   1 -
 default-configs/x86_64-softmmu.mak |   1 -
 hw/cpu/Makefile.objs   |   1 -
 hw/cpu/icc_bus.c   | 118 -
 include/hw/cpu/icc_bus.h   |  82 --
 5 files changed, 203 deletions(-)
 delete mode 100644 hw/cpu/icc_bus.c
 delete mode 100644 include/hw/cpu/icc_bus.h

diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 91d602c..0759f22 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -42,7 +42,6 @@ CONFIG_LPC_ICH9=y
 CONFIG_PCI_Q35=y
 CONFIG_APIC=y
 CONFIG_IOAPIC=y
-CONFIG_ICC_BUS=y
 CONFIG_PVPANIC=y
 CONFIG_MEM_HOTPLUG=y
 CONFIG_XIO3130=y
diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak
index 2f2955b..23dbf51 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -42,7 +42,6 @@ CONFIG_LPC_ICH9=y
 CONFIG_PCI_Q35=y
 CONFIG_APIC=y
 CONFIG_IOAPIC=y
-CONFIG_ICC_BUS=y
 CONFIG_PVPANIC=y
 CONFIG_MEM_HOTPLUG=y
 CONFIG_XIO3130=y
diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs
index 6381238..0954a18 100644
--- a/hw/cpu/Makefile.objs
+++ b/hw/cpu/Makefile.objs
@@ -2,5 +2,4 @@ obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o
 obj-$(CONFIG_REALVIEW) += realview_mpcore.o
 obj-$(CONFIG_A9MPCORE) += a9mpcore.o
 obj-$(CONFIG_A15MPCORE) += a15mpcore.o
-obj-$(CONFIG_ICC_BUS) += icc_bus.o
 
diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c
deleted file mode 100644
index 6646ea2..000
--- a/hw/cpu/icc_bus.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/* icc_bus.c
- * emulate x86 ICC (Interrupt Controller Communications) bus
- *
- * Copyright (c) 2013 Red Hat, Inc
- *
- * Authors:
- * Igor Mammedov 
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see 
- */
-#include "hw/cpu/icc_bus.h"
-#include "hw/sysbus.h"
-
-/* icc-bridge implementation */
-
-static const TypeInfo icc_bus_info = {
-.name = TYPE_ICC_BUS,
-.parent = TYPE_BUS,
-.instance_size = sizeof(ICCBus),
-};
-
-
-/* icc-device implementation */
-
-static void icc_device_realize(DeviceState *dev, Error **errp)
-{
-ICCDeviceClass *idc = ICC_DEVICE_GET_CLASS(dev);
-
-/* convert to QOM */
-if (idc->realize) {
-idc->realize(dev, errp);
-}
-
-}
-
-static void icc_device_class_init(ObjectClass *oc, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(oc);
-
-dc->realize = icc_device_realize;
-dc->bus_type = TYPE_ICC_BUS;
-}
-
-static const TypeInfo icc_device_info = {
-.name = TYPE_ICC_DEVICE,
-.parent = TYPE_DEVICE,
-.abstract = true,
-.instance_size = sizeof(ICCDevice),
-.class_size = sizeof(ICCDeviceClass),
-.class_init = icc_device_class_init,
-};
-
-
-/*  icc-bridge implementation */
-
-typedef struct ICCBridgeState {
-/*< private >*/
-SysBusDevice parent_obj;
-/*< public >*/
-
-ICCBus icc_bus;
-MemoryRegion apic_container;
-} ICCBridgeState;
-
-#define ICC_BRIDGE(obj) OBJECT_CHECK(ICCBridgeState, (obj), TYPE_ICC_BRIDGE)
-
-static void icc_bridge_init(Object *obj)
-{
-ICCBridgeState *s = ICC_BRIDGE(obj);
-SysBusDevice *sb = SYS_BUS_DEVICE(obj);
-
-qbus_create_inplace(&s->icc_bus, sizeof(s->icc_bus), TYPE_ICC_BUS,
-DEVICE(s), "icc");
-
-/* Do not change order of registering regions,
- * APIC must be first registered region, board maps it by 0 index
- */
-memory_region_init(&s->apic_container, obj, "icc-apic-container",
-   APIC_SPACE_SIZE);
-sysbus_init_mmio(sb, &s->apic_container);
-s->icc_bus.apic_address_space = &s->apic_container;
-}
-
-static void icc_bridge_class_init(ObjectClass *oc, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(oc);
-
-set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
-}
-
-static const TypeInfo icc_bridge_info = {
-.name  = TYPE_ICC_BRIDGE,
-.parent = TYPE_SYS_BUS_DEVICE,
-.instance_init  = icc_bridge_init,
-.instance_size  = sizeof(ICCBridgeState),
-.class_init = icc_bridge_class_init,
-};
-
-
-static void icc_bus_register_types(void)
-{
-type_register_static(&icc_bus_info);
-type_register_static(&icc_device_info);
-type_register_static(

[Qemu-devel] [PATCH v8 3/4] cpu/apic: drop icc bus/bridge

2015-06-08 Thread Zhu Guihua
From: Chen Fan 

After CPU hotplug has been converted to BUS-less hot-plug infrastructure,
the only function ICC bus performs is to propagate reset to LAPICs. However
LAPIC could be reset by registering its reset handler after all device are
initialized.
Do so and drop ~200LOC of not needed anymore ICCBus related code.

Signed-off-by: Chen Fan 
Signed-off-by: Zhu Guihua 
---
 hw/i386/pc.c| 24 +---
 hw/i386/pc_piix.c   |  9 +
 hw/i386/pc_q35.c|  9 +
 hw/intc/apic_common.c   |  5 ++---
 include/hw/i386/apic_internal.h |  7 ---
 include/hw/i386/pc.h|  2 +-
 target-i386/cpu.c   | 23 +++
 target-i386/cpu.h   |  4 
 8 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0682b27..f34d59e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -59,7 +59,6 @@
 #include "qemu/config-file.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/cpu_hotplug.h"
-#include "hw/cpu/icc_bus.h"
 #include "hw/boards.h"
 #include "hw/pci/pci_host.h"
 #include "acpi-build.h"
@@ -990,27 +989,25 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int 
level)
 }
 }
 
+#define x86_cpu_apic_reset_order 0x1
+
 static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
-  DeviceState *icc_bridge, Error **errp)
+  Error **errp)
 {
 X86CPU *cpu = NULL;
 Error *local_err = NULL;
 
-if (icc_bridge == NULL) {
-error_setg(&local_err, "Invalid icc-bridge value");
-goto out;
-}
-
 cpu = cpu_x86_create(cpu_model, &local_err);
 if (local_err != NULL) {
 goto out;
 }
 
-qdev_set_parent_bus(DEVICE(cpu), qdev_get_child_bus(icc_bridge, "icc"));
-
 object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
 object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
 
+qemu_register_reset_common(x86_cpu_apic_reset, cpu,
+   x86_cpu_apic_reset_order);
+
 out:
 if (local_err) {
 error_propagate(errp, local_err);
@@ -1024,7 +1021,6 @@ static const char *current_cpu_model;
 
 void pc_hot_add_cpu(const int64_t id, Error **errp)
 {
-DeviceState *icc_bridge;
 X86CPU *cpu;
 int64_t apic_id = x86_cpu_apic_id_from_index(id);
 Error *local_err = NULL;
@@ -1053,9 +1049,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
 return;
 }
 
-icc_bridge = DEVICE(object_resolve_path_type("icc-bridge",
- TYPE_ICC_BRIDGE, NULL));
-cpu = pc_new_cpu(current_cpu_model, apic_id, icc_bridge, &local_err);
+cpu = pc_new_cpu(current_cpu_model, apic_id, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -1063,7 +1057,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
 object_unref(OBJECT(cpu));
 }
 
-void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
+void pc_cpus_init(const char *cpu_model)
 {
 int i;
 X86CPU *cpu = NULL;
@@ -1089,7 +1083,7 @@ void pc_cpus_init(const char *cpu_model, DeviceState 
*icc_bridge)
 
 for (i = 0; i < smp_cpus; i++) {
 cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i),
- icc_bridge, &error);
+ &error);
 if (error) {
 error_report_err(error);
 exit(1);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 5253e6d..60ae3ec 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -39,7 +39,6 @@
 #include "hw/kvm/clock.h"
 #include "sysemu/sysemu.h"
 #include "hw/sysbus.h"
-#include "hw/cpu/icc_bus.h"
 #include "sysemu/arch_init.h"
 #include "sysemu/block-backend.h"
 #include "hw/i2c/smbus.h"
@@ -97,7 +96,6 @@ static void pc_init1(MachineState *machine)
 MemoryRegion *ram_memory;
 MemoryRegion *pci_memory;
 MemoryRegion *rom_memory;
-DeviceState *icc_bridge;
 PcGuestInfo *guest_info;
 ram_addr_t lowmem;
 
@@ -141,11 +139,7 @@ static void pc_init1(MachineState *machine)
 exit(1);
 }
 
-icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
-object_property_add_child(qdev_get_machine(), "icc-bridge",
-  OBJECT(icc_bridge), NULL);
-
-pc_cpus_init(machine->cpu_model, icc_bridge);
+pc_cpus_init(machine->cpu_model);
 
 if (kvm_enabled() && kvmclock_enabled) {
 kvmclock_create();
@@ -228,7 +222,6 @@ static void pc_init1(MachineState *machine)
 if (pci_enabled) {
 ioapic_init_gsi(gsi_state, "i440fx");
 }
-qdev_init_nofail(icc_bridge);
 
 pc_register_ferr_irq(gsi[13]);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 110dfb7..902e5bb 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -43,7 +43,6 @@
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci.h"
 #include "hw/usb.h"
-#include "hw/cpu/icc_bus.h"
 #include 

Re: [Qemu-devel] [PATCH 0/2] vmdk: Fix vmdk_co_get_block_status

2015-06-08 Thread Kevin Wolf
Am 04.06.2015 um 08:02 hat Fam Zheng geschrieben:
> The buggy index_in_cluster was missed in b1649fae49a8. Fix that and dedup the
> calculation.

Thanks, applied to the block branch.

Kevin



Re: [Qemu-devel] [PATCH v2 2/3] qapi: Add "detect-zeroes" option to drive-mirror

2015-06-08 Thread Paolo Bonzini


On 08/06/2015 12:34, Fam Zheng wrote:
>  - "unmap": whether the target sectors should be discarded where source has 
> only
>zeroes. (json-bool, optional, default true)
> +- "detect-zeroes": if true, the source sectors that are zeroes will be 
> written as
> +  sparse on target. (json-bool, optional, default true)

all source sectors that are zeroes will be written using offloaded zero
writes on the target; furthermore, the sectors on the target will be
made sparse if possible if "unmap" is also true.

Perhaps it's simpler to use the enum though.  I'm not sure.  Jeff?

Paolo



Re: [Qemu-devel] [PATCH v2] memory_mapping: Rework cpu related includes

2015-06-08 Thread Paolo Bonzini


On 07/06/2015 23:59, Peter Crosthwaite wrote:
> This makes it more consistent with all other core code files, which
> either just rely on qemu-common.h inclusion or precede cpu.h with
> qemu-common.h.
> 
> cpu-all.h should not be included in addition to cpu.h. Remove it.
> 
> Signed-off-by: Peter Crosthwaite 
> ---
> changed since v1:
> Leave in cpu.h include
> 
> Picked up by my multi arch WIP where target-multi/cpu.h cant handle
> random core code inclusion without preceeded qemu-common.h. I guess
> this is the only one in tree?
> ---
>  memory_mapping.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/memory_mapping.c b/memory_mapping.c
> index 7b69801..36d6b26 100644
> --- a/memory_mapping.c
> +++ b/memory_mapping.c
> @@ -13,8 +13,8 @@
>  
>  #include 
>  
> +#include "qemu-common.h"
>  #include "cpu.h"
> -#include "exec/cpu-all.h"
>  #include "sysemu/memory_mapping.h"
>  #include "exec/memory.h"
>  #include "exec/address-spaces.h"
> 

Acked-by: Paolo Bonzini 

About to leave on vacation, so someone else will have to pick it up.

Paolo



Re: [Qemu-devel] Trying to execute code outside RAM or ROM at 0x08000230

2015-06-08 Thread Liviu Ionescu

> On 08 Jun 2015, at 12:17, Peter Maydell  wrote:
> 
> On 8 June 2015 at 09:46, Liviu Ionescu  wrote:
>> 
>> Q: is there any simple way to get rid of them?
> 
> This is probably the readline support (so you can do cursor
> editing of command lines). You can turn that off, though I forget
> the syntax -- it should be documented somewhere.

I could not find anything in the manual.

> I would have expected that either the Eclipse console did all
> its own editing and cursor movement and just sent the finished
> line to QEMU, or that if it's sending the cursor escapes when
> you do cursor movement that it doesn't get echoed back.)

it might be an interference between the Eclipse simple consoles and QEMU 
expecting full terminal support.

> What is printing the "Execute ..." line? A quick grep of the
> sources suggests it's not QEMU.

it is part of the increased verbosity needed by my use case. for this I added 
-verbose, which can be issued multiple times to increase the verbosity level.

the QEMU plugin issues a GDB custom 'monitor system_reset' command after 
loading the ELF file, and I need to see it in the console.

unfortunately my implementation is faulty, I need to check if the monitor is 
running in an interactive session and no longer display the verbosity related 
messages.

(btw, is there a simple way to tell if the monitor is running interactive or 
the command came from GDB?)

> the 
>> as for memory map, I get:
>> 
>> memory
>> - (prio 0, RW): system
>>  -0001 (prio 0, R-): cortexm-mem-flash
>>  0800-0801 (prio 0, R-): alias stm32-mem-flash-alias 
>> @system -0001
> 
> This is still aliasing the whole system address space, rather
> than just the flash device. The effects will be the same but
> it's a conceptual error I think.

ah, right, I finally got your point, the address range is ok, but the @system 
name is wrong.

now it reads:

  -0001 (prio 0, R-): cortexm-mem-flash
  0800-0801 (prio 0, R-): alias stm32-mem-flash-alias 
@cortexm-mem-flash -0001

> What's the cortexm-mem-hack ?

from arm7vm.c:

/* Hack to map an additional page of ram at the top of the address
   space.  This stops qemu complaining about executing code outside RAM
   when returning from an exception.  */
memory_region_init_ram(hack, NULL, "armv7m.hack", 0x1000, &error_abort);
vmstate_register_ram_global(hack);
memory_region_add_subregion(system_memory, 0xf000, hack);


regards,

Liviu




[Qemu-devel] should KVM or userspace be the one which decides what MIPIDR/affinity values to assign to vcpus?

2015-06-08 Thread Peter Maydell
On 8 June 2015 at 11:32, Igor Mammedov  wrote:
> On Thu, 4 Jun 2015 18:17:39 +0100
> Peter Maydell  wrote:
>> On 4 June 2015 at 17:40, Shlomo Pongratz  wrote:
>>  In order for it to work correctly we must use MPIDR values in
>>  the device tree which match the MPIDR values the kernel has picked
>>  for the vcpus, so ask KVM what those values are.

> Could we set QEMU's generated mpidr in kernel instead of pulling it from 
> kernel,
> like we do with APIC ID in x86 and fix kernel not to reset it its own value
> (i.e. untie mpidr from vcpuid)?
>
> Then later we could move setting mpidr from kvm_arch_init_vcpu() into
> board code which should be setting it, since it knows/defines what topology 
> it has.

This is a question better asked on the kvmarm list (which I have cc'd),
because that is where the kernel folks hang out...

-- PMM



Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Jan Beulich
>>> On 08.06.15 at 11:36,  wrote:
> On Mon, Jun 08, 2015 at 10:03:18AM +0100, Jan Beulich wrote:
>> >>> On 08.06.15 at 10:09,  wrote:
>> > I believe the correct behaviour is happening but a PCIE completion timeout 
>> > is occurring instead of a
>> > unsupported request.
>> 
>> Might it be that with the supposedly correct device returning UR
>> the root port reissues the request to the sibling device, which then
>> fails it in a more dramatic way (albeit the sibling's Uncorrectable
>> Error Status Register also has only Unsupported Request Error
>> Status set)?
> 
> Isn't the sibling a function on the same device?

Yes.

> And is the request causing the UR a memory read?

No, it's a write according to the ITP log.

> If so doesn't this use address routing?
> What does it mean that the request is "to the sibling device" then?

The way I expressed things above may simply be due to my limited
knowledge of PCIe terminology: I simply don't know (and can't find
this spelled out in the spec) what the root port's behavior ought to
be when a transaction comes in with an address that's within one of
its base/limit regions, but none of the endpoints can fulfill the
request. But you asking this made me look more closely at the
memory ranges dumped out to the ITP log: The root port has

0x20: Memory Base  = 0xca40
0x22: Memory Limit = 0xca40
0x24: Prefetchable Mem Base= 0xca21
0x26: Prefetchable Mem Limit   = 0xca21

while function 0 has

0x10: Base Address Register 0  = 0xca23000c (Memory space, 64-bit access, 
prefetchable)
0x18: Base Address Register 2  = 0xca24000c (Memory space, 64-bit access, 
prefetchable)
0x20: Base Address Register 4  = 0xca25000c (Memory space, 64-bit access, 
prefetchable)

and function 1

0x10: Base Address Register 0  = 0xca2c (Memory space, 64-bit access, 
prefetchable)
0x18: Base Address Register 2  = 0xca21000c (Memory space, 64-bit access, 
prefetchable)
0x20: Base Address Register 4  = 0xca22000c (Memory space, 64-bit access, 
prefetchable)

Isn't is bogus for all but one of the BARs being outside the root
ports prefetchable memory window (the MSI-X tables being inside
the BAR 4 region in both cases)?

> Does the sibling device have a BAR overlapping the address?

No, its BARs are fully separate.

Jan




[Qemu-devel] [PULL v3 00/62] KVM, dirty bitmap, build system, SMM, icount changes for 2015-06-05

2015-06-08 Thread Paolo Bonzini
The following changes since commit 00967f4e0bab246679d0ddc32fd31a7179345baf:

  Merge remote-tracking branch 'remotes/agraf/tags/signed-s390-for-upstream' 
into staging (2015-06-05 12:04:42 +0100)

are available in the git repository at:

  git://github.com/bonzini/qemu.git tags/for-upstream

for you to fetch changes up to 24a314269281a175b5540b3b6a8981ed2e8220e1:

  update Linux headers from kvm/next (2015-06-05 19:45:13 +0200)

One line fix in "q35: add test for SMRAM.D_LCK", which was using the wrong file
name for the Makefile rule.


* KVM error improvement from Laurent
* CONFIG_PARALLEL fix from Mirek
* Atomic/optimized dirty bitmap access from myself and Stefan
* BUILD_DIR convenience/bugfix from Peter C
* Memory leak fix from Shannon
* SMM improvements (though still TCG only) from myself and Gerd, acked by mst


Fam Zheng (1):
  qemu-nbd: Switch to qemu_set_fd_handler

Gerd Hoffmann (6):
  q35: fix ESMRAMC default
  q35: add config space wmask for SMRAM and ESMRAMC
  q35: implement SMRAM.D_LCK
  q35: add test for SMRAM.D_LCK
  q35: implement TSEG
  ich9: implement SMI_LOCK

Laurent Vivier (1):
  ppc: add helpful message when KVM fails to start VCPU

Miroslav Rezanina (1):
  Move parallel_hds_isa_init to hw/isa/isa-bus.c

Paolo Bonzini (43):
  exec: optimize phys_page_set_level
  memory: the only dirty memory flag for users is DIRTY_MEMORY_VGA
  g364fb: remove pointless call to memory_region_set_coalescing
  display: enable DIRTY_MEMORY_VGA tracking explicitly
  display: add memory_region_sync_dirty_bitmap calls
  memory: differentiate memory_region_is_logging and 
memory_region_get_dirty_log_mask
  memory: prepare for multiple bits in the dirty log mask
  framebuffer: check memory_region_is_logging
  ui/console: remove dpy_gfx_update_dirty
  memory: track DIRTY_MEMORY_CODE in mr->dirty_log_mask
  kvm: accept non-mapped memory in kvm_dirty_pages_log_change
  memory: include DIRTY_MEMORY_MIGRATION in the dirty log mask
  kvm: remove special handling of DIRTY_MEMORY_MIGRATION in the dirty log 
mask
  ram_addr: tweaks to xen_modified_memory
  exec: use memory_region_get_dirty_log_mask to optimize dirty tracking
  exec: move functions to translate-all.h
  translate-all: remove unnecessary argument to tb_invalidate_phys_range
  cputlb: remove useless arguments to tlb_unprotect_code_phys, rename
  translate-all: make less of tb_invalidate_phys_page_range depend on 
is_cpu_write_access
  exec: pass client mask to cpu_physical_memory_set_dirty_range
  exec: invert return value of cpu_physical_memory_get_clean, rename
  exec: only check relevant bitmaps for cleanliness
  memory: do not touch code dirty bitmap unless TCG is enabled
  memory: use mr->ram_addr in "is this RAM?" assertions
  target-i386: introduce cpu_get_mem_attrs
  target-i386: Use correct memory attributes for memory accesses
  target-i386: Use correct memory attributes for ioport accesses
  target-i386: mask NMIs on entry to SMM
  target-i386: set G=1 in SMM big real mode selectors
  target-i386: wake up processors that receive an SMI
  pflash_cfi01: change big-endian property to BIT type
  pflash_cfi01: change to new-style MMIO accessors
  pflash_cfi01: add secure property
  vl: allow full-blown QemuOpts syntax for -global
  qom: add object_property_add_const_link
  vl: run "late" notifiers immediately
  target-i386: create a separate AddressSpace for each CPU
  hw/i386: add a separate region that tracks the SMRAME bit
  target-i386: use memory API to implement SMRAM
  hw/i386: remove smram_update
  q35: implement high SMRAM
  atomics: add explicit compiler fence in __atomic memory barriers
  update Linux headers from kvm/next

Peter Crosthwaite (1):
  Makefile.target: set master BUILD_DIR

Stefan Hajnoczi (6):
  bitmap: add atomic set functions
  bitmap: add atomic test and clear
  memory: use atomic ops for setting dirty memory bits
  migration: move dirty bitmap sync to ram_addr.h
  memory: replace cpu_physical_memory_reset_dirty() with test-and-clear
  memory: make cpu_physical_memory_sync_dirty_bitmap() fully atomic

Victor CLEMENT (3):
  icount: implement a new icount_sleep mode toggleing real-time cpu sleep
  icount: add sleep parameter to the icount option to set icount_sleep mode
  icount: print a warning if there is no more deadline in sleep=no mode

 Makefile.target  |   2 +
 arch_init.c  |  46 +---
 bsd-user/main.c  |   4 -
 cpus.c   |  84 ---
 cputlb.c |   7 +-
 exec.c   

Re: [Qemu-devel] Trying to execute code outside RAM or ROM at 0x08000230

2015-06-08 Thread Peter Maydell
On 8 June 2015 at 11:51, Liviu Ionescu  wrote:
>> On 08 Jun 2015, at 12:17, Peter Maydell  wrote:
>> What is printing the "Execute ..." line? A quick grep of the
>> sources suggests it's not QEMU.
>
> it is part of the increased verbosity needed by my use case.
> for this I added -verbose, which can be issued multiple times
> to increase the verbosity level.

Is this verbosity also the thing printing the line with all
the escape characters in it? Do you get those when you're
actually editing the line as you type them in?

>> What's the cortexm-mem-hack ?
>
> from arm7vm.c:
>
> /* Hack to map an additional page of ram at the top of the address
>space.  This stops qemu complaining about executing code outside RAM
>when returning from an exception.  */
> memory_region_init_ram(hack, NULL, "armv7m.hack", 0x1000, &error_abort);
> vmstate_register_ram_global(hack);
> memory_region_add_subregion(system_memory, 0xf000, hack);

I wondered if it might be that, but that memory region is named
"armv7m.hack". There's no "cortexm-mem-hack" in the current QEMU
sources.

-- PMM



Re: [Qemu-devel] [PATCH 0/3] acpi: Clean up some GLib compatibility cruft

2015-06-08 Thread Markus Armbruster
Markus Armbruster  writes:

> Markus Armbruster (3):
>   Revert "aml-build: fix build for glib < 2.22"
>   acpi: Drop superfluous GLIB_CHECK_VERSION()
>   acpi: Simplify printing to dynamic string

3/3 got in (commit c3bdc56, thanks), but what about the other two?  They
still apply cleanly.



Re: [Qemu-devel] QEMU's CVE Procedures

2015-06-08 Thread Stefano Stabellini
On Fri, 5 Jun 2015, John Snow wrote:
> - Topal: Output generated on Mon Jun  8 11:48:03 BST 2015
> - Topal: GPG output starts -
> gpg: Signature made Fri 05 Jun 2015 23:16:30 BST using RSA key ID AAFC390E
> gpg: Can't check signature: public key not found
> - Topal: GPG output ends -
> - Topal: Original message starts -
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA256
> 
> Hi, everyone:
> 
> ("Oh no, what monolith did John type up this time? /Golly Dang He's
> really giving Markus a run for his money/")
> 
> Prompted by the recent CVE-2015-3456 ("VENOM") issue, it seems to me
> that our CVE handling procedure is a little more ad-hoc than it should
> reasonably be. This is not the first attempt to help rectify our CVE
> process -- see Peter Maydell's 2.3 post-mortem.
> 
> Our Security Process page ( http://wiki.qemu.org/SecurityProcess )
> details a list of persons one may contact when confronted with a
> security issue, but it's not a mailing list of trusted persons, only a
> handful of contacts. The only email list mentioned here is actually a
> Red Hat list, not an upstream one.
> 
> 
> (1) Would it be worth creating an upstream non-public list as a single
> point of contact to be able to reach out to? It could include all of
> the names printed here (And -- hey! Why is Peter Maydell not on this
> list? ...) to make it secure and easy to grab hold of the right people
> who understand the security policies.

+1


> (2) What is our CVE management policy for maintainers?
> This policy page also doesn't specify what maintainers should do or
> how they should handle CVEs if they are approached by e.g. the Red Hat
> security team, so our policy should expand a little to include
> instructions for some of the maintainers and sub-maintainers.
> 
> Not that the procedure should be complicated, but spelling it out
> can't hurt. Specific policy questions follow below.

I think that this is the point that is going to be harder to write. Feel
free to take a look at the Xen security process for inspiration:

http://www.xenproject.org/security-policy.html

Xen maintains its own pre-disclosure list, but aside from that, the rest
should be pretty reusable.


> (3) How do maintainers get reviews? Who do they ask?
> CVEs by nature are handled behind closed doors during the embargo
> period, so patches can't just be posted to the list at large. So there
> needs to be a review process in place for sub-maintainers that we can
> follow to get patches properly reviewed in a secure and private manner
> before the embargo date.
> 
> We could leave this process "ad-hoc" and trust that maintainers know
> who to ask for reviews, but I do not want anyone accused of "slipping
> in fixes" away from the eyes of the list, so a more formal procedure
> will help prevent any accusations of impropriety.
> 
> We likely need a list or mechanism where we can query a portion of
> developers who have agreed to assist with CVE patch reviews. This
> could include the existing security team, Peter Maydell, other
> sub-maintainers, developers with a lot of general experience and/or
> experience with the subsystem in question.
> 
> This could be perhaps the same email list as above (expanded to
> include a couple of reviewers) and those reviewers could loop in other
> developers on a case-by-case basis to get appropriate reviews, or we
> could create a wiki page with GPG keys and so forth of trusted
> reviewers that sub-maintainers can reach out to for reviews on a
> case-by-case basis.
> 
> Anything would be better than "Guess and go for it."

This should be easy: I think that people in the security list should
include anybody they feel would be able to contribute, as long as they
previously agree on the terms of the embargo.


> (4) How much review is sufficient?
> 
> A CVE patch should probably get at least one review from someone who
> is not Peter Maydell or the subsystem maintainer.
> 
> After it's been looked over by these 2-3 people it should be handed
> over to Peter Maydell to pre-approve prior to the embargo date being
> lifted so that the patch can be pulled promptly after disclosure is
> public.
>
> (5) What should be posted on embargo day?
> 
> A pull request of the patch(es) with all of the ACKs and R-Bs acquired
> during the embargo period alongside the patch being posted to
> qemu-devel and qemu-stable simultaneously seems sufficient.
> 
> Any additional review or changes incurred after the embargo date can
> be handled publicly and in the normal manner in response to the patch.
> 
> 
> (6) What about qemu-stable?
> 
> Our stable process is somewhat lacking with respect to the CVE
> process. It is good that we occasionally publish stable fix roundups
> that downstream maintainers can base their work off of, but it would
> be good to have a branch where we can have CVE fixes posted promptly.

+1


> (7) How long should we support a stable branch?
> 
> We should figure out how many stable release trees we 

Re: [Qemu-devel] [PATCH 1/2] block-backend: Introduce blk_drain() and replace blk_drain_all()

2015-06-08 Thread Kevin Wolf
Am 03.06.2015 um 15:46 hat Alexander Yarygin geschrieben:
> Each call of the virtio_blk_reset() function calls blk_drain_all(),
> which works for all existing BlockDriverStates, while only one
> BlockDriverState needs to be drained.
> 
> This patch introduces the blk_drain() function and replaces
> blk_drain_all() on it in virtio_blk_reset().
> 
> Cc: Christian Borntraeger 
> Cc: Cornelia Huck 
> Cc: Kevin Wolf 
> Cc: Paolo Bonzini 
> Cc: Stefan Hajnoczi 
> Signed-off-by: Alexander Yarygin 

For the BlockBackend part:

Acked-by: Kevin Wolf 

The only thing that I could suggest is that if you respin, it would be
cleaner to split this into two parts (as small as they may be). That you
have an "and" in your subject line is a strong hint for this.

Kevin



Re: [Qemu-devel] Trying to execute code outside RAM or ROM at 0x08000230

2015-06-08 Thread Liviu Ionescu

> On 08 Jun 2015, at 13:56, Peter Maydell  wrote:
> 
> Is this verbosity also the thing printing the line with all
> the escape characters in it?

I don't think so, the code added at around line 4135 in monitor.c looks like:

#if defined(CONFIG_VERBOSE)
if (verbosity_level >= VERBOSITY_COMMON) {
printf("Execute 'mon %s'.\n\n", cmdline);
}
#endif
cmd->mhandler.cmd(mon, qdict);

>  Do you get those when you're
> actually editing the line as you type them in?

no, editing seems fine, the garbled line is displayed after Enter.

> I wondered if it might be that, but that memory region is named
> "armv7m.hack". There's no "cortexm-mem-hack" in the current QEMU
> sources.

I'm not using armv7m.c, I have a separate hierarchy of objects, like 
"cortexm-mcu", "stm32-mcu", "stm32f103rb", etc.

regards,

Liviu




Re: [Qemu-devel] [PATCH] tcg: Mask TCGMemOp appropriately for indexing

2015-06-08 Thread Yongbok Kim
On 01/06/2015 16:28, Richard Henderson wrote:
> On 06/01/2015 02:42 AM, Yongbok Kim wrote:
>> I have noticed that you haven't changed it for the tci, is it deliberate?
>> Another minor thing is that tcg_dump_ops() in tcg.c wouldn't print ld/st
>> names correctly but hex code will be printed out.
> 
> Correct on both counts.  I'll get that fixed up too.
> 
> 
> r~
> 

Any update?

Regards,
Yongbok




[Qemu-devel] [PATCH v4 0/4] Extend TPM support with a QEMU-external TPM

2015-06-08 Thread Stefan Berger
The following series of patches extends TPM support with an
external TPM that offers a Linux CUSE (character device in userspace)
interface. This TPM lets each VM access its own private vTPM.
The CUSE TPM supports suspend/resume and migration. Much
out-of-band functionality necessary to control the CUSE TPM is
implemented using ioctls.

Stefan Berger (4):
  Provide support for the CUSE TPM
  Introduce condition to notify waiters of completed command
  Introduce condition in TPM backend for notification
  Add support for VM suspend/resume for TPM TIS

 hmp.c|   6 +
 hw/tpm/tpm_int.h |   4 +
 hw/tpm/tpm_ioctl.h   | 209 ++
 hw/tpm/tpm_passthrough.c | 409 +--
 hw/tpm/tpm_tis.c | 151 +++-
 hw/tpm/tpm_tis.h |   2 +
 hw/tpm/tpm_util.c| 223 +++
 hw/tpm/tpm_util.h|   7 +
 include/sysemu/tpm_backend.h |  12 ++
 qapi-schema.json |  18 +-
 qemu-options.hx  |  21 ++-
 qmp-commands.hx  |   2 +-
 tpm.c|  11 +-
 13 files changed, 1056 insertions(+), 19 deletions(-)
 create mode 100644 hw/tpm/tpm_ioctl.h

-- 
1.9.3




[Qemu-devel] [PATCH v4 2/4] Introduce condition to notify waiters of completed command

2015-06-08 Thread Stefan Berger
Introduce a lock and a condition to notify anyone waiting for the completion
of the execution of a TPM command by the backend (thread). The backend
uses the condition to signal anyone waiting for command completion.
We need to place the condition in two locations: one is invoked by the
backend thread, the other by the bottom half thread.
We will use the signalling to wait for command completion before VM
suspend.

Signed-off-by: Stefan Berger 
---
 hw/tpm/tpm_int.h |  3 +++
 hw/tpm/tpm_tis.c | 14 ++
 2 files changed, 17 insertions(+)

diff --git a/hw/tpm/tpm_int.h b/hw/tpm/tpm_int.h
index 6b2c9c9..70be1ad 100644
--- a/hw/tpm/tpm_int.h
+++ b/hw/tpm/tpm_int.h
@@ -30,6 +30,9 @@ struct TPMState {
 char *backend;
 TPMBackend *be_driver;
 TPMVersion be_tpm_version;
+
+QemuMutex state_lock;
+QemuCond cmd_complete;
 };
 
 #define TPM(obj) OBJECT_CHECK(TPMState, (obj), TYPE_TPM_TIS)
diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
index 0806b5f..4f6b6bc 100644
--- a/hw/tpm/tpm_tis.c
+++ b/hw/tpm/tpm_tis.c
@@ -366,6 +366,8 @@ static void tpm_tis_receive_bh(void *opaque)
 TPMTISEmuState *tis = &s->s.tis;
 uint8_t locty = s->locty_number;
 
+qemu_mutex_lock(&s->state_lock);
+
 tpm_tis_sts_set(&tis->loc[locty],
 TPM_TIS_STS_VALID | TPM_TIS_STS_DATA_AVAILABLE);
 tis->loc[locty].state = TPM_TIS_STATE_COMPLETION;
@@ -382,6 +384,10 @@ static void tpm_tis_receive_bh(void *opaque)
 tpm_tis_raise_irq(s, locty,
   TPM_TIS_INT_DATA_AVAILABLE | TPM_TIS_INT_STS_VALID);
 #endif
+
+/* notify of completed command */
+qemu_cond_signal(&s->cmd_complete);
+qemu_mutex_unlock(&s->state_lock);
 }
 
 /*
@@ -401,6 +407,11 @@ static void tpm_tis_receive_cb(TPMState *s, uint8_t locty,
 }
 }
 
+qemu_mutex_lock(&s->state_lock);
+/* notify of completed command */
+qemu_cond_signal(&s->cmd_complete);
+qemu_mutex_unlock(&s->state_lock);
+
 qemu_bh_schedule(tis->bh);
 }
 
@@ -1070,6 +1081,9 @@ static void tpm_tis_initfn(Object *obj)
 memory_region_init_io(&s->mmio, OBJECT(s), &tpm_tis_memory_ops,
   s, "tpm-tis-mmio",
   TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT);
+
+qemu_mutex_init(&s->state_lock);
+qemu_cond_init(&s->cmd_complete);
 }
 
 static void tpm_tis_class_init(ObjectClass *klass, void *data)
-- 
1.9.3




[Qemu-devel] [PATCH v4 3/4] Introduce condition in TPM backend for notification

2015-06-08 Thread Stefan Berger
TPM backends will suspend independently of the frontends. Also
here we need to be able to wait for the TPM command to have been
completely processed.

Signed-off-by: Stefan Berger 
---
 hw/tpm/tpm_passthrough.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/hw/tpm/tpm_passthrough.c b/hw/tpm/tpm_passthrough.c
index 68cb5e7..89c77c8 100644
--- a/hw/tpm/tpm_passthrough.c
+++ b/hw/tpm/tpm_passthrough.c
@@ -76,6 +76,10 @@ struct TPMPassthruState {
 TPMVersion tpm_version;
 ptm_cap cuse_cap; /* capabilities of the CUSE TPM */
 uint8_t cur_locty_number; /* last set locality */
+
+QemuMutex state_lock;
+QemuCond cmd_complete;  /* singnaled once tpm_busy is false */
+bool tpm_busy;
 };
 
 typedef struct TPMPassthruState TPMPassthruState;
@@ -250,6 +254,11 @@ static void tpm_passthrough_worker_thread(gpointer data,
 thr_parms->recv_data_callback(thr_parms->tpm_state,
   thr_parms->tpm_state->locty_number,
   selftest_done);
+/* result delivered */
+qemu_mutex_lock(&tpm_pt->state_lock);
+tpm_pt->tpm_busy = false;
+qemu_cond_signal(&tpm_pt->cmd_complete);
+qemu_mutex_unlock(&tpm_pt->state_lock);
 break;
 case TPM_BACKEND_CMD_INIT:
 case TPM_BACKEND_CMD_END:
@@ -377,6 +386,7 @@ static void tpm_passthrough_reset(TPMBackend *tb)
 tpm_backend_thread_end(&tpm_pt->tbt);
 
 tpm_pt->had_startup_error = false;
+tpm_pt->tpm_busy = false;
 }
 
 static int tpm_passthrough_init(TPMBackend *tb, TPMState *s,
@@ -454,6 +464,11 @@ static void tpm_passthrough_deliver_request(TPMBackend *tb)
 {
 TPMPassthruState *tpm_pt = TPM_PASSTHROUGH(tb);
 
+/* TPM considered busy once TPM Request scheduled for processing */
+qemu_mutex_lock(&tpm_pt->state_lock);
+tpm_pt->tpm_busy = true;
+qemu_mutex_unlock(&tpm_pt->state_lock);
+
 tpm_backend_thread_deliver_request(&tpm_pt->tbt);
 }
 
@@ -722,6 +737,11 @@ static const TPMDriverOps tpm_passthrough_driver = {
 
 static void tpm_passthrough_inst_init(Object *obj)
 {
+TPMBackend *tb = TPM_BACKEND(obj);
+TPMPassthruState *tpm_pt = TPM_PASSTHROUGH(tb);
+
+qemu_mutex_init(&tpm_pt->state_lock);
+qemu_cond_init(&tpm_pt->cmd_complete);
 }
 
 static void tpm_passthrough_inst_finalize(Object *obj)
-- 
1.9.3




Re: [Qemu-devel] fw cfg files cross-version migration races

2015-06-08 Thread Gerd Hoffmann
> > I would simply sort everything (and do it for new machine types only,

> > for backward compatibility reasons).  Sorting user-provided blobs only
> > adds complexity for no reason.

> Do we want this for all machine types, or only for new ones?





[Qemu-devel] [PATCH v4 4/4] Add support for VM suspend/resume for TPM TIS

2015-06-08 Thread Stefan Berger
Extend the TPM TIS code to support suspend/resume. In case a command
is being processed by the external TPM when suspending, wait for the command
to complete to catch the result. In case the bottom half did not run,
run the one function the bottom half is supposed to run. This then
makes the resume operation work.

The passthrough backend does not support suspend/resume operation
and is therefore blocked from suspend/resume and migration.

The CUSE TPM's supported capabilities are tested and if sufficient
capabilities are implemented, suspend/resume, snapshotting and
migration are supported by the CUSE TPM.

Signed-off-by: Stefan Berger 
---
 hw/tpm/tpm_passthrough.c | 129 +++--
 hw/tpm/tpm_tis.c | 137 +-
 hw/tpm/tpm_tis.h |   2 +
 hw/tpm/tpm_util.c| 223 +++
 hw/tpm/tpm_util.h|   7 ++
 include/sysemu/tpm_backend.h |  12 +++
 6 files changed, 502 insertions(+), 8 deletions(-)

diff --git a/hw/tpm/tpm_passthrough.c b/hw/tpm/tpm_passthrough.c
index 89c77c8..04edf00 100644
--- a/hw/tpm/tpm_passthrough.c
+++ b/hw/tpm/tpm_passthrough.c
@@ -35,6 +35,7 @@
 #include "tpm_tis.h"
 #include "tpm_util.h"
 #include "tpm_ioctl.h"
+#include "migration/migration.h"
 
 #define DEBUG_TPM 0
 
@@ -50,6 +51,7 @@
 #define TYPE_TPM_CUSE "tpm-cuse"
 
 static const TPMDriverOps tpm_passthrough_driver;
+static const VMStateDescription vmstate_tpm_cuse;
 
 /* data structures */
 typedef struct TPMPassthruThreadParams {
@@ -80,6 +82,10 @@ struct TPMPassthruState {
 QemuMutex state_lock;
 QemuCond cmd_complete;  /* singnaled once tpm_busy is false */
 bool tpm_busy;
+
+Error *migration_blocker;
+
+TPMBlobBuffers tpm_blobs;
 };
 
 typedef struct TPMPassthruState TPMPassthruState;
@@ -282,6 +288,10 @@ static void tpm_passthrough_shutdown(TPMPassthruState 
*tpm_pt)
  strerror(errno));
 }
 }
+if (tpm_pt->migration_blocker) {
+migrate_del_blocker(tpm_pt->migration_blocker);
+error_free(tpm_pt->migration_blocker);
+}
 }
 
 /*
@@ -336,12 +346,14 @@ static int 
tpm_passthrough_cuse_check_caps(TPMPassthruState *tpm_pt)
 /*
  * Initialize the external CUSE TPM
  */
-static int tpm_passthrough_cuse_init(TPMPassthruState *tpm_pt)
+static int tpm_passthrough_cuse_init(TPMPassthruState *tpm_pt,
+ bool is_resume)
 {
 int rc = 0;
-ptm_init init = {
-.u.req.init_flags = INIT_FLAG_DELETE_VOLATILE,
-};
+ptm_init init;
+if (is_resume) {
+init.u.req.init_flags = INIT_FLAG_DELETE_VOLATILE;
+}
 
 if (TPM_PASSTHROUGH_USES_CUSE_TPM(tpm_pt)) {
 if (ioctl(tpm_pt->tpm_fd, PTM_INIT, &init) < 0) {
@@ -370,7 +382,7 @@ static int tpm_passthrough_startup_tpm(TPMBackend *tb)
   tpm_passthrough_worker_thread,
   &tpm_pt->tpm_thread_params);
 
-tpm_passthrough_cuse_init(tpm_pt);
+tpm_passthrough_cuse_init(tpm_pt, false);
 
 return 0;
 }
@@ -442,6 +454,32 @@ static int 
tpm_passthrough_reset_tpm_established_flag(TPMBackend *tb,
 return rc;
 }
 
+static int tpm_cuse_get_state_blobs(TPMBackend *tb,
+bool decrypted_blobs,
+TPMBlobBuffers *tpm_blobs)
+{
+TPMPassthruState *tpm_pt = TPM_PASSTHROUGH(tb);
+
+assert(TPM_PASSTHROUGH_USES_CUSE_TPM(tpm_pt));
+
+return tpm_util_cuse_get_state_blobs(tpm_pt->tpm_fd, decrypted_blobs,
+ tpm_blobs);
+}
+
+static int tpm_cuse_set_state_blobs(TPMBackend *tb,
+TPMBlobBuffers *tpm_blobs)
+{
+TPMPassthruState *tpm_pt = TPM_PASSTHROUGH(tb);
+
+assert(TPM_PASSTHROUGH_USES_CUSE_TPM(tpm_pt));
+
+if (tpm_util_cuse_set_state_blobs(tpm_pt->tpm_fd, tpm_blobs)) {
+return 1;
+}
+
+return tpm_passthrough_cuse_init(tpm_pt, true);
+}
+
 static bool tpm_passthrough_get_startup_error(TPMBackend *tb)
 {
 TPMPassthruState *tpm_pt = TPM_PASSTHROUGH(tb);
@@ -464,7 +502,7 @@ static void tpm_passthrough_deliver_request(TPMBackend *tb)
 {
 TPMPassthruState *tpm_pt = TPM_PASSTHROUGH(tb);
 
-/* TPM considered busy once TPM Request scheduled for processing */
+/* TPM considered busy once TPM request scheduled for processing */
 qemu_mutex_lock(&tpm_pt->state_lock);
 tpm_pt->tpm_busy = true;
 qemu_mutex_unlock(&tpm_pt->state_lock);
@@ -577,6 +615,25 @@ static int tpm_passthrough_open_sysfs_cancel(TPMBackend 
*tb)
 return fd;
 }
 
+static void tpm_passthrough_block_migration(TPMPassthruState *tpm_pt)
+{
+ptm_cap caps;
+
+if (TPM_PASSTHROUGH_USES_CUSE_TPM(tpm_pt)) {
+caps = PTM_CAP_GET_STATEBLOB | PTM_CAP_SET_STATEBLOB |
+   PTM_CAP_STOP;
+if (!TPM_CUSE_IMPLEMENTS_ALL(tpm_pt, caps)) {
+error_setg(&tpm_pt->migration_blocker,
+   

[Qemu-devel] [PATCH v4 1/4] Provide support for the CUSE TPM

2015-06-08 Thread Stefan Berger
Rather than integrating TPM functionality into QEMU directly
using the TPM emulation of libtpms, we now integrate an external
emulated TPM device. This device is expected to implement a Linux
CUSE interface (CUSE = character device in userspace).

QEMU talks to the CUSE TPM using much functionality of the
passthrough driver. For example, the TPM commands and responses
are sent to the CUSE TPM using the read()/write() interface.
However, some out-of-band control needs to be done using the CUSE
TPM's ioctls. The CUSE TPM currently defines and implements 15
different ioctls for controlling certain life-cycle aspects of
the emulated TPM. The ioctls can be regarded as a replacement for
direct function calls to a TPM emulator if the TPM were to be
directly integrated into QEMU.

One of the ioctls allows to get a bitmask of supported capabilities.
Each returned bit indicates which capabilities have been implemented.
An include file defining the various ioctls is added to QEMU.

The CUSE TPM and associated tools can be found here:

https://github.com/stefanberger/swtpm

(please use the latest version)

To use the external CUSE TPM, the CUSE TPM should be started as follows:

# terminate previously started CUSE TPM
/usr/bin/swtpm_ioctl -s /dev/vtpm-test

# start CUSE TPM
/usr/bin/swtpm_cuse -n vtpm-test

QEMU can then be started using the following parameters:

qemu-system-x86_64 \
[...] \
-tpmdev cuse-tpm,id=tpm0,cancel-path=/dev/null,path=/dev/vtpm-test \
-device tpm-tis,id=tpm0,tpmdev=tpm0 \
[...]


Signed-off-by: Stefan Berger 
Cc: Eric Blake 
---
 hmp.c|   6 ++
 hw/tpm/tpm_int.h |   1 +
 hw/tpm/tpm_ioctl.h   | 209 
 hw/tpm/tpm_passthrough.c | 274 +--
 qapi-schema.json |  18 +++-
 qemu-options.hx  |  21 +++-
 qmp-commands.hx  |   2 +-
 tpm.c|  11 +-
 8 files changed, 524 insertions(+), 18 deletions(-)
 create mode 100644 hw/tpm/tpm_ioctl.h

diff --git a/hmp.c b/hmp.c
index 514f22f..c1b9843 100644
--- a/hmp.c
+++ b/hmp.c
@@ -837,6 +837,12 @@ void hmp_info_tpm(Monitor *mon, const QDict *qdict)
tpo->has_cancel_path ? ",cancel-path=" : "",
tpo->has_cancel_path ? tpo->cancel_path : "");
 break;
+case TPM_TYPE_OPTIONS_KIND_CUSE_TPM:
+tpo = ti->options->passthrough;
+monitor_printf(mon, "%s%s",
+   tpo->has_path ? ",path=" : "",
+   tpo->has_path ? tpo->path : "");
+break;
 case TPM_TYPE_OPTIONS_KIND_MAX:
 break;
 }
diff --git a/hw/tpm/tpm_int.h b/hw/tpm/tpm_int.h
index f2f285b..6b2c9c9 100644
--- a/hw/tpm/tpm_int.h
+++ b/hw/tpm/tpm_int.h
@@ -61,6 +61,7 @@ struct tpm_resp_hdr {
 #define TPM_TAG_RSP_AUTH1_COMMAND 0xc5
 #define TPM_TAG_RSP_AUTH2_COMMAND 0xc6
 
+#define TPM_SUCCESS   0
 #define TPM_FAIL  9
 
 #define TPM_ORD_ContinueSelfTest  0x53
diff --git a/hw/tpm/tpm_ioctl.h b/hw/tpm/tpm_ioctl.h
new file mode 100644
index 000..cec49d5
--- /dev/null
+++ b/hw/tpm/tpm_ioctl.h
@@ -0,0 +1,209 @@
+/*
+ * tpm_ioctl.h
+ *
+ * (c) Copyright IBM Corporation 2014, 2015.
+ *
+ * This file is licensed under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Every response from a command involving a TPM command execution must hold
+ * the ptm_res as the first element.
+ * ptm_res corresponds to the error code of a command executed by the TPM.
+ */
+
+typedef uint32_t ptm_res;
+
+/* PTM_GET_TPMESTABLISHED */
+struct ptm_est {
+ptm_res tpm_result;
+unsigned char bit; /* TPM established bit */
+};
+
+/* PTM_RESET_PTMESTABLISHED: reset establishment bit */
+struct ptm_reset_est {
+union {
+struct {
+uint8_t loc; /* locality to use */
+} req;
+struct {
+ptm_res tpm_result;
+} resp;
+} u;
+};
+
+/* PTM_INIT */
+struct ptm_init {
+union {
+struct {
+uint32_t init_flags; /* see definitions below */
+} req;
+struct {
+ptm_res tpm_result;
+} resp;
+} u;
+};
+
+/* above init_flags */
+#define INIT_FLAG_DELETE_VOLATILE (1 << 0)
+/* delete volatile state file after reading it */
+
+/* PTM_SET_LOCALITY */
+struct ptm_loc {
+union {
+struct {
+uint8_t loc; /* locality to set */
+} req;
+struct {
+ptm_res tpm_result;
+} resp;
+} u;
+};
+
+/* PTM_HASH_DATA: hash given data */
+struct ptm_hdata {
+union {
+struct {
+uint32_t length;
+uint8_t data[4096];
+} req;
+struct {
+ptm_res tpm_result;
+   

Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Michael S. Tsirkin
On Mon, Jun 08, 2015 at 11:55:22AM +0100, Jan Beulich wrote:
> >>> On 08.06.15 at 11:36,  wrote:
> > On Mon, Jun 08, 2015 at 10:03:18AM +0100, Jan Beulich wrote:
> >> >>> On 08.06.15 at 10:09,  wrote:
> >> > I believe the correct behaviour is happening but a PCIE completion 
> >> > timeout 
> >> > is occurring instead of a
> >> > unsupported request.
> >> 
> >> Might it be that with the supposedly correct device returning UR
> >> the root port reissues the request to the sibling device, which then
> >> fails it in a more dramatic way (albeit the sibling's Uncorrectable
> >> Error Status Register also has only Unsupported Request Error
> >> Status set)?
> > 
> > Isn't the sibling a function on the same device?
> 
> Yes.
> > And is the request causing the UR a memory read?
> 
> No, it's a write according to the ITP log.

So as far as I know, requesters normally can't
reissue posted requests such as writes.
See 6.2.3.1. Completion Status.



> > If so doesn't this use address routing?
> > What does it mean that the request is "to the sibling device" then?
> 
> The way I expressed things above may simply be due to my limited
> knowledge of PCIe terminology: I simply don't know (and can't find
> this spelled out in the spec) what the root port's behavior ought to
> be when a transaction comes in with an address that's within one of
> its base/limit regions, but none of the endpoints can fulfill the
> request.

I think that's because root port doesn't know this. root port forwards
the transaction downstream. It then gets back a UR.

> But you asking this made me look more closely at the
> memory ranges dumped out to the ITP log: The root port has
> 
> 0x20: Memory Base  = 0xca40
> 0x22: Memory Limit = 0xca40
> 0x24: Prefetchable Mem Base= 0xca21
> 0x26: Prefetchable Mem Limit   = 0xca21
> 
> while function 0 has
> 
> 0x10: Base Address Register 0  = 0xca23000c (Memory space, 64-bit access, 
> prefetchable)
> 0x18: Base Address Register 2  = 0xca24000c (Memory space, 64-bit access, 
> prefetchable)
> 0x20: Base Address Register 4  = 0xca25000c (Memory space, 64-bit access, 
> prefetchable)
> 
> and function 1
> 
> 0x10: Base Address Register 0  = 0xca2c (Memory space, 64-bit access, 
> prefetchable)
> 0x18: Base Address Register 2  = 0xca21000c (Memory space, 64-bit access, 
> prefetchable)

And what is the size of this BAR?

> 0x20: Base Address Register 4  = 0xca22000c (Memory space, 64-bit access, 
> prefetchable)
> 
> Isn't is bogus for all but one of the BARs being outside the root
> ports prefetchable memory window (the MSI-X tables being inside
> the BAR 4 region in both cases)?

I think it's an OK configuration in the sense that the spec does not
prohibit it. But the root port will never forward a transaction to these
addresses, so they can't be accessed from outside.

> > Does the sibling device have a BAR overlapping the address?
> 
> No, its BARs are fully separate.
> 
> Jan


Judging from the above, it's actually function 1's BAR 2 that
is accessed? Are you saying disabling memory on function 0
breaks function 2 somehow?





Re: [Qemu-devel] [PATCH v2] net: Add support of VIRTIO_NET_F_GUEST_ANNOUNCE for vhost-net/vhost-user

2015-06-08 Thread Thibaut Collet
Hi,

> I don't think qemu_send_packet_raw can ever work for vhost user.
> What happens if you merely add VIRTIO_NET_F_GUEST_ANNOUNCE
> to the feature list, and drop the rest of the patch?

If you merely add VIRTIO_NET_F_GUEST_ANNOUNCE you have the GARP with recent
guest after a live migration.
The rest of the patch (can be set in a separate commit) avoid a qemu
crashes with live migration when vhost-user is present:
 - When a live migration migration is complete a RARP is automatically send
to any net backend (self announce mechanism)
 - vhost user does not provide any receive function and RARP request is
stored in the vhost-user queue
 - When a migration back is done all the net backend queues are purged. The
stored RARP request for vhost-user is then sent to the register receive
callback of vhost-user that is NULL.

Support of live migration for vhost user needs the whole patch. (and maore
if we want support legacy guest with no support of
VIRTIO_NET_F_GUEST_ANNOUNCE)


> I don't get it.  Why do you need any extra messages for old drivers?  To
> detect old drivers, simply have backend check whether
> VIRTIO_NET_F_GUEST_ANNOUNCE is set.

For old driver we can not use the mechanism implemented in virtio-net that
manages VIRTIO_NET_F_GUEST_ANNOUNCE. In this case we must send the RARP on
the network interface created by vhost-user.
My first idea to do that was to add a message between QEMU and vhost
client/backend (vapp or other) to let the vhost client/backend send the
RARP.
But maybe it will be easier to let QEMU send directly the RARP message on
the network interface created by vhost user.
This point can be done later if it is needed.

Regards.


On Mon, Jun 8, 2015 at 12:12 PM, Michael S. Tsirkin  wrote:

> On Fri, Jun 05, 2015 at 03:24:12PM +0200, Thibaut Collet wrote:
> > Add VIRTIO_NET_F_GUEST_ANNOUNCE capability to vhost-net when netdev
> backend is
> > vhost-user.
> >
> > For netdev backend using virtio-net NIC the self announce is managed
> directly
> > by the virtio-net NIC and not by the netdev backend itself.
> > To avoid duplication of announces (once from the guest and once from
> QEMU) a
> > bitfield is added in the NetClientState structure.
> > If this bit is set self announce does not send message to the guest to
> request
> > gratuitous ARP but let virtio-net NIC set the VIRTIO_NET_S_ANNOUNCE for
> > gratuitous ARP.
> >
> > Signed-off-by: Thibaut Collet 
> > ---
> > v2: do not discard anymore packets send to vhost-user: it is GARP
> request after
> > live migration.
> > As suggested by S. Hajnoczi qemu_announce_self skips virtio-net NIC
> that
> > already send GARP.
> >
> >  hw/net/vhost_net.c |2 ++
> >  include/net/net.h  |1 +
> >  net/vhost-user.c   |2 ++
> >  savevm.c   |   11 ---
> >  4 files changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 426b23e..a745f97 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -82,6 +82,8 @@ static const int user_feature_bits[] = {
> >  VIRTIO_NET_F_CTRL_MAC_ADDR,
> >  VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
> >
> > +VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +
> >  VIRTIO_NET_F_MQ,
> >
> >  VHOST_INVALID_FEATURE_BIT
> > diff --git a/include/net/net.h b/include/net/net.h
> > index e66ca03..a78e9df 100644
> > --- a/include/net/net.h
> > +++ b/include/net/net.h
> > @@ -85,6 +85,7 @@ struct NetClientState {
> >  char *name;
> >  char info_str[256];
> >  unsigned receive_disabled : 1;
> > +unsigned self_announce_disabled : 1;
> >  NetClientDestructor *destructor;
> >  unsigned int queue_index;
> >  unsigned rxfilter_notify_enabled:1;
> > diff --git a/net/vhost-user.c b/net/vhost-user.c
> > index 8d26728..b345446 100644
> > --- a/net/vhost-user.c
> > +++ b/net/vhost-user.c
> > @@ -147,6 +147,8 @@ static int net_vhost_user_init(NetClientState *peer,
> const char *device,
> >
> >  s = DO_UPCAST(VhostUserState, nc, nc);
> >
> > +/* Self announce is managed directly by virtio-net NIC */
> > +s->nc.self_announce_disabled = 1;
> >  /* We don't provide a receive callback */
> >  s->nc.receive_disabled = 1;
> >  s->chr = chr;
> > diff --git a/savevm.c b/savevm.c
> > index 3b0e222..7a134b1 100644
> > --- a/savevm.c
> > +++ b/savevm.c
> > @@ -80,11 +80,16 @@ static void qemu_announce_self_iter(NICState *nic,
> void *opaque)
> >  {
> >  uint8_t buf[60];
> >  int len;
> > +NetClientState *nc;
> >
> > -trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
> > -len = announce_self_create(buf, nic->conf->macaddr.a);
> > +nc = qemu_get_queue(nic);
> >
> > -qemu_send_packet_raw(qemu_get_queue(nic), buf, len);
> > +if (!nc->peer->self_announce_disabled) {
> > +
> trace_qemu_announce_self_iter(qemu_ether_ntoa(&nic->conf->macaddr));
> > +len = announce_self_create(buf, nic->conf->macaddr.a);
> > +
> > +qemu_send_pack

Re: [Qemu-devel] [PATCH] xen/pass-through: ROM BAR handling adjustments

2015-06-08 Thread Stefano Stabellini
On Mon, 8 Jun 2015, Jan Beulich wrote:
> >>> On 05.06.15 at 18:41,  wrote:
> > On Fri, 5 Jun 2015, Jan Beulich wrote:
> >> >>> On 05.06.15 at 13:32,  wrote:
> >> >> --- a/hw/xen/xen_pt.c
> >> >> +++ b/hw/xen/xen_pt.c
> >> >> @@ -248,7 +248,9 @@ static void xen_pt_pci_write_config(PCID
> >> >>  
> >> >>  /* check unused BAR register */
> >> >>  index = xen_pt_bar_offset_to_index(addr);
> >> >> -if ((index >= 0) && (val > 0 && val < XEN_PT_BAR_ALLF) &&
> >> >> +if ((index >= 0) && (val != 0) &&
> >> >> +(((index != PCI_ROM_SLOT) ?
> >> >> +  val : (val | (uint32_t)~PCI_ROM_ADDRESS_MASK)) != 
> > XEN_PT_BAR_ALLF) &&
> >> > 
> >> > The change seems looks good and in line with the commit message. But
> >> > this if statement looks like acrobatic circus to me now.
> >> 
> >> I think the alternative of splitting it up into multiple if()-s would not
> >> be any better readable.
> > 
> > Would you be OK if I rewrote the statement as follows?
> > 
> > if ((index >= 0) && (val != 0)) {
> > uint32_t vu;
> > 
> > if (index == PCI_ROM_SLOT) {
> > vu = val | (uint32_t)~PCI_ROM_ADDRESS_MASK;
> > } else {
> > vu = val;
> > }
> > 
> > if ((vu != XEN_PT_BAR_ALLF) &&
> > (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) {
> > XEN_PT_WARN(d, "Guest attempt to set address to unused Base 
> > Address "
> > "Register. (addr: 0x%02x, len: %d)\n", addr, len);
> > }
> > }
> 
> Actually I agree that this indeed looks better. If I were to make the
> change, I'd do
> 
> if ((index >= 0) && (val != 0)) {
> uint32_t vu = val;
> 
> if (index == PCI_ROM_SLOT) {
> vu |= (uint32_t)~PCI_ROM_ADDRESS_MASK;
> }
> 
> if ((vu != XEN_PT_BAR_ALLF) &&
> ...
> 
> though. But if you're going to do the edit without wanting me to
> re-submit, I'll certainly leave this to you. Just let me know which
> way you prefer it to be handled.

I prefer if you resubmit, thanks!



Re: [Qemu-devel] segfault in memcmp

2015-06-08 Thread Gerd Hoffmann
On Mo, 2015-06-08 at 10:31 +0100, Stefan Hajnoczi wrote:
> On Fri, Jun 05, 2015 at 05:19:53PM -0500, perrier vincent wrote:
> > Using a very old guest (lenny) with spice and vga=cirrus, I have
> > a segfault:

Hmm, doesn't reproduce on master (booting i386 lenny install iso).
Which qemu version is this?

> > FILE:  ui/spice-display.c
> > FUNCTION:  qemu_spice_create_update
> > LINE:  if (memcmp(guest + yoff + xoff,
> >mirror + yoff + xoff,
> >bw * bpp) == 0)
> > 
> > The address of mirror + yoff + xoff is out of boundaries.
> > 
> > I use the following to avoid the crash:
> > 
> > ...
> >   img_get_stride = pixman_image_get_stride(ssd->mirror);
> >   img_height = pixman_image_get_height(ssd->mirror);
> >   img_max = img_height * img_get_stride;
> > ...
> >   if (yoff > img_max)
> > {
> > if (dirty_top[blk] == -1)
> >   dirty_top[blk] = y;
> > }

I'm wondering how you end up with yoff being ouf of boundaries in the
first place ...

cheers,
  Gerd





Re: [Qemu-devel] [Xen-devel] [PATCH][XSA-126] xen: limit guest control of PCI command register

2015-06-08 Thread Jan Beulich
>>> On 08.06.15 at 13:28,  wrote:
> On Mon, Jun 08, 2015 at 11:55:22AM +0100, Jan Beulich wrote:
>> But you asking this made me look more closely at the
>> memory ranges dumped out to the ITP log: The root port has
>> 
>> 0x20: Memory Base  = 0xca40
>> 0x22: Memory Limit = 0xca40
>> 0x24: Prefetchable Mem Base= 0xca21
>> 0x26: Prefetchable Mem Limit   = 0xca21
>> 
>> while function 0 has
>> 
>> 0x10: Base Address Register 0  = 0xca23000c (Memory space, 64-bit access, 
> prefetchable)
>> 0x18: Base Address Register 2  = 0xca24000c (Memory space, 64-bit access, 
> prefetchable)
>> 0x20: Base Address Register 4  = 0xca25000c (Memory space, 64-bit access, 
> prefetchable)
>> 
>> and function 1
>> 
>> 0x10: Base Address Register 0  = 0xca2c (Memory space, 64-bit access, 
> prefetchable)
>> 0x18: Base Address Register 2  = 0xca21000c (Memory space, 64-bit access, 
> prefetchable)
> 
> And what is the size of this BAR?

Sadly I don't seem have a matching pair of kernel and ITP logs, so I
can only guess that it's 64k (or less).

Jan




  1   2   3   4   5   >