[Qemu-devel] [PATCH] scsi: execute SYNCHRONIZE_CACHE asynchronously

2011-09-06 Thread Paolo Bonzini
Signed-off-by: Paolo Bonzini 
---
 hw/scsi-disk.c |   41 +
 1 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 3ffd536..c23c2b8 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -130,6 +130,24 @@ static void scsi_read_complete(void * opaque, int ret)
 scsi_req_data(&r->req, r->iov.iov_len);
 }
 
+static void scsi_flush_complete(void * opaque, int ret)
+{
+SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+if (r->req.aiocb != NULL) {
+r->req.aiocb = NULL;
+bdrv_acct_done(s->bs, &r->acct);
+}
+
+if (ret < 0) {
+if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) {
+return;
+}
+}
+
+scsi_req_complete(&r->req, GOOD);
+}
 
 /* Read more data from scsi device into buffer.  */
 static void scsi_read_data(SCSIRequest *req)
@@ -794,7 +812,6 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, 
uint8_t *outbuf)
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
 uint64_t nb_sectors;
 int buflen = 0;
-int ret;
 
 switch (req->cmd.buf[0]) {
 case TEST_UNIT_READY:
@@ -866,20 +883,6 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, 
uint8_t *outbuf)
 outbuf[7] = 0;
 buflen = 8;
 break;
-case SYNCHRONIZE_CACHE:
-{
-BlockAcctCookie acct;
-
-bdrv_acct_start(s->bs, &acct, 0, BDRV_ACCT_FLUSH);
-ret = bdrv_flush(s->bs);
-bdrv_acct_done(s->bs, &acct);
-if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) {
-return -1;
-}
-}
-break;
-}
 case GET_CONFIGURATION:
 memset(outbuf, 0, 8);
 /* ??? This should probably return much more information.  For now
@@ -987,7 +990,6 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t 
*buf)
 case START_STOP:
 case ALLOW_MEDIUM_REMOVAL:
 case READ_CAPACITY_10:
-case SYNCHRONIZE_CACHE:
 case READ_TOC:
 case GET_CONFIGURATION:
 case SERVICE_ACTION_IN:
@@ -999,6 +1001,13 @@ static int32_t scsi_send_command(SCSIRequest *req, 
uint8_t *buf)
 
 r->iov.iov_len = rc;
 break;
+case SYNCHRONIZE_CACHE:
+bdrv_acct_start(s->bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+r->req.aiocb = bdrv_aio_flush(s->bs, scsi_flush_complete, r);
+if (r->req.aiocb == NULL) {
+scsi_flush_complete(r, -EIO);
+}
+return 0;
 case READ_6:
 case READ_10:
 case READ_12:
-- 
1.7.6




Re: [Qemu-devel] [PATCH] pci: add standard bridge device

2011-09-06 Thread Avi Kivity

On 09/06/2011 06:06 AM, Wen Congyang wrote:

>  Use the uio driver -
>  http://docs.blackfin.uclinux.org/kernel/generated/uio-howto/.  You just
>  mmap() the BAR from userspace and play with it.

When I try to bind ivshmem to uio_pci_generic, I get the following messages:
uio_pci_generic :01:01.0: No IRQ assigned to device: no support for 
interrupts?



No idea what this means.

--
error compiling committee.c: too many arguments to function




Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-06 Thread Avi Kivity

On 09/05/2011 10:36 PM, Blue Swirl wrote:

>  I don't agree. That's not what qemu_irq represents.
>  It represents a wire, a mechanism to drive changes through logic paths
>  between state. It is intrinsically stateless.
>
>  It may be the case that it is missused in some places, or that it isn't
>  always the best thing to use to represent what ever you need to represent,
>  so that you want to complement with other mechanisms.
>  But universally replacing it with a stateful alternative seems wrong to me.

Maybe there could be a stateless version of Pin for optimization:
Line? This would probably save one bool worth of memory and one memory
store access for each IRQ event.


Let's actually measure the effect of the bool/store before we optimize 
it away.


--
error compiling committee.c: too many arguments to function




Re: [Qemu-devel] new memory api: offsets?

2011-09-06 Thread Avi Kivity

On 09/01/2011 01:00 PM, Michael Walle wrote:

Am Donnerstag 01 September 2011, 07:54:28 schrieb Avi Kivity:
>  On 09/01/2011 01:54 AM, Michael Walle wrote:
>  >  Hi Avi,
>  >
>  >  while debugging, i noticed, that mr->offset is never set, expect in the
>  >  initializer. (The subregion collision warning is printed although the
>  >  regions are not overlapping.) Is this intended?
>
>  Did you miss memory_region_set_offset()?
I saw that function but it's never called. Now i noticed that this is a
deprecated public function.

>  The subregion collision warning is unrelated?
you are walking along the subregions and use that offset property to detect
collisions. Shouldn't you use the addr property instead?


Ah, of course.



@@ -1190,16 +1190,18 @@ static void
memory_region_add_subregion_common(MemoryRegion *mr,
  if (subregion->may_overlap || other->may_overlap) {
  continue;
  }
-if (offset>= other->offset + other->size
-|| offset + subregion->size<= other->offset) {
+if (offset>= other->addr + other->size
+|| offset + subregion->size<= other->addr) {
  continue;
  }


Right.  Please post with a changelog and signoff.



>  >  btw. you may include the memory region name in the warning.
>
>  sure, patches welcome.

-printf("warning: subregion collision %llx/%llx vs %llx/%llx\n",
+printf("warning: subregion collision %llx/%llx (%s) "
+   "vs %llx/%llx (%s)\n",
 (unsigned long long)offset,
 (unsigned long long)subregion->size,
-   (unsigned long long)other->offset,
-   (unsigned long long)other->size);
+   subregion->name,
+   (unsigned long long)other->addr,
+   (unsigned long long)other->size,
+   other->name);

let me know if i should post a git patch instead :)



Yes, with a signoff.

--
error compiling committee.c: too many arguments to function




[Qemu-devel] [PATCH] docs: Fix qdev-device-use.txt typo in -chardev serial, path=COM

2011-09-06 Thread Markus Armbruster

Signed-off-by: Markus Armbruster 
---
 docs/qdev-device-use.txt |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/docs/qdev-device-use.txt b/docs/qdev-device-use.txt
index 057c322..136d271 100644
--- a/docs/qdev-device-use.txt
+++ b/docs/qdev-device-use.txt
@@ -208,7 +208,7 @@ LEGACY-CHARDEV translates to -chardev HOST-OPTS... as 
follows:
 
 * con: becomes -chardev console
 
-* COM becomes -chardev serial,path=
+* COM becomes -chardev serial,path=COM
 
 * file:FNAME becomes -chardev file,path=FNAME
 
-- 
1.7.6




[Qemu-devel] [PATCH] Only build with -g CFLAGS/LDFLAGS if using --enable-debug and add --optflags.

2011-09-06 Thread Brad
Only build with -g CFLAGS/LDFLAGS if using --enable-debug.
Add --optflags to allow overriding the default optimization
level added to CFLAGS.

This is a first draft of coming up with a patch I could potentially
push upstream based on much cruder local patches to do something
similar. I'm trying to eliminate having to patch the configure
script.

Comments?

---
 configure |   19 +++
 1 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index c3044c7..23a85a7 100755
--- a/configure
+++ b/configure
@@ -77,6 +77,7 @@ path_of() {
 # default parameters
 source_path=`dirname "$0"`
 cpu=""
+optflags=""
 interp_prefix="/usr/gnemul/qemu-%M"
 static="no"
 sparc_cpu=""
@@ -195,6 +196,8 @@ for opt do
   ;;
   --cpu=*) cpu="$optarg"
   ;;
+  --optflags=*) optflags="$optarg"
+  ;;
   --extra-cflags=*) QEMU_CFLAGS="$optarg $QEMU_CFLAGS"
   ;;
   --extra-ldflags=*) LDFLAGS="$optarg $LDFLAGS"
@@ -232,13 +235,11 @@ sdl_config="${SDL_CONFIG-${cross_prefix}sdl-config}"
 
 # default flags for all hosts
 QEMU_CFLAGS="-fno-strict-aliasing $QEMU_CFLAGS"
-CFLAGS="-g $CFLAGS"
 QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS"
 QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS"
 QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE 
$QEMU_CFLAGS"
 QEMU_CFLAGS="-D_FORTIFY_SOURCE=2 $QEMU_CFLAGS"
 QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/fpu"
-LDFLAGS="-g $LDFLAGS"
 
 # make source path absolute
 source_path=`cd "$source_path"; pwd`
@@ -518,6 +519,8 @@ for opt do
   ;;
   --cc=*)
   ;;
+  --optflags=*)
+  ;;
   --host-cc=*) host_cc="$optarg"
   ;;
   --make=*) make="$optarg"
@@ -937,6 +940,7 @@ echo "  --cross-prefix=PREFIXuse PREFIX for compile 
tools [$cross_prefix]"
 echo "  --cc=CC  use C compiler CC [$cc]"
 echo "  --host-cc=CC use C compiler CC [$host_cc] for code run at"
 echo "   build time"
+echo "  --optflags=FLAGS override optimization compiler flags"
 echo "  --extra-cflags=CFLAGSappend extra C compiler flags QEMU_CFLAGS"
 echo "  --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS"
 echo "  --make=MAKE  use specified make [$make]"
@@ -2569,8 +2573,15 @@ fi
 # End of CC checks
 # After here, no more $cc or $ld runs
 
-if test "$debug" = "no" ; then
-  CFLAGS="-O2 $CFLAGS"
+if test "$debug" = "yes" ; then
+  CFLAGS="-g $CFLAGS"
+  LDFLAGS="-g $LDFLAGS"
+else
+  if test -n "$optflags" ; then
+  CFLAGS="$optflags $CFLAGS"
+  else
+  CFLAGS="-O2 $CFLAGS"
+  fi
 fi
 
 # Consult white-list to determine whether to enable werror
-- 
1.7.6




Re: [Qemu-devel] qemu segfaults at start

2011-09-06 Thread octane indice
En réponse à Stefan Hajnoczi  :
> > qemu disk.img
> > Segmentation fault
> 
> Please post the backtrace as well as your host operating
> system
> version (e.g. Fedora 15):
> 
> gdb --args qemu disk.img
> (gdb) r
> ...runs and crashes...
> (gdb) bt
>
Thanks for the help, here the infos:

I run under slackware 13.1
$ gcc -v
Reading specs from /usr/lib/gcc/i486-slackware-linux/4.4.4/specs
Target: i486-slackware-linux
Configured with: ../gcc-4.4.4/configure --prefix=/usr --libdir=/usr/lib 
--enable-
shared --enable-bootstrap --enable-languages=ada,c,c++,fortran,java,objc --
enable-threads=posix --enable-checking=release --with-system-zlib --with-
python-dir=/lib/python2.6/site-packages --disable-libunwind-exceptions --
enable-__cxa_atexit --enable-libssp --with-gnu-ld --verbose --with-arch=i486 -
-target=i486-slackware-linux --build=i486-slackware-linux --host=i486-
slackware-linux
Thread model: posix
gcc version 4.4.4 (GCC)
$ uname -a
Linux aspireone 2.6.33.4-smp #2 SMP Wed May 12 22:47:36 CDT 2010 i686 
Intel(R) Atom(TM) CPU N270   @ 1.60GHz GenuineIntel GNU/Linux

-I'm remote, so I use vnc, but even in local it does the same.
-In order to prove it's not related to the disk used, I create an empty one for 
the purpose:
$ dd if=/dev/zero of=disk.img bs=1024k count=1


$ gdb --args qemu disk.img -vnc 127.0.0.1:1
GNU gdb (GDB) 7.1
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 

This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i486-slackware-linux".
For bug reporting instructions, please see:
...
Reading symbols from /usr/local/bin/qemu...(no debugging symbols 
found)...done.
(gdb) r
Starting program: /usr/local/bin/qemu disk.img -vnc 127.0.0.1:1
[Thread debugging using libthread_db enabled]

Program received signal SIGSEGV, Segmentation fault.
0x081a94c1 in ?? ()
(gdb) bt
#0  0x081a94c1 in ?? ()
#1  0xb58af3e7 in ?? ()
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
(gdb) info reg
eax0xbfffef08   -1073746168
ecx0x1  1
edx0x0  0
ebx0x8  8
esp0xbfffee50   0xbfffee50
ebp0xbfffef08   0xbfffef08
esi0x0  0
edi0x0  0
eip0x81a94c10x81a94c1
eflags 0x210246 [ PF ZF IF RF ID ]
cs 0x73 115
ss 0x7b 123
ds 0x7b 123
es 0x7b 123
fs 0x0  0
gs 0x33 51
(gdb)

And exact same bt if I launch with qemu -hda disk.img

HTH, thanks


Envoyé avec Inmano, ma messagerie renversante et gratuite : 
http://www.inmano.com






Re: [Qemu-devel] qemu segfaults at start

2011-09-06 Thread Stefan Weil

Am 06.09.2011 10:11, schrieb octane indice:

$ gdb --args qemu disk.img -vnc 127.0.0.1:1
GNU gdb (GDB) 7.1
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later

This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i486-slackware-linux".
For bug reporting instructions, please see:
...
Reading symbols from /usr/local/bin/qemu...(no debugging symbols
found)...done.
(gdb) r
Starting program: /usr/local/bin/qemu disk.img -vnc 127.0.0.1:1
[Thread debugging using libthread_db enabled]

Program received signal SIGSEGV, Segmentation fault.
0x081a94c1 in ?? ()
(gdb) bt
#0  0x081a94c1 in ?? ()
#1  0xb58af3e7 in ?? ()
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
(gdb) info reg
eax0xbfffef08   -1073746168
ecx0x1  1
edx0x0  0
ebx0x8  8
esp0xbfffee50   0xbfffee50
ebp0xbfffef08   0xbfffef08
esi0x0  0
edi0x0  0
eip0x81a94c10x81a94c1
eflags 0x210246 [ PF ZF IF RF ID ]
cs 0x73 115
ss 0x7b 123
ds 0x7b 123
es 0x7b 123
fs 0x0  0
gs 0x33 51
(gdb)

And exact same bt if I launch with qemu -hda disk.img

HTH, thanks
   


/usr/local/bin/qemu is stripped because it was installed with make install,
so there is no useful debugging information.

Please look for the unstripped i386-softmmu/qemu executable in your build path
and run it using gdb.

Regards,
Stefan Weil





Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-06 Thread David Gibson
On Tue, Sep 06, 2011 at 08:55:42AM +0200, Paolo Bonzini wrote:
> On 09/06/2011 05:12 AM, David Gibson wrote:
> >I'm not "fixing ppc".  I'm fixing a fundamental flaw in the protocol
> >implementation._So far_  I've only observed the effects on ppc, but
> >that doesn't mean they don't exist.
> 
> Actually Michael is right.  The implementation is correct on x86,
> though wrong anywhere else (perhaps s390?).  On those architectures
> you do not need rmb() and wmb().
> 
> So doing the __sync_synchronize() on ppc only as he proposed is
> wrong; but not doing it (and leaving only a compiler barrier) on x86
> is correct.  See http://g.oswego.edu/dl/jmm/cookbook.html under
> "Multiprocessors".

Well, in any case, I've realised that we ought to merge the barriers
in virtio with those in qemu-barrier.h.  Which are also unsafe on
non-x86.

I have a draft two patch series to do that, which I intend to post
soon.  Unfortunately, I haven't been able to test it on ppc kvm yet,
because there are other bugs which are making qemu not even boot the
pseries machine on a ppc64 host.  Still trying to work out what the
hell's going on there, smells like bad extension or masking of CTR
(SLOF seems to get stuck in a bdnz loop with something close to 2^57
in CTR).

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson



Re: [Qemu-devel] [PATCH] pci: add standard bridge device

2011-09-06 Thread Markus Armbruster
Gerd Hoffmann  writes:

>   Hi,
>
>>> Looking...  qdev_device_help() shows only device properties, not bus
>>> properties.  I'd call that a bug.
>>
>> Hmm, but is "bus" a bus property?
>
> It isn't.  bus= is handled by qdev core (id= too).  addr= actually is
> a (pci) bus property.

bus is indeed treated specially, but the user doesn't care.  The help
text should cover it just like any "normal" property.



Re: [Qemu-devel] [PATCH] Only build with -g CFLAGS/LDFLAGS if using --enable-debug and add --optflags.

2011-09-06 Thread Peter Maydell
On 6 September 2011 09:02, Brad  wrote:
> Only build with -g CFLAGS/LDFLAGS if using --enable-debug.

I'm not convinced -- I think we should always build with -g;
it means you can do at least some debugging on an in-tree
build even if it's the optimised version. Debug info should
be stripped at install time, right?

(I don't think this is a particularly out-on-a-limb view;
it's the standard policy for building things in Debian,
for instance.)

-- PMM



Re: [Qemu-devel] qemu segfaults at start

2011-09-06 Thread Stefan Hajnoczi
On Tue, Sep 6, 2011 at 9:11 AM, octane indice  wrote:
> En réponse à Stefan Hajnoczi  :
> -In order to prove it's not related to the disk used, I create an empty one 
> for
> the purpose:
> $ dd if=/dev/zero of=disk.img bs=1024k count=1

You can run QEMU completely without a disk, just run:
$ gdb qemu
(gdb) r

I wonder if it crashes that way too.

Stefan



Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-06 Thread Avi Kivity

On 09/06/2011 09:55 AM, Paolo Bonzini wrote:

On 09/06/2011 05:12 AM, David Gibson wrote:

I'm not "fixing ppc".  I'm fixing a fundamental flaw in the protocol
implementation._So far_  I've only observed the effects on ppc, but
that doesn't mean they don't exist.


Actually Michael is right.  The implementation is correct on x86, 
though wrong anywhere else (perhaps s390?).  On those architectures 
you do not need rmb() and wmb().


Are we sure?  Nothing prevents the guest from using weakly-ordered 
writes, is there?  For example, using MOVNTDQ.


Although in that case the guest is probably required to issue an SFENCE.

--
error compiling committee.c: too many arguments to function




Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-06 Thread Michael S. Tsirkin
On Tue, Sep 06, 2011 at 12:28:40PM +0300, Avi Kivity wrote:
> On 09/06/2011 09:55 AM, Paolo Bonzini wrote:
> >On 09/06/2011 05:12 AM, David Gibson wrote:
> >>I'm not "fixing ppc".  I'm fixing a fundamental flaw in the protocol
> >>implementation._So far_  I've only observed the effects on ppc, but
> >>that doesn't mean they don't exist.
> >
> >Actually Michael is right.  The implementation is correct on x86,
> >though wrong anywhere else (perhaps s390?).  On those
> >architectures you do not need rmb() and wmb().
> 
> Are we sure?  Nothing prevents the guest from using weakly-ordered
> writes, is there?  For example, using MOVNTDQ.

Yes but the macros in question are here to order qemu accesses,
not guest accesses.
> 
> Although in that case the guest is probably required to issue an SFENCE.
> -- 
> error compiling committee.c: too many arguments to function



Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-06 Thread Paolo Bonzini

On 09/06/2011 11:28 AM, Avi Kivity wrote:


Actually Michael is right.  The implementation is correct on x86,
though wrong anywhere else (perhaps s390?).  On those architectures
you do not need rmb() and wmb().


Are we sure?  Nothing prevents the guest from using weakly-ordered
writes, is there?  For example, using MOVNTDQ.

Although in that case the guest is probably required to issue an SFENCE.


Yes, that's the guest problem.  You cannot do an SFENCE on the guest's 
behalf anyway.


Paolo



Re: [Qemu-devel] [PATCH] Only build with -g CFLAGS/LDFLAGS if using --enable-debug and add --optflags.

2011-09-06 Thread Gerd Hoffmann

On 09/06/11 10:02, Brad wrote:

Only build with -g CFLAGS/LDFLAGS if using --enable-debug.
Add --optflags to allow overriding the default optimization
level added to CFLAGS.

This is a first draft of coming up with a patch I could potentially
push upstream based on much cruder local patches to do something
similar. I'm trying to eliminate having to patch the configure
script.


You don't have to.  You can just run 'make CFLAGS="$optflags"' to 
override the defaults.  Nevertheless having optflags would be nice as 
you don't have to type this for each make run then.


I don't think we should mess with the -g flag.  It should stay enabled 
by default, so you can easily get a useful stacktrace out of a core 
without having to rebuild with debug info first.


cheers,
  Gerd




[Qemu-devel] [PATCH] scsi: refine constants for READ CAPACITY 16

2011-09-06 Thread Paolo Bonzini
Rename SERVICE_ACTION_IN to SERVICE_ACTION_IN_16 to distinguish
from the 12-byte CDB variant, and add a constant for the subcommand.

Signed-off-by: Paolo Bonzini 
---
 hw/scsi-bus.c  |3 ++-
 hw/scsi-defs.h |8 +++-
 hw/scsi-disk.c |6 +++---
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 7c8afff..80b8802 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -981,7 +981,7 @@ static const char *scsi_command_name(uint8_t cmd)
 [ LOCATE_16] = "LOCATE_16",
 [ WRITE_SAME_16] = "WRITE_SAME_16",
 [ ERASE_16 ] = "ERASE_16",
-[ SERVICE_ACTION_IN] = "SERVICE_ACTION_IN",
+[ SERVICE_ACTION_IN_16 ] = "SERVICE_ACTION_IN_16",
 [ WRITE_LONG_16] = "WRITE_LONG_16",
 [ REPORT_LUNS  ] = "REPORT_LUNS",
 [ BLANK] = "BLANK",
@@ -991,6 +991,7 @@ static const char *scsi_command_name(uint8_t cmd)
 [ LOAD_UNLOAD  ] = "LOAD_UNLOAD",
 [ READ_12  ] = "READ_12",
 [ WRITE_12 ] = "WRITE_12",
+[ SERVICE_ACTION_IN_12 ] = "SERVICE_ACTION_IN_12",
 [ WRITE_VERIFY_12  ] = "WRITE_VERIFY_12",
 [ VERIFY_12] = "VERIFY_12",
 [ SEARCH_HIGH_12   ] = "SEARCH_HIGH_12",
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index ea288fa..bfe9392 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -102,7 +102,7 @@
 #define LOCATE_16 0x92
 #define WRITE_SAME_16 0x93
 #define ERASE_16  0x93
-#define SERVICE_ACTION_IN 0x9e
+#define SERVICE_ACTION_IN_16  0x9e
 #define WRITE_LONG_16 0x9f
 #define REPORT_LUNS   0xa0
 #define BLANK 0xa1
@@ -112,6 +112,7 @@
 #define LOAD_UNLOAD   0xa6
 #define READ_12   0xa8
 #define WRITE_12  0xaa
+#define SERVICE_ACTION_IN_12  0xab
 #define WRITE_VERIFY_12   0xae
 #define VERIFY_12 0xaf
 #define SEARCH_HIGH_120xb0
@@ -123,6 +124,11 @@
 #define SET_CD_SPEED  0xbb
 
 /*
+ * SERVICE ACTION IN subcodes
+ */
+#define SAI_READ_CAPACITY_16  0x10
+
+/*
  *  SAM Status codes
  */
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index a1c5812..487f6cb 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -945,9 +945,9 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r)
 outbuf[7] = 8; // CD-ROM
 buflen = 8;
 break;
-case SERVICE_ACTION_IN:
+case SERVICE_ACTION_IN_16:
 /* Service Action In subcommands. */
-if ((req->cmd.buf[1] & 31) == 0x10) {
+if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
 DPRINTF("SAI READ CAPACITY(16)\n");
 memset(outbuf, 0, req->cmd.xfer);
 bdrv_get_geometry(s->bs, &nb_sectors);
@@ -1045,7 +1045,7 @@ static int32_t scsi_send_command(SCSIRequest *req, 
uint8_t *buf)
 case READ_CAPACITY_10:
 case READ_TOC:
 case GET_CONFIGURATION:
-case SERVICE_ACTION_IN:
+case SERVICE_ACTION_IN_16:
 case VERIFY_10:
 rc = scsi_disk_emulate_command(r);
 if (rc < 0) {
-- 
1.7.6




[Qemu-devel] [PATCH] scsi: fill in additional sense length correctly

2011-09-06 Thread Paolo Bonzini
Even though we do not use them, we should include the last three
bytes of sense data in the additional sense length.

Signed-off-by: Paolo Bonzini 
---
 hw/scsi-bus.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 80b8802..96d6305 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -501,6 +501,7 @@ void scsi_req_build_sense(SCSIRequest *req, SCSISense sense)
 memset(req->sense, 0, 18);
 req->sense[0] = 0xf0;
 req->sense[2] = sense.key;
+req->sense[7] = 10;
 req->sense[12] = sense.asc;
 req->sense[13] = sense.ascq;
 req->sense_len = 18;
@@ -887,7 +888,7 @@ int scsi_build_sense(uint8_t *in_buf, int in_len,
 /* Return fixed format sense buffer */
 buf[0] = 0xf0;
 buf[2] = sense.key;
-buf[7] = 7;
+buf[7] = 10;
 buf[12] = sense.asc;
 buf[13] = sense.ascq;
 return MIN(len, 18);
-- 
1.7.6




[Qemu-devel] [PATCH] scsi: improve MODE SENSE emulation

2011-09-06 Thread Paolo Bonzini
- do not return extra pages when requesting all pages (PAGE CODE = 0x3f)

- return correct sense code for PC = 3 (saved parameters not supported)

- do not return geometry pages for CD devices

Signed-off-by: Paolo Bonzini 
---
 hw/scsi-disk.c |   96 +++-
 1 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 487f6cb..eaff8b8 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -583,12 +583,12 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, 
uint8_t *outbuf)
 return buflen;
 }
 
-static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p,
+static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
int page_control)
 {
-SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
 BlockDriverState *bdrv = s->bs;
 int cylinders, heads, secs;
+uint8_t *p = *p_outbuf;
 
 /*
  * If Changeable Values are requested, a mask denoting those mode 
parameters
@@ -598,10 +598,13 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
  */
 switch (page) {
 case 4: /* Rigid disk device geometry page. */
+if (s->qdev.type == TYPE_ROM) {
+return -1;
+}
 p[0] = 4;
 p[1] = 0x16;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 /* if a geometry hint is available, use it */
 bdrv_get_geometry_hint(bdrv, &cylinders, &heads, &secs);
@@ -627,13 +630,16 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
 /* Medium rotation rate [rpm], 5400 rpm */
 p[20] = (5400 >> 8) & 0xff;
 p[21] = 5400 & 0xff;
-return p[1] + 2;
+break;
 
 case 5: /* Flexible disk device geometry page. */
+if (s->qdev.type == TYPE_ROM) {
+return -1;
+}
 p[0] = 5;
 p[1] = 0x1e;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 /* Transfer rate [kbit/s], 5Mbit/s */
 p[2] = 5000 >> 8;
@@ -666,26 +672,27 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
 /* Medium rotation rate [rpm], 5400 rpm */
 p[28] = (5400 >> 8) & 0xff;
 p[29] = 5400 & 0xff;
-return p[1] + 2;
+break;
 
 case 8: /* Caching page.  */
 p[0] = 8;
 p[1] = 0x12;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 if (bdrv_enable_write_cache(s->bs)) {
 p[2] = 4; /* WCE */
 }
-return p[1] + 2;
+break;
 
 case 0x2a: /* CD Capabilities and Mechanical Status page. */
-if (s->qdev.type != TYPE_ROM)
-return 0;
+if (s->qdev.type != TYPE_ROM) {
+return -1;
+}
 p[0] = 0x2a;
 p[1] = 0x14;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 p[2] = 3; // CD-R & CD-RW read
 p[3] = 0; // Writing not supported
@@ -710,27 +717,30 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
 p[19] = (16 * 176) & 0xff;
 p[20] = (16 * 176) >> 8; // 16x write speed current
 p[21] = (16 * 176) & 0xff;
-return p[1] + 2;
+break;
 
 default:
-return 0;
+return -1;
 }
+
+*p_outbuf += p[1] + 2;
+return p[1] + 2;
 }
 
-static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf)
+static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
 {
-SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 uint64_t nb_sectors;
-int page, dbd, buflen, page_control;
+int page, dbd, buflen, ret, page_control;
 uint8_t *p;
 uint8_t dev_specific_param;
 
-dbd = req->cmd.buf[1]  & 0x8;
-page = req->cmd.buf[2] & 0x3f;
-page_control = (req->cmd.buf[2] & 0xc0) >> 6;
+dbd = r->req.cmd.buf[1]  & 0x8;
+page = r->req.cmd.buf[2] & 0x3f;
+page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
 DPRINTF("Mode Sense(%d) (page %d, xfer %zd, page_control %d)\n",
-(req->cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, req->cmd.xfer, 
page_control);
-memset(outbuf, 0, req->cmd.xfer);
+(r->req.cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, r->req.cmd.xfer, 
page_control);
+memset(outbuf, 0, r->req.cmd.xfer);
 p = outbuf;
 
 if (bdrv_is_read_only(s->bs)) {
@@ -739,7 +749,7 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, 
uint8_t *outbuf)
 dev_specific_param = 0x00;
 }
 
-if (req->cmd.buf[0] == MODE_SENSE) {
+if (r->req.cmd.buf[0] == MODE_SENSE) {
 p[1] = 0; /* Default media type.  */
 p[2] = dev_spe

[Qemu-devel] 50 euros offerts pour démarrer votre campagne Google AdWords

2011-09-06 Thread Google AdWords
Affichez correctement cet email.
 

  
   
 Vous souhaitez apparaître sur la page de résultats de Google lorsqu'un 
utilisateur recherche vos produits ou services ? 
 Essayez GRATUITEMENT le programme publicitaire AdWords de Google qui vous 
permet de publier vos annonces en ligne à côté des résultats du moteur de 
recherche Google. 
 Obtenez votre crédit publicitaire de 50€ dès maintenant et lancez votre 
campagne !
 
 NB: La publicité sur Google convient tout à fait aux petits budget, vos 
dépenses restent toujours sous contrôle et vous pouvez interrompre votre 
campagne à tout moment. 

  * Ce crédit promotionnel de 50€ est valable uniquement pour les nouveaux 
annonceurs, dont les comptes AdWords ont moins de 14 jours, et ayant une 
adresse de facturation en France. L'ouverture d'un compte AdWords est assorti 
de 5€ de frais d'activation. Ces frais d'activation ne seront pas facturés à 
condition de choisir la facturation par post-paiement. Pour en savoir plus, 
rendez-vous sur www.google.fr/adwords/coupons/terms.html  

Merci de ne plus nous contacter.
 
 
Si vous souhaitez vous désabonner, recopiez cette adresse dans la barre 
d'adresse de votre navigateur : 
http://www.ccmail-gmc2.com/u-1.1.php?param=3Rbi_1IvEkRQjb1KxEXDWVukfGahjZsOBcEkRQjb1KwEjb1KEkRQjb1KxE2/jGEg/W/KEkRQjb1KSEIQIvIGmQRvEkRQjb1KwEiQI3bi3EkRQjb1KxEwJCLLLSLEkRQjb1KwE3sk/lPbY/EkRQjb1KxExEkRQjb1KwE1glibjkbvI/EkRQjb1KxESwS


Re: [Qemu-devel] [PATCH] Only build with -g CFLAGS/LDFLAGS if using --enable-debug and add --optflags.

2011-09-06 Thread Peter Maydell
On 6 September 2011 09:02, Brad  wrote:
> @@ -937,6 +940,7 @@ echo "  --cross-prefix=PREFIX    use PREFIX for compile 
> tools [$cross_prefix]"
>  echo "  --cc=CC                  use C compiler CC [$cc]"
>  echo "  --host-cc=CC             use C compiler CC [$host_cc] for code run 
> at"
>  echo "                           build time"
> +echo "  --optflags=FLAGS         override optimization compiler flags"
>  echo "  --extra-cflags=CFLAGS    append extra C compiler flags QEMU_CFLAGS"
>  echo "  --extra-ldflags=LDFLAGS  append extra linker flags LDFLAGS"
>  echo "  --make=MAKE              use specified make [$make]"

No in principle objection to the --optflags bit of the patch, but you
should rearrange things so that the help message can include the
default optflags, something like:
  --optflags=FLAGSspecify compiler flags for optimization [-O2]

(...which might also let you avoid the 'if test -n "$optflags" later.)

-- PMM



Re: [Qemu-devel] [PATCH v3 2/8] VMDK: add twoGbMaxExtentSparse support

2011-09-06 Thread Kevin Wolf
Am 12.08.2011 17:19, schrieb Fam Zheng:
> Add twoGbMaxExtentSparse support. Introduce vmdk_free_last_extent.
> 
> Signed-off-by: Fam Zheng 
> ---
>  block/vmdk.c |  133 
> --
>  1 files changed, 83 insertions(+), 50 deletions(-)
> 
> diff --git a/block/vmdk.c b/block/vmdk.c
> index e22a893..ab15840 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -174,6 +174,17 @@ static void vmdk_free_extents(BlockDriverState *bs)
>  qemu_free(s->extents);
>  }
>  
> +static void vmdk_free_last_extent(BlockDriverState *bs)
> +{
> +BDRVVmdkState *s = bs->opaque;
> +
> +if (s->num_extents == 0) {
> +return;
> +}
> +s->num_extents--;
> +s->extents = qemu_realloc(s->extents, s->num_extents * 
> sizeof(VmdkExtent));
> +}
> +
>  static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
>  {
>  char desc[DESC_SIZE];
> @@ -357,18 +368,18 @@ static int vmdk_init_tables(BlockDriverState *bs, 
> VmdkExtent *extent)
>  return ret;
>  }
>  
> -static int vmdk_open_vmdk3(BlockDriverState *bs, int flags)
> +static int vmdk_open_vmdk3(BlockDriverState *bs,
> +   BlockDriverState *file,
> +   int flags)
>  {
>  int ret;
>  uint32_t magic;
>  VMDK3Header header;
> -BDRVVmdkState *s = bs->opaque;
>  VmdkExtent *extent;
>  
> -s->desc_offset = 0x200;
> -ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
> +ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
>  if (ret < 0) {
> -goto fail;
> +return ret;
>  }
>  extent = vmdk_add_extent(bs,
>   bs->file, false,
> @@ -378,58 +389,45 @@ static int vmdk_open_vmdk3(BlockDriverState *bs, int 
> flags)
>   le32_to_cpu(header.granularity));
>  ret = vmdk_init_tables(bs, extent);
>  if (ret) {
> -/* vmdk_init_tables cleans up on fail, so only free allocation of
> - * vmdk_add_extent here. */
> -goto fail;
> +/* free extent allocated by vmdk_add_extent */
> +vmdk_free_last_extent(bs);
>  }
> -return 0;
> - fail:
> -vmdk_free_extents(bs);
>  return ret;
>  }
>  
> -static int vmdk_open_vmdk4(BlockDriverState *bs, int flags)
> +static int vmdk_open_vmdk4(BlockDriverState *bs,
> +   BlockDriverState *file,
> +   int flags)
>  {
>  int ret;
>  uint32_t magic;
>  uint32_t l1_size, l1_entry_sectors;
>  VMDK4Header header;
> -BDRVVmdkState *s = bs->opaque;
>  VmdkExtent *extent;
>  
> -s->desc_offset = 0x200;
> -ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
> +ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
>  if (ret < 0) {
> -goto fail;
> +return ret;
>  }
>  l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
>  * le64_to_cpu(header.granularity);
> +if (l1_entry_sectors <= 0) {
> +return -EINVAL;
> +}
>  l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
>  / l1_entry_sectors;
> -extent = vmdk_add_extent(bs, bs->file, false,
> +extent = vmdk_add_extent(bs, file, false,
>le64_to_cpu(header.capacity),
>le64_to_cpu(header.gd_offset) << 9,
>le64_to_cpu(header.rgd_offset) << 9,
>l1_size,
>le32_to_cpu(header.num_gtes_per_gte),
>le64_to_cpu(header.granularity));
> -if (extent->l1_entry_sectors <= 0) {
> -ret = -EINVAL;
> -goto fail;
> -}
> -/* try to open parent images, if exist */
> -ret = vmdk_parent_open(bs);
> -if (ret) {
> -goto fail;
> -}
> -s->parent_cid = vmdk_read_cid(bs, 1);
>  ret = vmdk_init_tables(bs, extent);
>  if (ret) {
> -goto fail;
> +/* free extent allocated by vmdk_add_extent */
> +vmdk_free_last_extent(bs);
>  }
> -return 0;
> - fail:
> -vmdk_free_extents(bs);
>  return ret;
>  }
>  
> @@ -460,6 +458,31 @@ static int vmdk_parse_description(const char *desc, 
> const char *opt_name,
>  return 0;
>  }
>  
> +/* Open an extent file and append to bs array */
> +static int vmdk_open_sparse(BlockDriverState *bs,
> +BlockDriverState *file,
> +int flags)
> +{
> +uint32_t magic;
> +
> +if (bdrv_pread(file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
> +return -EIO;
> +}
> +
> +magic = be32_to_cpu(magic);
> +switch (magic) {
> +case VMDK3_MAGIC:
> +return vmdk_open_vmdk3(bs, file, flags);
> +break;
> +case VMDK4_MAGIC:
> +return vmdk_open_vmdk4(bs, file, flags);
> +break;
> +default:
> +

Re: [Qemu-devel] [PATCH v3 0/8] Add various VMDK subformats support

2011-09-06 Thread Kevin Wolf
Am 05.09.2011 10:15, schrieb Stefan Hajnoczi:
> On Fri, Aug 12, 2011 at 11:19:26PM +0800, Fam Zheng wrote:
>> Changes:
>> 02/06: Free extents on fail in vmdk_open.
>> 
>> Added:
>> 07/08: VMDK: bugfix, open Haiku vmdk image
>> 08/08: VMDK: bugfix, opening vSphere 4 exported image
>>
>> Fam Zheng (8):
>>   VMDK: enable twoGbMaxExtentFlat
>>   VMDK: add twoGbMaxExtentSparse support
>>   VMDK: separate vmdk_read_extent/vmdk_write_extent
>>   VMDK: Opening compressed extent.
>>   VMDK: read/write compressed extent
>>   VMDK: creating streamOptimized subformat
>>   VMDK: bugfix, open Haiku vmdk image
>>   VMDK: bugfix, opening vSphere 4 exported image
>>
>>  block/vmdk.c |  346 
>> +-
>>  1 files changed, 270 insertions(+), 76 deletions(-)
> 
> Reviewed-by: Stefan Hajnoczi 

Thanks, applied to the block branch.

I saw that in some places the result of bdrv_pread/pwrite is thrown away
and -EIO is returned instead. Please fix this on top of this series to
return the real error code.

Kevin



Re: [Qemu-devel] [PATCH] [SPARC] Gdbstub: Fix back-trace on SPARC32

2011-09-06 Thread Fabien Chouteau
On 05/09/2011 21:22, Blue Swirl wrote:
> On Mon, Sep 5, 2011 at 9:33 AM, Fabien Chouteau  wrote:
>> On 03/09/2011 11:25, Blue Swirl wrote:
>>> On Thu, Sep 1, 2011 at 2:17 PM, Fabien Chouteau  
>>> wrote:
 Gdb expects all registers windows to be flushed in ram, which is not the 
 case
 in Qemu. Therefore the back-trace generation doesn't work. This patch adds 
 a
 function to handle reads/writes in stack frames as if windows were flushed.

 Signed-off-by: Fabien Chouteau 
 ---
  gdbstub.c |   10 --
  target-sparc/cpu.h|7 
  target-sparc/helper.c |   85 
 +
  3 files changed, 99 insertions(+), 3 deletions(-)

 diff --git a/gdbstub.c b/gdbstub.c
 index 3b87c27..85d5ad7 100644
 --- a/gdbstub.c
 +++ b/gdbstub.c
 @@ -41,6 +41,9 @@
  #include "qemu_socket.h"
  #include "kvm.h"

 +#ifndef TARGET_CPU_MEMORY_RW_DEBUG
 +#define TARGET_CPU_MEMORY_RW_DEBUG cpu_memory_rw_debug
>>>
>>> These days, inline functions are preferred over macros.
>>>
>>
>> This is to allow target-specific implementation of the function.
>
> That can be done with inline functions too.

OK, how do you do that?

 +#endif

  enum {
 GDB_SIGNAL_0 = 0,
 @@ -2013,7 +2016,7 @@ static int gdb_handle_packet(GDBState *s, const char 
 *line_buf)
 if (*p == ',')
 p++;
 len = strtoull(p, NULL, 16);
 -if (cpu_memory_rw_debug(s->g_cpu, addr, mem_buf, len, 0) != 0) {
 +if (TARGET_CPU_MEMORY_RW_DEBUG(s->g_cpu, addr, mem_buf, len, 0) 
 != 0) {
>>>
>>> cpu_memory_rw_debug() could remain unwrapped with a generic function
>>> like cpu_gdb_sync_memory() which gdbstub should explicitly call.
>>>
>>> Maybe the lazy condition codes etc. could be handled in similar way,
>>> cpu_gdb_sync_registers().
>>>
>>
>> Excuse me, I don't understand here.
>
> cpu_gdb_{read,write}_register needs to force calculation of lazy
> condition codes. On Sparc this is handled by cpu_get_psr(), so it is
> not explicit.

I still don't understand you point. Do you suggest a cpu_gdb_sync_memory() that
will flush register windows?


 +
 +/* Gdb expects all registers windows to be flushed in ram. This function 
 handles
 + * reads/writes in stack frames as if windows were flushed. We assume 
 that the
 + * sparc ABI is followed.
 + */
>>>
>>> We can't assume that, it depends on what we are executing (BIOS, OS,
>>> even application).
>>
>> Well, maybe the statement is too strong. The ABI is required to get a valid
>> result. Gdb cannot build back-traces if the ABI is not followed anyway.
>
> But if the ABI assumption happens to be wrong (for example registers
> contain random values), memory may be corrupted because this would
> happily use whatever the registers contain.

This cannot corrupt memory, the point is to read/write in registers instead of
memory.


> Another way to fix this would be that GDB would tell QEMU what ABI to
> use for flushing. But how would one tell GDB about a non-standard ABI?
>
> For user emulators we can make ABI assumptions, there similar patch
> could make sense. But system emulators can't assume anything about the
> guest OS, it could be Linux, *BSD, a commercial OS or even a toy OS.

I think all of these kernels follow the SPARC32 ABI, and if they don't Gdb
cannot handle them anyway.

This solution covers 99% of the problem.

>
>>> On Sparc64 there are two ABIs (32 bit and 64 bit
>>> with offset of -2047), though calling flushw instruction could handle
>>> that.
>>
>> This solution is for SPARC32 only.
>>
>>> If the flush happens to trigger a fault, we're in big trouble.
>>>
>>
>> That's why it's safer/easier to use this "hackish" read/write in the 
>> registers.
>
> No, if the fault happens here, handling it may be tricky. See for
> example what paranoia Linux has to do for user window flushing, it
> involves the no-fault mode in MMU.

There's no possible fault, as we do not read or write in memory, that's the
point of this implementation.

>
>>> Overall, I think this is too hackish. Maybe this is a bug in GDB
>>> instead, information from backtrace is not reliable if ABI is not
>>> known.
>>>
>>
>> It's not a bug in Gdb. To build back-traces you have to read stack frames. To
>> read stack frames, register windows must be flushed.
>
> Yes, but the flusher should be GDB, assuming that flushing is even a
> good idea which I doubt.
>
> Back traces are not reliable in any case. The code could be compiled
> to omit the frame pointer.

This is a corner case. And again, something not supported by Gdb.


>> In Qemu we can avoid
>> flushing with this little trick.
>
> This doesn't avoid flushing but performs it magically during GDB memory 
> access.
>

...instead of writing and reading all register windows each time Gdb stops Qemu.
That's what I call "avoid flushing".

Regards,

-- 

[Qemu-devel] [PATCH v7 0/4] The intro of QEMU block I/O throttling

2011-09-06 Thread Zhi Yong Wu
The main goal of the patch is to effectively cap the disk I/O speed or counts 
of one single VM.It is only one draft, so it unavoidably has some drawbacks, if 
you catch them, please let me know.

The patch will mainly introduce one block I/O throttling algorithm, one timer 
and one block queue for each I/O limits enabled drive.

When a block request is coming in, the throttling algorithm will check if its 
I/O rate or counts exceed the limits; if yes, then it will enqueue to the block 
queue; The timer will handle the I/O requests in it.

Some available features follow as below:
(1) global bps limit.
   -drive bps=xxxin bytes/s
(2) only read bps limit
   -drive bps_rd=xxx in bytes/s
(3) only write bps limit
   -drive bps_wr=xxx in bytes/s
(4) global iops limit
   -drive iops=xxx   in ios/s
(5) only read iops limit
   -drive iops_rd=xxxin ios/s
(6) only write iops limit
   -drive iops_wr=xxxin ios/s
(7) the combination of some limits.
   -drive bps=xxx,iops=xxx

Known Limitations:
(1) #1 can not coexist with #2, #3
(2) #4 can not coexist with #5, #6
(3) When bps/iops limits are specified to a small value such as 511 bytes/s, 
this VM will hang up. We are considering how to handle this senario.

Changes since code V6:
  Mainly simply the block queue.
  Adjust codes based on stefan's comments.

Zhi Yong Wu (4):
  block: add the command line support
  block: add the block queue support
  block: add block timer and throttling algorithm
  qmp/hmp: add block_set_io_throttle

 v6: Mainly fix the aio callback issue for block queue.
 Adjust codes based on Ram Pai's comments.

 v5: add qmp/hmp support.
 Adjust the codes based on stefan's comments
 qmp/hmp: add block_set_io_throttle

 v4: fix memory leaking based on ryan's feedback.

 v3: Added the code for extending slice time, and modified the method to 
compute wait time for the timer.

 v2: The codes V2 for QEMU disk I/O limits.
 Modified the codes mainly based on stefan's comments.

 v1: Submit the codes for QEMU disk I/O limits.
 Only a code draft.

 Makefile.objs |2 +-
 block.c   |  331 +++--
 block.h   |6 +-
 block/blk-queue.c |  184 +
 block/blk-queue.h |   59 ++
 block_int.h   |   30 +
 blockdev.c|   98 
 blockdev.h|2 +
 hmp-commands.hx   |   15 +++
 qemu-config.c |   24 
 qemu-options.hx   |1 +
 qerror.c  |4 +
 qerror.h  |3 +
 qmp-commands.hx   |   52 -
 14 files changed, 796 insertions(+), 15 deletions(-)
 create mode 100644 block/blk-queue.c
 create mode 100644 block/blk-queue.h

-- 
1.7.6




[Qemu-devel] [PATCH v7 1/4] block: add the command line support

2011-09-06 Thread Zhi Yong Wu
Signed-off-by: Zhi Yong Wu 
---
 block.c |8 
 block_int.h |   30 ++
 blockdev.c  |   29 +
 blockdev.h  |2 ++
 qemu-config.c   |   24 
 qemu-options.hx |1 +
 6 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index 03a21d8..17ee3df 100644
--- a/block.c
+++ b/block.c
@@ -1453,6 +1453,14 @@ void bdrv_get_geometry_hint(BlockDriverState *bs,
 *psecs = bs->secs;
 }
 
+/* throttling disk io limits */
+void bdrv_set_io_limits(BlockDriverState *bs,
+BlockIOLimit *io_limits)
+{
+bs->io_limits = *io_limits;
+bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
+}
+
 /* Recognize floppy formats */
 typedef struct FDFormat {
 FDriveType drive;
diff --git a/block_int.h b/block_int.h
index 8a72b80..368c776 100644
--- a/block_int.h
+++ b/block_int.h
@@ -29,10 +29,18 @@
 #include "qemu-queue.h"
 #include "qemu-coroutine.h"
 #include "qemu-timer.h"
+#include "block/blk-queue.h"
 
 #define BLOCK_FLAG_ENCRYPT 1
 #define BLOCK_FLAG_COMPAT6 4
 
+#define BLOCK_IO_LIMIT_READ 0
+#define BLOCK_IO_LIMIT_WRITE1
+#define BLOCK_IO_LIMIT_TOTAL2
+
+#define BLOCK_IO_SLICE_TIME 1
+#define NANOSECONDS_PER_SECOND  10.0
+
 #define BLOCK_OPT_SIZE  "size"
 #define BLOCK_OPT_ENCRYPT   "encryption"
 #define BLOCK_OPT_COMPAT6   "compat6"
@@ -49,6 +57,16 @@ typedef struct AIOPool {
 BlockDriverAIOCB *free_aiocb;
 } AIOPool;
 
+typedef struct BlockIOLimit {
+uint64_t bps[3];
+uint64_t iops[3];
+} BlockIOLimit;
+
+typedef struct BlockIODisp {
+uint64_t bytes[2];
+uint64_t ios[2];
+} BlockIODisp;
+
 struct BlockDriver {
 const char *format_name;
 int instance_size;
@@ -184,6 +202,15 @@ struct BlockDriverState {
 
 void *sync_aiocb;
 
+/* the time for latest disk I/O */
+int64_t slice_start;
+int64_t slice_end;
+BlockIOLimit io_limits;
+BlockIODisp  io_disps;
+BlockQueue   *block_queue;
+QEMUTimer*block_timer;
+bool io_limits_enabled;
+
 /* I/O stats (display with "info blockstats"). */
 uint64_t nr_bytes[BDRV_MAX_IOTYPE];
 uint64_t nr_ops[BDRV_MAX_IOTYPE];
@@ -230,6 +257,9 @@ void qemu_aio_release(void *p);
 
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 
+void bdrv_set_io_limits(BlockDriverState *bs,
+BlockIOLimit *io_limits);
+
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
diff --git a/blockdev.c b/blockdev.c
index 2602591..619ae9f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -236,6 +236,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
 int on_read_error, on_write_error;
 const char *devaddr;
 DriveInfo *dinfo;
+BlockIOLimit io_limits;
 int snapshot = 0;
 int ret;
 
@@ -354,6 +355,31 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
 }
 }
 
+/* disk I/O throttling */
+io_limits.bps[BLOCK_IO_LIMIT_TOTAL]  =
+   qemu_opt_get_number(opts, "bps", 0);
+io_limits.bps[BLOCK_IO_LIMIT_READ]   =
+   qemu_opt_get_number(opts, "bps_rd", 0);
+io_limits.bps[BLOCK_IO_LIMIT_WRITE]  =
+   qemu_opt_get_number(opts, "bps_wr", 0);
+io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
+   qemu_opt_get_number(opts, "iops", 0);
+io_limits.iops[BLOCK_IO_LIMIT_READ]  =
+   qemu_opt_get_number(opts, "iops_rd", 0);
+io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
+   qemu_opt_get_number(opts, "iops_wr", 0);
+
+if (((io_limits.bps[BLOCK_IO_LIMIT_TOTAL] != 0)
+&& ((io_limits.bps[BLOCK_IO_LIMIT_READ] != 0)
+|| (io_limits.bps[BLOCK_IO_LIMIT_WRITE] != 0)))
+|| ((io_limits.iops[BLOCK_IO_LIMIT_TOTAL] != 0)
+&& ((io_limits.iops[BLOCK_IO_LIMIT_READ] != 0)
+|| (io_limits.iops[BLOCK_IO_LIMIT_WRITE] != 0 {
+error_report("bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr)"
+ "cannot be used at the same time");
+return NULL;
+}
+
 on_write_error = BLOCK_ERR_STOP_ENOSPC;
 if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
 if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != 
IF_NONE) {
@@ -461,6 +487,9 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
 
 bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
 
+/* disk I/O throttling */
+bdrv_set_io_limits(dinfo->bdrv, &io_limits);
+
 switch(type) {
 case IF_IDE:
 case IF_SCSI:
diff --git a/blockdev.h b/blockdev.h
index 3587786..c2f44c6 100644
--- a/blockdev.h
+++ b/blockdev.h
@@ -63,6 +63,8 @@ int do_block_set_passwd(Monitor *mon, const QDict *qdict, 
QObject **ret_data);
 int do_change_block(Monitor *mon, const char *device,
 const char *fi

[Qemu-devel] [PATCH v7 2/4] block: add the block queue support

2011-09-06 Thread Zhi Yong Wu
Signed-off-by: Zhi Yong Wu 
---
 Makefile.objs |2 +-
 block/blk-queue.c |  184 +
 block/blk-queue.h |   59 +
 3 files changed, 244 insertions(+), 1 deletions(-)
 create mode 100644 block/blk-queue.c
 create mode 100644 block/blk-queue.h

diff --git a/Makefile.objs b/Makefile.objs
index d1f3e5d..96a7323 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -33,7 +33,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blk-queue.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
 block-nested-$(CONFIG_CURL) += curl.o
diff --git a/block/blk-queue.c b/block/blk-queue.c
new file mode 100644
index 000..da01fcb
--- /dev/null
+++ b/block/blk-queue.c
@@ -0,0 +1,184 @@
+/*
+ * QEMU System Emulator queue definition for block layer
+ *
+ * Copyright (c) IBM, Corp. 2011
+ *
+ * Authors:
+ *  Zhi Yong Wu  
+ *  Stefan Hajnoczi 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block_int.h"
+#include "block/blk-queue.h"
+#include "qemu-common.h"
+
+/* The APIs for block request queue on qemu block layer.
+ */
+
+struct BlockQueueAIOCB {
+BlockDriverAIOCB common;
+QTAILQ_ENTRY(BlockQueueAIOCB) entry;
+BlockRequestHandler *handler;
+BlockDriverAIOCB *real_acb;
+
+int64_t sector_num;
+QEMUIOVector *qiov;
+int nb_sectors;
+};
+
+typedef struct BlockQueueAIOCB BlockQueueAIOCB;
+
+struct BlockQueue {
+QTAILQ_HEAD(requests, BlockQueueAIOCB) requests;
+bool flushing;
+};
+
+static void qemu_block_queue_dequeue(BlockQueue *queue,
+ BlockQueueAIOCB *request)
+{
+BlockQueueAIOCB *req;
+
+assert(queue);
+while (!QTAILQ_EMPTY(&queue->requests)) {
+req = QTAILQ_FIRST(&queue->requests);
+if (req == request) {
+QTAILQ_REMOVE(&queue->requests, req, entry);
+break;
+}
+}
+}
+
+static void qemu_block_queue_cancel(BlockDriverAIOCB *acb)
+{
+BlockQueueAIOCB *request = container_of(acb, BlockQueueAIOCB, common);
+if (request->real_acb) {
+bdrv_aio_cancel(request->real_acb);
+} else {
+assert(request->common.bs->block_queue);
+qemu_block_queue_dequeue(request->common.bs->block_queue,
+ request);
+}
+
+qemu_aio_release(request);
+}
+
+static AIOPool block_queue_pool = {
+.aiocb_size = sizeof(struct BlockQueueAIOCB),
+.cancel = qemu_block_queue_cancel,
+};
+
+BlockQueue *qemu_new_block_queue(void)
+{
+BlockQueue *queue;
+
+queue = g_malloc0(sizeof(BlockQueue));
+
+QTAILQ_INIT(&queue->requests);
+
+queue->flushing = false;
+
+return queue;
+}
+
+void qemu_del_block_queue(BlockQueue *queue)
+{
+BlockQueueAIOCB *request, *next;
+
+QTAILQ_FOREACH_SAFE(request, &queue->requests, entry, next) {
+QTAILQ_REMOVE(&queue->requests, request, entry);
+qemu_aio_release(request);
+}
+
+g_free(queue);
+}
+
+BlockDriverAIOCB *qemu_block_queue_enqueue(BlockQueue *queue,
+BlockDriverState *bs,
+BlockRequestHandler *handler,
+int64_t sector_num,
+QEMUIOVector *qiov,
+int nb_sectors,
+BlockDriverCompletionFunc *cb,
+void *opaque)
+{
+BlockDriverAIOCB *acb;
+BlockQueueAIOCB *request;
+
+if (queue->flushing) {
+return NULL;
+} else {
+acb = qemu_aio_get(&blo

[Qemu-devel] [PATCH v7 3/4] block: add block timer and throttling algorithm

2011-09-06 Thread Zhi Yong Wu
Note:
 1.) When bps/iops limits are specified to a small value such as 511 
bytes/s, this VM will hang up. We are considering how to handle this senario.
 2.) When "dd" command is issued in guest, if its option bs is set to a 
large value such as "bs=1024K", the result speed will slightly bigger than the 
limits.

For these problems, if you have nice thought, pls let us know.:)

Signed-off-by: Zhi Yong Wu 
---
 block.c|  297 ++--
 block.h|6 +-
 blockdev.c |   69 ++
 3 files changed, 361 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index 17ee3df..1f6f188 100644
--- a/block.c
+++ b/block.c
@@ -30,6 +30,9 @@
 #include "qemu-objects.h"
 #include "qemu-coroutine.h"
 
+#include "qemu-timer.h"
+#include "block/blk-queue.h"
+
 #ifdef CONFIG_BSD
 #include 
 #include 
@@ -72,6 +75,13 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState 
*bs,
  QEMUIOVector *iov);
 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
 
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+bool is_write, double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+bool is_write, int64_t *wait);
+
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
 QTAILQ_HEAD_INITIALIZER(bdrv_states);
 
@@ -104,6 +114,57 @@ int is_windows_drive(const char *filename)
 }
 #endif
 
+/* throttling disk I/O limits */
+void bdrv_io_limits_disable(BlockDriverState *bs)
+{
+bs->io_limits_enabled = false;
+
+if (bs->block_queue) {
+qemu_block_queue_flush(bs->block_queue);
+qemu_del_block_queue(bs->block_queue);
+bs->block_queue = NULL;
+}
+
+if (bs->block_timer) {
+qemu_del_timer(bs->block_timer);
+qemu_free_timer(bs->block_timer);
+bs->block_timer = NULL;
+}
+
+bs->slice_start = 0;
+
+bs->slice_end   = 0;
+}
+
+static void bdrv_block_timer(void *opaque)
+{
+BlockDriverState *bs = opaque;
+BlockQueue *queue= bs->block_queue;
+
+qemu_block_queue_flush(queue);
+}
+
+void bdrv_io_limits_enable(BlockDriverState *bs)
+{
+bs->block_queue = qemu_new_block_queue();
+bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
+
+bs->slice_start = qemu_get_clock_ns(vm_clock);
+
+bs->slice_end   = bs->slice_start + BLOCK_IO_SLICE_TIME;
+}
+
+bool bdrv_io_limits_enabled(BlockDriverState *bs)
+{
+BlockIOLimit *io_limits = &bs->io_limits;
+return io_limits->bps[BLOCK_IO_LIMIT_READ]
+ || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
+ || io_limits->iops[BLOCK_IO_LIMIT_READ]
+ || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
+}
+
 /* check if the path starts with ":" */
 static int path_has_protocol(const char *path)
 {
@@ -694,6 +755,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, 
int flags,
 bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
 }
 
+/* throttling disk I/O limits */
+if (bs->io_limits_enabled) {
+bdrv_io_limits_enable(bs);
+}
+
 return 0;
 
 unlink_and_fail:
@@ -732,6 +798,18 @@ void bdrv_close(BlockDriverState *bs)
 if (bs->change_cb)
 bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
 }
+
+/* throttling disk I/O limits */
+if (bs->block_queue) {
+qemu_del_block_queue(bs->block_queue);
+bs->block_queue = NULL;
+}
+
+if (bs->block_timer) {
+qemu_del_timer(bs->block_timer);
+qemu_free_timer(bs->block_timer);
+bs->block_timer = NULL;
+}
 }
 
 void bdrv_close_all(void)
@@ -2290,16 +2368,40 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, 
int64_t sector_num,
  BlockDriverCompletionFunc *cb, void *opaque)
 {
 BlockDriver *drv = bs->drv;
+BlockDriverAIOCB *ret;
+int64_t wait_time = -1;
 
 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
 
-if (!drv)
-return NULL;
-if (bdrv_check_request(bs, sector_num, nb_sectors))
+if (!drv || bdrv_check_request(bs, sector_num, nb_sectors)) {
 return NULL;
+}
 
-return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+/* throttling disk read I/O */
+if (bs->io_limits_enabled) {
+if (bdrv_exceed_io_limits(bs, nb_sectors, false, &wait_time)) {
+ret = qemu_block_queue_enqueue(bs->block_queue, bs, bdrv_aio_readv,
+   sector_num, qiov, nb_sectors, cb, opaque);
+if (wait_time != -1) {
+qemu_mod_timer(bs->block_timer,
+   wait_time + qemu_get_clock_ns(vm_clock));
+}
+
+

[Qemu-devel] [PATCH v7 4/4] qmp/hmp: add block_set_io_throttle

2011-09-06 Thread Zhi Yong Wu
The patch introduce one new command block_set_io_throttle; For its usage 
syntax, if you have better idea, pls let me know.

Signed-off-by: Zhi Yong Wu 
---
 block.c |   26 --
 hmp-commands.hx |   15 +++
 qerror.c|4 
 qerror.h|3 +++
 qmp-commands.hx |   52 +++-
 5 files changed, 97 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index 1f6f188..dfa5567 100644
--- a/block.c
+++ b/block.c
@@ -1938,6 +1938,16 @@ static void bdrv_print_dict(QObject *obj, void *opaque)
 qdict_get_bool(qdict, "ro"),
 qdict_get_str(qdict, "drv"),
 qdict_get_bool(qdict, "encrypted"));
+
+monitor_printf(mon, " bps=%" PRId64 " bps_rd=%" PRId64
+" bps_wr=%" PRId64 " iops=%" PRId64
+" iops_rd=%" PRId64 " iops_wr=%" PRId64,
+qdict_get_int(qdict, "bps"),
+qdict_get_int(qdict, "bps_rd"),
+qdict_get_int(qdict, "bps_wr"),
+qdict_get_int(qdict, "iops"),
+qdict_get_int(qdict, "iops_rd"),
+qdict_get_int(qdict, "iops_wr"));
 } else {
 monitor_printf(mon, " [not inserted]");
 }
@@ -1970,10 +1980,22 @@ void bdrv_info(Monitor *mon, QObject **ret_data)
 QDict *bs_dict = qobject_to_qdict(bs_obj);
 
 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
- "'encrypted': %i }",
+ "'encrypted': %i, "
+ "'bps': %" PRId64 ","
+ "'bps_rd': %" PRId64 ","
+ "'bps_wr': %" PRId64 ","
+ "'iops': %" PRId64 ","
+ "'iops_rd': %" PRId64 ","
+ "'iops_wr': %" PRId64 "}",
  bs->filename, bs->read_only,
  bs->drv->format_name,
- bdrv_is_encrypted(bs));
+ bdrv_is_encrypted(bs),
+ bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL],
+ bs->io_limits.bps[BLOCK_IO_LIMIT_READ],
+ bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE],
+ bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL],
+ bs->io_limits.iops[BLOCK_IO_LIMIT_READ],
+ bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]);
 if (bs->backing_file[0] != '\0') {
 QDict *qdict = qobject_to_qdict(obj);
 qdict_put(qdict, "backing_file",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0ccfb28..147f9cf 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1210,6 +1210,21 @@ ETEXI
 },
 
 STEXI
+@item block_set_io_throttle @var{device} @var{bps} @var{bps_rd} @var{bps_wr} 
@var{iops} @var{iops_rd} @var{iops_wr}
+@findex block_set_io_throttle
+Change I/O throttle limits for a block drive to @var{bps} @var{bps_rd} 
@var{bps_wr} @var{iops} @var{iops_rd} @var{iops_wr}
+ETEXI
+
+{
+.name   = "block_set_io_throttle",
+.args_type  = 
"device:B,bps:i?,bps_rd:i?,bps_wr:i?,iops:i?,iops_rd:i?,iops_wr:i?",
+.params = "device [bps] [bps_rd] [bps_wr] [iops] [iops_rd] 
[iops_wr]",
+.help   = "change I/O throttle limits for a block drive",
+.user_print = monitor_user_noop,
+.mhandler.cmd_new = do_block_set_io_throttle,
+},
+
+STEXI
 @item block_passwd @var{device} @var{password}
 @findex block_passwd
 Set the encrypted device @var{device} password to @var{password}
diff --git a/qerror.c b/qerror.c
index 3d64b80..33f9fdd 100644
--- a/qerror.c
+++ b/qerror.c
@@ -230,6 +230,10 @@ static const QErrorStringTable qerror_table[] = {
 .error_fmt = QERR_QGA_COMMAND_FAILED,
 .desc  = "Guest agent command failed, error was '%(message)'",
 },
+{
+.error_fmt = QERR_INVALID_PARAMETER_COMBINATION,
+.desc  = "Invalid paramter combination",
+},
 {}
 };
 
diff --git a/qerror.h b/qerror.h
index 8058456..62c1df2 100644
--- a/qerror.h
+++ b/qerror.h
@@ -193,4 +193,7 @@ QError *qobject_to_qerror(const QObject *obj);
 #define QERR_QGA_COMMAND_FAILED \
 "{ 'class': 'QgaCommandFailed', 'data': { 'message': %s } }"
 
+#define QERR_INVALID_PARAMETER_COMBINATION \
+"{ 'class': 'InvalidParameterCombination', 'data': {} }"
+
 #endif /* QERROR_H */
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 27cc66e..e848969 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -862,6 +862,44 @@ Example:
 EQMP
 
 {

Re: [Qemu-devel] [PATCH] scsi: execute SYNCHRONIZE_CACHE asynchronously

2011-09-06 Thread Kevin Wolf
Am 06.09.2011 09:08, schrieb Paolo Bonzini:
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/scsi-disk.c |   41 +
>  1 files changed, 25 insertions(+), 16 deletions(-)

Thanks, applied to the block branch.

Kevin



Re: [Qemu-devel] [PATCH] scsi: fix accounting of writes

2011-09-06 Thread Kevin Wolf
Am 05.09.2011 18:11, schrieb Paolo Bonzini:
> Writes go through scsi_write_complete at least twice, the first time
> to get some data without having actually written anything.  Because
> of this, the first time scsi_write_complete is called it will call
> bdrv_acct_done and account a read incorrectly.  Fix this by looking
> at the aiocb.  I am doing the same in scsi_read_complete for symmetry,
> but it is only needed in the (bogus) case of bdrv_aio_readv returning
> NULL.
> 
> Signed-off-by: Paolo Bonzini 

Thanks, applied to the block branch.

Kevin



Re: [Qemu-devel] qemu segfaults at start

2011-09-06 Thread octane indice
En réponse à Stefan Weil  :
> /usr/local/bin/qemu is stripped because it was installed with
> make install,
> so there is no useful debugging information.
> 
> Please look for the unstripped i386-softmmu/qemu executable in
> your build path and run it using gdb.
> 
$ gdb --args /usr/src/qemu-0.15.0/i386-softmmu/qemu disk.img -vnc 
127.0.0.1:1
GNU gdb (GDB) 7.1
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 

This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i486-slackware-linux".
For bug reporting instructions, please see:
...
Reading symbols from /usr/src/qemu-0.15.0/i386-softmmu/qemu...done.
(gdb) r
Starting program: /usr/src/qemu-0.15.0/i386-softmmu/qemu disk.img -vnc 
127.0.0.1:1
[Thread debugging using libthread_db enabled]

Program received signal SIGSEGV, Segmentation fault.
raise_interrupt (intno=8, is_int=0, error_code=0, next_eip_addend=0)
at /usr/src/qemu-0.15.0/target-i386/op_helper.c:1375
1375env->exception_index = intno;
(gdb) bt
#0  raise_interrupt (intno=8, is_int=0, error_code=0, next_eip_addend=0)
at /usr/src/qemu-0.15.0/target-i386/op_helper.c:1375
#1  0x081a9b50 in raise_exception_err (exception_index=13, error_code=8)
at /usr/src/qemu-0.15.0/target-i386/op_helper.c:1386
#2  0xcdb0e012 in ?? ()
#3  0x0071 in ?? ()
#4  0x008f in ?? ()
#5  0x in ?? ()
(gdb) info reg
eax0xbfffeee8   -1073746200
ecx0x1  1
edx0x0  0
ebx0x8  8
esp0xbfffee30   0xbfffee30
ebp0xbfffeee8   0xbfffeee8
esi0x0  0
edi0x0  0
eip0x81a94c10x81a94c1 
eflags 0x210246 [ PF ZF IF RF ID ]
cs 0x73 115
ss 0x7b 123
ds 0x7b 123
es 0x7b 123
fs 0x0  0
gs 0x33 51
(gdb)

> Regards,
> Stefan Weil
> 
I hope there's more to understand.
Thanks

Envoyé avec Inmano, ma messagerie renversante et gratuite : 
http://www.inmano.com






Re: [Qemu-devel] [PATCH] KVM: emulate lapic tsc deadline timer for hvm

2011-09-06 Thread Liu, Jinsong
Thanks Avi, Marcelo, Kevin for comments, sorry for late reply (just come back 
from vacation).


Avi Kivity wrote:
> On 08/17/2011 07:19 AM, Liu, Jinsong wrote:
>>  From a9670ddff84080c56183e2d678189e100f891174 Mon Sep 17 00:00:00
>> 2001 
>> From: Liu, Jinsong
>> Date: Wed, 17 Aug 2011 11:36:28 +0800
>> Subject: [PATCH] KVM: emulate lapic tsc deadline timer for hvm
> 
> kvm doesn't have hvm.
> 

Yes, I will update patch title and description, remove 'hvm'.


>> This patch emulate lapic tsc deadline timer for hvm:
>> Enumerate tsc deadline timer capacibility by CPUID;
>> Enable tsc deadline timer mode by LAPIC MMIO;
>> Start tsc deadline timer by MSR;
> 
>> diff --git a/arch/x86/include/asm/cpufeature.h
>> b/arch/x86/include/asm/cpufeature.h index 4258aac..28bcf48 100644
>> --- a/arch/x86/include/asm/cpufeature.h +++
>> b/arch/x86/include/asm/cpufeature.h @@ -120,6 +120,7 @@
>>   #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
>>   #define X86_FEATURE_MOVBE  (4*32+22) /* MOVBE instruction */
>>   #define X86_FEATURE_POPCNT  (4*32+23) /* POPCNT instruction */
>> +#define X86_FEATURE_TSC_DEADLINE_TIMER(4*32+24) /* Tsc deadline
>>   timer */ #define X86_FEATURE_AES   (4*32+25) /* AES instructions */
>>   #define X86_FEATURE_XSAVE  (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV
>>   */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in
>> the OS */ 
>> diff --git a/arch/x86/include/asm/kvm_host.h
>> b/arch/x86/include/asm/kvm_host.h index 307e3cf..28f7128 100644 ---
>> a/arch/x86/include/asm/kvm_host.h +++
>> b/arch/x86/include/asm/kvm_host.h @@ -635,6 +635,7 @@ struct
>>  kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu,
>> struct x86_instruction_info *info,
>> enum x86_intercept_stage stage);
>> +u64 (*guest_to_host_tsc)(u64 guest_tsc);
>>   };
> 
> Please put this near the other tsc functions.  Add a comment
> explaining what value is returned under nested virtualization.

OK

> 
> Please add the svm callback implementation.
> 

It's un-necessary to add svm callback.
AMD lapic timer has no tsc deadline mode, svm callback would be an unused dummy 
function.


>> 
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -229,6 +229,8 @@
>>   #define MSR_IA32_APICBASE_ENABLE   (1<<11)
>>   #define MSR_IA32_APICBASE_BASE (0xf<<12)
>> 
>> +#define MSR_IA32_TSCDEADLINE0x06e0
>> +
>>   #define MSR_IA32_UCODE_WRITE   0x0079
>>   #define MSR_IA32_UCODE_REV 0x008b
>> 
> 
> Need to add to msrs_to_save so live migration works.
> 

Fine

>> 
>> @@ -665,28 +682,30 @@ static void update_divide_count(struct
>>   kvm_lapic *apic) static void start_apic_timer(struct kvm_lapic
>>  *apic)   { ktime_t now =
>> apic->lapic_timer.timer.base->get_time(); - 
>> -apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
>> -APIC_BUS_CYCLE_NS * apic->divide_count;
>>  atomic_set(&apic->lapic_timer.pending, 0);
>> 
>> -if (!apic->lapic_timer.period)
>> -return;
>> -/*
>> - * Do not allow the guest to program periodic timers with small
>> - * interval, since the hrtimers are not throttled by the host
>> - * scheduler.
>> - */
>> -if (apic_lvtt_period(apic)) {
>> -if (apic->lapic_timer.period<  NSEC_PER_MSEC/2)
>> -apic->lapic_timer.period = NSEC_PER_MSEC/2;
>> -}
>> +/* lapic timer in oneshot or peroidic mode */
>> +if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
>> +apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
>> +* APIC_BUS_CYCLE_NS * apic->divide_count;
>> 
>> -hrtimer_start(&apic->lapic_timer.timer,
>> -  ktime_add_ns(now, apic->lapic_timer.period),
>> -  HRTIMER_MODE_ABS);
>> +if (!apic->lapic_timer.period)
>> +return;
>> +/*
>> + * Do not allow the guest to program periodic timers with small
>> + * interval, since the hrtimers are not throttled by the host + 
>> 
>> * scheduler. +*/
>> +if (apic_lvtt_period(apic)) {
>> +if (apic->lapic_timer.period<  NSEC_PER_MSEC/2)
>> +apic->lapic_timer.period = NSEC_PER_MSEC/2;
>> +}
>> 
>> -apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
>> +hrtimer_start(&apic->lapic_timer.timer,
>> +  ktime_add_ns(now, apic->lapic_timer.period), +
>>  
>> HRTIMER_MODE_ABS); +
>> +apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 
>>   
>> PRIx64 ", " "timer initial count 0x%x, period 
>> %lldns, "
>> "expire @ 0x%016" PRIx64 ".\n", __func__,
>> @@ -695,6 +714,26 @@ static void star

Re: [Qemu-devel] qemu segfaults at start

2011-09-06 Thread octane indice
En réponse à Stefan Hajnoczi  :
> You can run QEMU completely without a disk, just run:
> $ gdb qemu
> (gdb) r
> 
> I wonder if it crashes that way too.
> 
Yes:
(gdb) r
Starting program: /usr/src/qemu-0.15.0/i386-softmmu/qemu -vnc 127.0.0.1:1
[Thread debugging using libthread_db enabled]

Program received signal SIGSEGV, Segmentation fault.
raise_interrupt (intno=8, is_int=0, error_code=0, next_eip_addend=0)
at /usr/src/qemu-0.15.0/target-i386/op_helper.c:1375
1375env->exception_index = intno;
(gdb)



Envoyé avec Inmano, ma messagerie renversante et gratuite : 
http://www.inmano.com






Re: [Qemu-devel] [PATCH] KVM: emulate lapic tsc deadline timer for hvm

2011-09-06 Thread Liu, Jinsong
Marcelo Tosatti wrote:
>>> --- a/arch/x86/include/asm/msr-index.h
>>> +++ b/arch/x86/include/asm/msr-index.h
>>> @@ -229,6 +229,8 @@
>>>  #define MSR_IA32_APICBASE_ENABLE   (1<<11)
>>>  #define MSR_IA32_APICBASE_BASE (0xf<<12)
>>> 
>>> +#define MSR_IA32_TSCDEADLINE   0x06e0
>>> +
>>>  #define MSR_IA32_UCODE_WRITE   0x0079
>>>  #define MSR_IA32_UCODE_REV 0x008b
>> 
>> Need to add to msrs_to_save so live migration works.
> 
> MSR must be explicitly listed in qemu, also.
> 

OK

>>> +   if (!apic->lapic_timer.tscdeadline)
>>> +   return;
>>> +
>>> +   tsc_target = kvm_x86_ops->
>>> +   guest_to_host_tsc(apic->lapic_timer.tscdeadline);
>>> +   rdtscll(tsc_now); + tsc_delta = tsc_target - 
>>> tsc_now;
>> 
>> This only works if we have a constant tsc, that's not true for large
>> multiboard machines.  Need to do this with irqs disabled as well
>> (reading both 'now' and 'tsc_now' in the same critical section).
> 
> Should look like this:
> 
> local_irq_disable();
> u64 guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
> if (guest_tsc <= tscdeadline)
> hrtimer_start(now);
> else {
>   ns = convert_to_ns(guest_tsc - tscdeadline);
>   hrtimer_start(now + ns);
> }
> local_irq_enable();
> 
> Note the vcpus tsc can have different frequency than the hosts, so
> vcpu_tsc_khz() should be used to convert to nanoseconds, not tsc_khz.
> 

Fine.


Thanks,
Jinsong


[Qemu-devel] glib mainloop breaks virtfs

2011-09-06 Thread Gerd Hoffmann

  Hi,

virtfs stopped working for me in master, the guest (fedora 15) just 
hangs at boot when mounting the virtfs filesystems.  Bisecting points to 
this commit:


rincewind kraxel ~/projects/qemu ((69e5bb6...)|BISECTING)# git bisect good
4d88a2ac8643265108ef1fb47ceee5d7b28e19f2 is the first bad commit
commit 4d88a2ac8643265108ef1fb47ceee5d7b28e19f2
Author: Anthony Liguori 
Date:   Mon Aug 22 08:12:53 2011 -0500

main: switch qemu_set_fd_handler to g_io_add_watch

cheers,
  Gerd



Re: [Qemu-devel] [PATCH] KVM: emulate lapic tsc deadline timer for hvm

2011-09-06 Thread Avi Kivity

On 09/06/2011 02:18 PM, Liu, Jinsong wrote:

>>struct x86_instruction_info *info,
>>enum x86_intercept_stage stage);
>>  + u64 (*guest_to_host_tsc)(u64 guest_tsc);
>>};
>
>  Please put this near the other tsc functions.  Add a comment
>  explaining what value is returned under nested virtualization.

OK

>
>  Please add the svm callback implementation.
>

It's un-necessary to add svm callback.
AMD lapic timer has no tsc deadline mode, svm callback would be an unused dummy 
function.


It's a generic function, at the moment it's only used by tsc deadline 
timer but it could be used tomorrow for something else.



>>  + u64 tsc_now, tsc_target, tsc_delta, nsec;
>>  +
>>  + if (!apic->lapic_timer.tscdeadline)
>>  + return;
>>  +
>>  + tsc_target = kvm_x86_ops->
>>  + guest_to_host_tsc(apic->lapic_timer.tscdeadline);
>>  + rdtscll(tsc_now); + tsc_delta = tsc_target - 
tsc_now;
>
>  This only works if we have a constant tsc, that's not true for large

Agree with Kevin, variable tsc exposes tricky issues to time virtualization in 
general.
At some very old processors it maybe variable tsc, but all recent processors 
work on constant tsc regardless of Px and Cx.
For lapic tsc deadline timer capacibility cpu, I think it works on constant tsc 
otherwise os has no way to expect next absolute timepoint.


Well, we need something better here.  It's not just old cpus, it's also 
large multi-board machines.



>>  +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) +{
>>  + struct kvm_lapic *apic = vcpu->arch.apic;
>>  +
>>  + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
>>  + return 0;
>
>  Why?
>

Intel SDM define such hardware behavior (10.5.4.1):
'In other timer modes (LVT bit 18 = 0), the IA32_TSC_DEADLINE MSR reads zero 
and writes are ignored.'


Ok.



>
>>/*
>> * Empty call-back. Needs to be implemented when VMX enables the
>>  SET_TSC_KHZ
>> * ioctl. In this case the call-back should update internal vmx
>>  state to make @@ -6270,6 +6278,16 @@ static void
>>vmx_cpuid_update(struct kvm_vcpu *vcpu)   
  } }
>>}
>>  +
>>  + /*
>>  +  * Emulate Intel lapic tsc deadline timer even if host not support
>>  it. +  * Open CPUID.1.ECX[24] and use bit17/18 as timer mode mask.
>>  +  */ +   best = kvm_find_cpuid_entry(vcpu, 1, 0);
>>  + if (best) {
>>  + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
>>  + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3<<   17); +  
  }
>>}
>
>  Should be in common code; there is nothing vmx specific about it
>  (although it is Intel specific at present).

AMD lapic timer has no tsc deadline mode and so cpuid.1.ecx[24], and its 
timer_mode_mask should be 1<<  17 (AMD APM 16.4.1.)
It would be more safe to handle cpuid and lapic timer_mode_mask in arch code.



Again, vmx.c is NOT about Intel specific code.  It is about vmx-specific 
code - programming vmx via VMREAD and VMWRITE.


(not to mention that we don't really need host cpu support for this - we 
can emulate tsc deadline mode without host cpu support at all, as this 
patchset shows).


--
error compiling committee.c: too many arguments to function




[Qemu-devel] [PATCH] s390: remove boot image detection to fix boot with newer kernels

2011-09-06 Thread Christian Borntraeger
Alex,

Newer kernels will not always have a 0dd0 (basr 13,0) at address 0x1.
(e.g. current linux-next). We must not rely on specific code at certain
addresses, so lets just remove this check.

Reported-by: Philipp Muens 
Signed-off-by: Christian Borntraeger 

---
 hw/s390-virtio.c |5 -
 1 file changed, 5 deletions(-)

Index: b/hw/s390-virtio.c
===
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -193,11 +193,6 @@ static void s390_init(ram_addr_t my_ram_
 if (kernel_filename) {
 kernel_size = load_image(kernel_filename, qemu_get_ram_ptr(0));
 
-if (lduw_be_phys(KERN_IMAGE_START) != 0x0dd0) {
-fprintf(stderr, "Specified image is not an s390 boot image\n");
-exit(1);
-}
-
 env->psw.addr = KERN_IMAGE_START;
 env->psw.mask = 0x00018000ULL;
 } else {



[Qemu-devel] [PATCH 1/9] Move vm_state_notify() prototype from cpus.h to sysemu.h

2011-09-06 Thread Luiz Capitulino
It's where all the state handling functions prototypes are located.

Signed-off-by: Luiz Capitulino 
---
 cpus.h   |1 -
 sysemu.h |1 +
 2 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/cpus.h b/cpus.h
index f42b54e..5885885 100644
--- a/cpus.h
+++ b/cpus.h
@@ -15,7 +15,6 @@ void cpu_synchronize_all_post_init(void);
 /* vl.c */
 extern int smp_cores;
 extern int smp_threads;
-void vm_state_notify(int running, int reason);
 bool cpu_exec_all(void);
 void set_numa_modes(void);
 void set_cpu_log(const char *optarg);
diff --git a/sysemu.h b/sysemu.h
index 9090457..eb66af5 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -23,6 +23,7 @@ typedef void VMChangeStateHandler(void *opaque, int running, 
int reason);
 VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
  void *opaque);
 void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
+void vm_state_notify(int running, int reason);
 
 #define VMSTOP_USER  0
 #define VMSTOP_DEBUG 1
-- 
1.7.7.rc0.72.g4b5ea




[Qemu-devel] [PATCH v4 0/9]: Introduce the RunState type

2011-09-06 Thread Luiz Capitulino
It replaces the VMSTOP macros and allows us to drop some global variables.

Additionally, the problem with issuing 'cont' when the VM is in bad state
is addressed and we make the current state available in QMP and HMP.

** IMPORTANT: patch 04/09 is a new addition, it needs careful review.

changelog
-

v4

o Rebase on top of latest master
o Fix qemu_system_vmstop_request() to take a RunState argument
o Drop RSTATE_RESTORE_FAILED, it's not currently used
o Check for valid transitions (will abort on invalid ones)

v3

o Rebase on top of latest master
o Fix a spice VM change state handler which hasn't been converted
o Turn runstate_get() into runstate_check()
o Rename vm_is_running() to runstate_is_running()
o Other minor renames and log improvements

v2

o Rename the new type from QemuState to RunState
  (also renames related functions)
o Rename the enum values to contain proper word seperation
o Redo patch 'Monitor: Don't allow cont on bad VM state' to not use a global
  variable
o Make the current VM state also available in HMP
o Improve some commit logs a bit




[Qemu-devel] [PATCH 6/9] Drop the vm_running global variable

2011-09-06 Thread Luiz Capitulino
Use runstate_is_running() instead, which is introduced by this commit.

Signed-off-by: Luiz Capitulino 
---
 cpus.c|9 -
 gdbstub.c |4 ++--
 hw/etraxfs_dma.c  |2 +-
 hw/kvmclock.c |2 +-
 hw/virtio.c   |2 +-
 migration.c   |2 +-
 monitor.c |4 ++--
 qemu-timer.c  |6 +++---
 savevm.c  |4 ++--
 sysemu.h  |2 +-
 target-i386/kvm.c |2 +-
 ui/sdl.c  |6 +++---
 vl.c  |9 ++---
 xen-all.c |2 +-
 14 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/cpus.c b/cpus.c
index 3444bc0..883c27a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -115,14 +115,13 @@ void cpu_synchronize_all_post_init(void)
 
 int cpu_is_stopped(CPUState *env)
 {
-return !vm_running || env->stopped;
+return !runstate_is_running() || env->stopped;
 }
 
 static void do_vm_stop(RunState state)
 {
-if (vm_running) {
+if (runstate_is_running()) {
 cpu_disable_ticks();
-vm_running = 0;
 pause_all_vcpus();
 runstate_set(state);
 vm_state_notify(0, state);
@@ -137,7 +136,7 @@ static int cpu_can_run(CPUState *env)
 if (env->stop) {
 return 0;
 }
-if (env->stopped || !vm_running) {
+if (env->stopped || !runstate_is_running()) {
 return 0;
 }
 return 1;
@@ -148,7 +147,7 @@ static bool cpu_thread_is_idle(CPUState *env)
 if (env->stop || env->queued_work_first) {
 return false;
 }
-if (env->stopped || !vm_running) {
+if (env->stopped || !runstate_is_running()) {
 return true;
 }
 if (!env->halted || qemu_cpu_has_work(env) ||
diff --git a/gdbstub.c b/gdbstub.c
index a308a76..ceb39cc 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2428,7 +2428,7 @@ static void gdb_read_byte(GDBState *s, int ch)
 if (ch != '$')
 return;
 }
-if (vm_running) {
+if (runstate_is_running()) {
 /* when the CPU is running, we cannot do anything except stop
it when receiving a char */
 vm_stop(RSTATE_PAUSED);
@@ -2733,7 +2733,7 @@ static int gdb_monitor_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 #ifndef _WIN32
 static void gdb_sigterm_handler(int signal)
 {
-if (vm_running) {
+if (runstate_is_running()) {
 vm_stop(RSTATE_PAUSED);
 }
 }
diff --git a/hw/etraxfs_dma.c b/hw/etraxfs_dma.c
index e8ad9e6..d3082ac 100644
--- a/hw/etraxfs_dma.c
+++ b/hw/etraxfs_dma.c
@@ -732,7 +732,7 @@ static void DMA_run(void *opaque)
 struct fs_dma_ctrl *etraxfs_dmac = opaque;
 int p = 1;
 
-if (vm_running)
+if (runstate_is_running())
 p = etraxfs_dmac_run(etraxfs_dmac);
 
 if (p)
diff --git a/hw/kvmclock.c b/hw/kvmclock.c
index 88961be..5388bc4 100644
--- a/hw/kvmclock.c
+++ b/hw/kvmclock.c
@@ -46,7 +46,7 @@ static void kvmclock_pre_save(void *opaque)
  * it on next vmsave (which would return a different value). Will be reset
  * when the VM is continued.
  */
-s->clock_valid = !vm_running;
+s->clock_valid = !runstate_is_running();
 }
 
 static int kvmclock_post_load(void *opaque, int version_id)
diff --git a/hw/virtio.c b/hw/virtio.c
index 74ab79e..c577bbe 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -870,7 +870,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t 
device_id,
 vdev->queue_sel = 0;
 vdev->config_vector = VIRTIO_NO_VECTOR;
 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
-vdev->vm_running = vm_running;
+vdev->vm_running = runstate_is_running();
 for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
 vdev->vq[i].vdev = vdev;
diff --git a/migration.c b/migration.c
index a63e2a2..7dd8f4e 100644
--- a/migration.c
+++ b/migration.c
@@ -372,7 +372,7 @@ void migrate_fd_put_ready(void *opaque)
 DPRINTF("iterate\n");
 if (qemu_savevm_state_iterate(s->mon, s->file) == 1) {
 int state;
-int old_vm_running = vm_running;
+int old_vm_running = runstate_is_running();
 
 DPRINTF("done iterating\n");
 vm_stop(RSTATE_PRE_MIGRATE);
diff --git a/monitor.c b/monitor.c
index afe858c..10cab1d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2632,7 +2632,7 @@ static void do_info_status_print(Monitor *mon, const 
QObject *data)
 static void do_info_status(Monitor *mon, QObject **ret_data)
 {
 *ret_data = qobject_from_jsonf("{ 'running': %i, 'singlestep': %i }",
-vm_running, singlestep);
+runstate_is_running(), singlestep);
 }
 
 static qemu_acl *find_acl(Monitor *mon, const char *name)
@@ -2825,7 +2825,7 @@ static int do_closefd(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 
 static void do_loadvm(Monitor *mon, const QDict *qdict)
 {
-int saved_vm_running  = vm_running;
+int saved_vm_running  = runstate_is_running();
 const char *name = qdict_get_str(qdict, "name

[Qemu-devel] [PATCH 5/9] Drop the incoming_expected global variable

2011-09-06 Thread Luiz Capitulino
Test against RSTATE_IN_MIGRATE instead.

Please, note that the RSTATE_IN_MIGRATE state is only set when all the
initial VM setup is done, while 'incoming_expected' was set right in
the beginning when parsing command-line options. Shouldn't be a problem
as far as I could check.

Signed-off-by: Luiz Capitulino 
---
 migration.c |2 --
 monitor.c   |2 +-
 vl.c|2 --
 3 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/migration.c b/migration.c
index f2499cf..a63e2a2 100644
--- a/migration.c
+++ b/migration.c
@@ -70,8 +70,6 @@ void process_incoming_migration(QEMUFile *f)
 qemu_announce_self();
 DPRINTF("successfully loaded vm state\n");
 
-incoming_expected = false;
-
 if (autostart) {
 vm_start();
 } else {
diff --git a/monitor.c b/monitor.c
index 3f979ad..afe858c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1311,7 +1311,7 @@ static int do_cont(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 {
 struct bdrv_iterate_context context = { mon, 0 };
 
-if (incoming_expected) {
+if (runstate_check(RSTATE_IN_MIGRATE)) {
 qerror_report(QERR_MIGRATION_EXPECTED);
 return -1;
 }
diff --git a/vl.c b/vl.c
index fe3628a..4cee50e 100644
--- a/vl.c
+++ b/vl.c
@@ -187,7 +187,6 @@ int nb_nics;
 NICInfo nd_table[MAX_NICS];
 int vm_running;
 int autostart;
-int incoming_expected; /* Started with -incoming and waiting for incoming */
 static int rtc_utc = 1;
 static int rtc_date_offset = -1; /* -1 means no change */
 QEMUClock *rtc_clock;
@@ -3115,7 +3114,6 @@ int main(int argc, char **argv, char **envp)
 break;
 case QEMU_OPTION_incoming:
 incoming = optarg;
-incoming_expected = true;
 break;
 case QEMU_OPTION_nodefaults:
 default_serial = 0;
-- 
1.7.7.rc0.72.g4b5ea




[Qemu-devel] [PATCH 2/9] Replace the VMSTOP macros with a proper state type

2011-09-06 Thread Luiz Capitulino
Today, when notifying a VM state change with vm_state_notify(),
we pass a VMSTOP macro as the 'reason' argument. This is not ideal
because the VMSTOP macros tell why qemu stopped and not exactly
what the current VM state is.

One example to demonstrate this problem is that vm_start() calls
vm_state_notify() with reason=0, which turns out to be VMSTOP_USER.

This commit fixes that by replacing the VMSTOP macros with a proper
state type called RunState.

Signed-off-by: Luiz Capitulino 
---
 audio/audio.c  |2 +-
 cpus.c |   10 +-
 gdbstub.c  |   30 +++---
 hw/ide/ahci.c  |2 +-
 hw/ide/core.c  |4 ++--
 hw/ide/internal.h  |3 ++-
 hw/ide/pci.c   |2 +-
 hw/kvmclock.c  |3 ++-
 hw/qxl.c   |5 +++--
 hw/scsi-disk.c |4 ++--
 hw/virtio-blk.c|5 +++--
 hw/virtio.c|2 +-
 hw/watchdog.c  |2 +-
 kvm-all.c  |2 +-
 migration.c|2 +-
 monitor.c  |4 ++--
 qemu-timer.c   |3 ++-
 savevm.c   |4 ++--
 sysemu.h   |   32 ++--
 target-i386/kvm.c  |2 +-
 ui/spice-display.c |3 ++-
 ui/spice-display.h |4 +++-
 vl.c   |   26 +-
 xen-all.c  |6 --
 24 files changed, 88 insertions(+), 74 deletions(-)

diff --git a/audio/audio.c b/audio/audio.c
index 5649075..50d0d71 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -1743,7 +1743,7 @@ static int audio_driver_init (AudioState *s, struct 
audio_driver *drv)
 }
 
 static void audio_vm_change_state_handler (void *opaque, int running,
-   int reason)
+   RunState state)
 {
 AudioState *s = opaque;
 HWVoiceOut *hwo = NULL;
diff --git a/cpus.c b/cpus.c
index 54c188c..76a79ac 100644
--- a/cpus.c
+++ b/cpus.c
@@ -118,13 +118,13 @@ int cpu_is_stopped(CPUState *env)
 return !vm_running || env->stopped;
 }
 
-static void do_vm_stop(int reason)
+static void do_vm_stop(RunState state)
 {
 if (vm_running) {
 cpu_disable_ticks();
 vm_running = 0;
 pause_all_vcpus();
-vm_state_notify(0, reason);
+vm_state_notify(0, state);
 qemu_aio_flush();
 bdrv_flush_all();
 monitor_protocol_event(QEVENT_STOP, NULL);
@@ -878,10 +878,10 @@ void cpu_stop_current(void)
 }
 }
 
-void vm_stop(int reason)
+void vm_stop(RunState state)
 {
 if (!qemu_thread_is_self(&io_thread)) {
-qemu_system_vmstop_request(reason);
+qemu_system_vmstop_request(state);
 /*
  * FIXME: should not return to device code in case
  * vm_stop() has been requested.
@@ -889,7 +889,7 @@ void vm_stop(int reason)
 cpu_stop_current();
 return;
 }
-do_vm_stop(reason);
+do_vm_stop(state);
 }
 
 static int tcg_cpu_exec(CPUState *env)
diff --git a/gdbstub.c b/gdbstub.c
index 3b87c27..a308a76 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2267,7 +2267,7 @@ void gdb_set_stop_cpu(CPUState *env)
 }
 
 #ifndef CONFIG_USER_ONLY
-static void gdb_vm_state_change(void *opaque, int running, int reason)
+static void gdb_vm_state_change(void *opaque, int running, RunState state)
 {
 GDBState *s = gdbserver_state;
 CPUState *env = s->c_cpu;
@@ -2278,8 +2278,8 @@ static void gdb_vm_state_change(void *opaque, int 
running, int reason)
 if (running || s->state == RS_INACTIVE || s->state == RS_SYSCALL) {
 return;
 }
-switch (reason) {
-case VMSTOP_DEBUG:
+switch (state) {
+case RSTATE_DEBUG:
 if (env->watchpoint_hit) {
 switch (env->watchpoint_hit->flags & BP_MEM_ACCESS) {
 case BP_MEM_READ:
@@ -2302,25 +2302,25 @@ static void gdb_vm_state_change(void *opaque, int 
running, int reason)
 tb_flush(env);
 ret = GDB_SIGNAL_TRAP;
 break;
-case VMSTOP_USER:
+case RSTATE_PAUSED:
 ret = GDB_SIGNAL_INT;
 break;
-case VMSTOP_SHUTDOWN:
+case RSTATE_SHUTDOWN:
 ret = GDB_SIGNAL_QUIT;
 break;
-case VMSTOP_DISKFULL:
+case RSTATE_IO_ERROR:
 ret = GDB_SIGNAL_IO;
 break;
-case VMSTOP_WATCHDOG:
+case RSTATE_WATCHDOG:
 ret = GDB_SIGNAL_ALRM;
 break;
-case VMSTOP_PANIC:
+case RSTATE_PANICKED:
 ret = GDB_SIGNAL_ABRT;
 break;
-case VMSTOP_SAVEVM:
-case VMSTOP_LOADVM:
+case RSTATE_SAVEVM:
+case RSTATE_RESTORE:
 return;
-case VMSTOP_MIGRATE:
+case RSTATE_PRE_MIGRATE:
 ret = GDB_SIGNAL_XCPU;
 break;
 default:
@@ -2357,7 +2357,7 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const 
char *fmt, ...)
 gdb_current_syscall_cb = cb;
 s->state = RS_SYSCALL;
 #ifndef CONFIG_USER_ONLY
-vm_stop(VMSTOP_DEBUG);
+vm_stop(RSTATE_DEBUG);
 #endif
 s->state = RS_IDLE;
 va_start(va, fmt);
@@ -2431,7 +2431,7 @@ stati

[Qemu-devel] [PATCH 7/9] Monitor/QMP: Don't allow cont on bad VM state

2011-09-06 Thread Luiz Capitulino
We have two states where issuing cont before system_reset can
cause problems: RSTATE_SHUTDOWN (when -no-shutdown is used) and
RSTATE_PANICKED (which only happens with kvm).

This commit fixes that by doing the following when state is
RSTATE_SHUTDOWN or RSTATE_PANICKED:

 1. returning an error to the user/client if cont is issued
 2. automatically transition to RSTATE_PAUSED during system_reset

Signed-off-by: Luiz Capitulino 
---
 monitor.c |5 +
 qerror.c  |4 
 qerror.h  |3 +++
 vl.c  |4 
 4 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/monitor.c b/monitor.c
index 10cab1d..6e5bb56 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1314,7 +1314,12 @@ static int do_cont(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 if (runstate_check(RSTATE_IN_MIGRATE)) {
 qerror_report(QERR_MIGRATION_EXPECTED);
 return -1;
+} else if (runstate_check(RSTATE_PANICKED) ||
+   runstate_check(RSTATE_SHUTDOWN)) {
+qerror_report(QERR_RESET_REQUIRED);
+return -1;
 }
+
 bdrv_iterate(encrypted_bdrv_it, &context);
 /* only resume the vm if all keys are set and valid */
 if (!context.err) {
diff --git a/qerror.c b/qerror.c
index 3d64b80..c591a54 100644
--- a/qerror.c
+++ b/qerror.c
@@ -194,6 +194,10 @@ static const QErrorStringTable qerror_table[] = {
 .desc  = "QMP input object member '%(member)' is unexpected",
 },
 {
+.error_fmt = QERR_RESET_REQUIRED,
+.desc  = "Resetting the Virtual Machine is required",
+},
+{
 .error_fmt = QERR_SET_PASSWD_FAILED,
 .desc  = "Could not set password",
 },
diff --git a/qerror.h b/qerror.h
index 8058456..d407001 100644
--- a/qerror.h
+++ b/qerror.h
@@ -163,6 +163,9 @@ QError *qobject_to_qerror(const QObject *obj);
 #define QERR_QMP_EXTRA_MEMBER \
 "{ 'class': 'QMPExtraInputObjectMember', 'data': { 'member': %s } }"
 
+#define QERR_RESET_REQUIRED \
+"{ 'class': 'ResetRequired', 'data': {} }"
+
 #define QERR_SET_PASSWD_FAILED \
 "{ 'class': 'SetPasswdFailed', 'data': {} }"
 
diff --git a/vl.c b/vl.c
index b1f3352..917c348 100644
--- a/vl.c
+++ b/vl.c
@@ -1650,6 +1650,10 @@ static void main_loop(void)
 cpu_synchronize_all_states();
 qemu_system_reset(VMRESET_REPORT);
 resume_all_vcpus();
+if (runstate_check(RSTATE_PANICKED) ||
+runstate_check(RSTATE_SHUTDOWN)) {
+runstate_set(RSTATE_PAUSED);
+}
 }
 if (qemu_powerdown_requested()) {
 monitor_protocol_event(QEVENT_POWERDOWN, NULL);
-- 
1.7.7.rc0.72.g4b5ea




[Qemu-devel] [PATCH 4/9] runstate_set(): Check for valid transitions

2011-09-06 Thread Luiz Capitulino
This commit could have been folded with the previous one, however
doing it separately will allow for easy bisect and revert if needed.

Checking and testing all valid transitions wasn't trivial, chances
are this will need broader testing to become more stable.

Signed-off-by: Luiz Capitulino 
---
 vl.c |  149 +-
 1 files changed, 148 insertions(+), 1 deletions(-)

diff --git a/vl.c b/vl.c
index 9926d2a..fe3628a 100644
--- a/vl.c
+++ b/vl.c
@@ -332,9 +332,156 @@ bool runstate_check(RunState state)
 return current_run_state == state;
 }
 
+/* This function will abort() on invalid state transitions */
 void runstate_set(RunState new_state)
 {
-assert(new_state < RSTATE_MAX);
+switch (current_run_state) {
+case RSTATE_NO_STATE:
+switch (new_state) {
+case RSTATE_RUNNING:
+case RSTATE_IN_MIGRATE:
+case RSTATE_PRE_LAUNCH:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_DEBUG:
+switch (new_state) {
+case RSTATE_RUNNING:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_IN_MIGRATE:
+switch (new_state) {
+case RSTATE_RUNNING:
+case RSTATE_PRE_LAUNCH:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_PANICKED:
+switch (new_state) {
+case RSTATE_PAUSED:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_IO_ERROR:
+switch (new_state) {
+case RSTATE_RUNNING:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_PAUSED:
+switch (new_state) {
+case RSTATE_RUNNING:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_POST_MIGRATE:
+switch (new_state) {
+case RSTATE_RUNNING:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_PRE_LAUNCH:
+switch (new_state) {
+case RSTATE_RUNNING:
+case RSTATE_POST_MIGRATE:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_PRE_MIGRATE:
+switch (new_state) {
+case RSTATE_RUNNING:
+case RSTATE_POST_MIGRATE:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_RESTORE:
+switch (new_state) {
+case RSTATE_RUNNING:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_RUNNING:
+switch (new_state) {
+case RSTATE_DEBUG:
+case RSTATE_PANICKED:
+case RSTATE_IO_ERROR:
+case RSTATE_PAUSED:
+case RSTATE_PRE_MIGRATE:
+case RSTATE_RESTORE:
+case RSTATE_SAVEVM:
+case RSTATE_SHUTDOWN:
+case RSTATE_WATCHDOG:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_SAVEVM:
+switch (new_state) {
+case RSTATE_RUNNING:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_SHUTDOWN:
+switch (new_state) {
+case RSTATE_PAUSED:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+case RSTATE_WATCHDOG:
+switch (new_state) {
+case RSTATE_RUNNING:
+goto transition_ok;
+default:
+/* invalid transition */
+abort();
+}
+abort();
+default:
+fprintf(stderr, "current run state is invalid: %s\n",
+runstate_as_string());
+abort();
+}
+
+transition_ok:
 current_run_state = new_state;
 }
 
-- 
1.7.7.rc0.72.g4b5ea




[Qemu-devel] [PATCH] linux-user: Exit with an error if we couldn't set up gdbserver

2011-09-06 Thread Peter Maydell
If gdbserver_start() fails (usually because we couldn't bind to the
requested TCP port) then exit qemu rather than blithely continuing.
This brings the linux-user behaviour in to line with system mode.

Signed-off-by: Peter Maydell 
---
 linux-user/main.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index 89a51d7..7268997 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3568,7 +3568,11 @@ int main(int argc, char **argv, char **envp)
 #endif
 
 if (gdbstub_port) {
-gdbserver_start (gdbstub_port);
+if (gdbserver_start(gdbstub_port) < 0) {
+fprintf(stderr, "qemu: could not open gdbserver on port %d\n",
+gdbstub_port);
+exit(1);
+}
 gdb_handlesig(env, 0);
 }
 cpu_loop(env);
-- 
1.7.1




Re: [Qemu-devel] [PATCH 3/8] RunState: Add additional states

2011-09-06 Thread Luiz Capitulino
On Fri, 2 Sep 2011 11:28:18 -0300
Luiz Capitulino  wrote:

> On Thu, 01 Sep 2011 22:58:51 +0200
> Jan Kiszka  wrote:
> 
> > On 2011-09-01 20:39, Luiz Capitulino wrote:
> > > On Thu, 01 Sep 2011 20:30:57 +0200
> > > Jan Kiszka  wrote:
> > > 
> > >> On 2011-09-01 20:12, Luiz Capitulino wrote:
> > >>> Currently, only vm_start() and vm_stop() change the VM state.
> > >>> That's, the state is only changed when starting or stopping the VM.
> > >>>
> > >>> This commit adds the runstate_set() function, which makes it possible
> > >>> to also do state transitions when the VM is stopped or running.
> > >>>
> > >>> Additional states are also added and the current state is stored.
> > >>>
> > >>> Signed-off-by: Luiz Capitulino 
> > >>> ---
> > >>>  cpus.c  |1 +
> > >>>  migration.c |8 +++-
> > >>>  sysemu.h|   10 +-
> > >>>  vl.c|   20 
> > >>>  4 files changed, 37 insertions(+), 2 deletions(-)
> > >>>
> > >>
> > >> ...
> > >>
> > >>> diff --git a/vl.c b/vl.c
> > >>> index f0b56a4..59f71fc 100644
> > >>> --- a/vl.c
> > >>> +++ b/vl.c
> > >>> @@ -321,6 +321,22 @@ static int default_driver_check(QemuOpts *opts, 
> > >>> void *opaque)
> > >>>  }
> > >>>  
> > >>>  /***/
> > >>> +/* QEMU state */
> > >>> +
> > >>> +static RunState current_run_state = RSTATE_NO_STATE;
> > >>> +
> > >>> +bool runstate_check(RunState state)
> > >>> +{
> > >>> +return current_run_state == state;
> > >>> +}
> > >>> +
> > >>> +void runstate_set(RunState state)
> > >>> +{
> > >>> +assert(state < RSTATE_MAX);
> > >>> +current_run_state = state;
> > >>
> > >> I still think this should check for valid state transitions instead of
> > >> blindly accepting what the caller passes in.
> > > 
> > > I thought your comment where more like a future enhancement than
> > > a request for change.
> > 
> > I think we want this now to document at a central place which
> > transitions are valid and which not. State machines without such checks
> > break sooner or later, subtly.
> 
> Ok, I'll do it.

I did it in v4 (just sent). But it wasn't trivial to do and to test, so
a careful review of that patch is very appreciated.



[Qemu-devel] [PATCH 8/9] QMP: query-status: Introduce 'status' key

2011-09-06 Thread Luiz Capitulino
This new key reports the current VM status to clients. Please, check
the documentation being added in this commit for more details.

Signed-off-by: Luiz Capitulino 
---
 monitor.c   |3 +--
 qmp-commands.hx |   19 ++-
 sysemu.h|1 +
 vl.c|   23 +++
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/monitor.c b/monitor.c
index 6e5bb56..7684ba9 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2636,8 +2636,7 @@ static void do_info_status_print(Monitor *mon, const 
QObject *data)
 
 static void do_info_status(Monitor *mon, QObject **ret_data)
 {
-*ret_data = qobject_from_jsonf("{ 'running': %i, 'singlestep': %i }",
-runstate_is_running(), singlestep);
+*ret_data = qobject_from_jsonf("{ 'running': %i, 'singlestep': %i, 
'status': %s }", runstate_is_running(), singlestep, runstate_as_string());
 }
 
 static qemu_acl *find_acl(Monitor *mon, const char *name)
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 27cc66e..0cf1321 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1571,11 +1571,28 @@ Return a json-object with the following information:
 - "running": true if the VM is running, or false if it is paused (json-bool)
 - "singlestep": true if the VM is in single step mode,
 false otherwise (json-bool)
+- "status": one of the following values (json-string)
+"debug" - QEMU is running on a debugger
+"inmigrate" - guest is paused waiting for an incoming migration
+"internal-error" - An internal error that prevents further guest
+execution has occurred
+"io-error" - the last IOP has failed and the device is configured
+to pause on I/O errors
+"paused" - guest has been paused via the 'stop' command
+"postmigrate" - guest is paused following a successful 'migrate'
+"prelaunch" - QEMU was started with -S and guest has not started
+"finish-migrate" - guest is paused to finish the migration process
+"restore-vm" - guest is paused to restore VM state
+"running" - guest is actively running
+"save-vm" - guest is paused to save the VM state
+"shutdown" - guest is shut down (and -no-shutdown is in use)
+"watchdog" - the watchdog action is configured to pause and
+ has been triggered
 
 Example:
 
 -> { "execute": "query-status" }
-<- { "return": { "running": true, "singlestep": false } }
+<- { "return": { "running": true, "singlestep": false, "status": "running" } }
 
 EQMP
 
diff --git a/sysemu.h b/sysemu.h
index c525bbe..635104e 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -38,6 +38,7 @@ int qemu_uuid_parse(const char *str, uint8_t *uuid);
 bool runstate_check(RunState state);
 void runstate_set(RunState new_state);
 int runstate_is_running(void);
+const char *runstate_as_string(void);
 typedef struct vm_change_state_entry VMChangeStateEntry;
 typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
 
diff --git a/vl.c b/vl.c
index 917c348..4867586 100644
--- a/vl.c
+++ b/vl.c
@@ -323,6 +323,22 @@ static int default_driver_check(QemuOpts *opts, void 
*opaque)
 /***/
 /* QEMU state */
 
+static const char *const runstate_name_tbl[RSTATE_MAX] = {
+[RSTATE_DEBUG] = "debug",
+[RSTATE_IN_MIGRATE] = "incoming-migration",
+[RSTATE_PANICKED] = "internal-error",
+[RSTATE_IO_ERROR] = "io-error",
+[RSTATE_PAUSED] = "paused",
+[RSTATE_POST_MIGRATE] = "post-migrate",
+[RSTATE_PRE_LAUNCH] = "prelaunch",
+[RSTATE_PRE_MIGRATE] = "finish-migrate",
+[RSTATE_RESTORE] = "restore-vm",
+[RSTATE_RUNNING] = "running",
+[RSTATE_SAVEVM] = "save-vm",
+[RSTATE_SHUTDOWN] = "shutdown",
+[RSTATE_WATCHDOG] = "watchdog",
+};
+
 static RunState current_run_state = RSTATE_NO_STATE;
 
 bool runstate_check(RunState state)
@@ -483,6 +499,13 @@ transition_ok:
 current_run_state = new_state;
 }
 
+const char *runstate_as_string(void)
+{
+assert(current_run_state > RSTATE_NO_STATE &&
+   current_run_state < RSTATE_MAX);
+return runstate_name_tbl[current_run_state];
+}
+
 int runstate_is_running(void)
 {
 return runstate_check(RSTATE_RUNNING);
-- 
1.7.7.rc0.72.g4b5ea




[Qemu-devel] [PATCH 3/9] RunState: Add additional states

2011-09-06 Thread Luiz Capitulino
Currently, only vm_start() and vm_stop() change the VM state.
That's, the state is only changed when starting or stopping the VM.

This commit adds the runstate_set() function, which makes it possible
to also do state transitions when the VM is stopped or running.

Additional states are also added and the current state is stored.

Signed-off-by: Luiz Capitulino 
---
 cpus.c  |1 +
 migration.c |8 +++-
 sysemu.h|9 -
 vl.c|   20 
 4 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/cpus.c b/cpus.c
index 76a79ac..3444bc0 100644
--- a/cpus.c
+++ b/cpus.c
@@ -124,6 +124,7 @@ static void do_vm_stop(RunState state)
 cpu_disable_ticks();
 vm_running = 0;
 pause_all_vcpus();
+runstate_set(state);
 vm_state_notify(0, state);
 qemu_aio_flush();
 bdrv_flush_all();
diff --git a/migration.c b/migration.c
index 29f1a76..f2499cf 100644
--- a/migration.c
+++ b/migration.c
@@ -72,8 +72,11 @@ void process_incoming_migration(QEMUFile *f)
 
 incoming_expected = false;
 
-if (autostart)
+if (autostart) {
 vm_start();
+} else {
+runstate_set(RSTATE_PRE_LAUNCH);
+}
 }
 
 int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
@@ -390,6 +393,9 @@ void migrate_fd_put_ready(void *opaque)
 }
 state = MIG_STATE_ERROR;
 }
+if (state == MIG_STATE_COMPLETED) {
+runstate_set(RSTATE_POST_MIGRATE);
+}
 s->state = state;
 notifier_list_notify(&migration_state_notifiers, NULL);
 }
diff --git a/sysemu.h b/sysemu.h
index a3846ca..19088aa 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -11,16 +11,21 @@
 /* vl.c */
 
 typedef enum {
+RSTATE_NO_STATE,
 RSTATE_DEBUG,  /* qemu is running under gdb */
+RSTATE_IN_MIGRATE, /* paused waiting for an incoming migration */
 RSTATE_PANICKED,   /* paused due to an internal error */
 RSTATE_IO_ERROR,   /* paused due to an I/O error */
 RSTATE_PAUSED, /* paused by the user (ie. the 'stop' command) */
+RSTATE_POST_MIGRATE,   /* paused following a successful migration */
+RSTATE_PRE_LAUNCH, /* qemu was started with -S and haven't started */
 RSTATE_PRE_MIGRATE,/* paused preparing to finish migrate */
 RSTATE_RESTORE,/* paused restoring the VM state */
 RSTATE_RUNNING,/* qemu is running */
 RSTATE_SAVEVM, /* paused saving VM state */
 RSTATE_SHUTDOWN,   /* guest shut down and -no-shutdown is in use */
-RSTATE_WATCHDOG/* watchdog fired and qemu is configured to pause */
+RSTATE_WATCHDOG,   /* watchdog fired and qemu is configured to pause */
+RSTATE_MAX
 } RunState;
 
 extern const char *bios_name;
@@ -31,6 +36,8 @@ extern uint8_t qemu_uuid[];
 int qemu_uuid_parse(const char *str, uint8_t *uuid);
 #define UUID_FMT 
"%02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
 
+bool runstate_check(RunState state);
+void runstate_set(RunState new_state);
 typedef struct vm_change_state_entry VMChangeStateEntry;
 typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
 
diff --git a/vl.c b/vl.c
index ca7d3c4..9926d2a 100644
--- a/vl.c
+++ b/vl.c
@@ -323,6 +323,22 @@ static int default_driver_check(QemuOpts *opts, void 
*opaque)
 }
 
 /***/
+/* QEMU state */
+
+static RunState current_run_state = RSTATE_NO_STATE;
+
+bool runstate_check(RunState state)
+{
+return current_run_state == state;
+}
+
+void runstate_set(RunState new_state)
+{
+assert(new_state < RSTATE_MAX);
+current_run_state = new_state;
+}
+
+/***/
 /* real time host monotonic timer */
 
 /***/
@@ -1161,6 +1177,7 @@ void vm_start(void)
 if (!vm_running) {
 cpu_enable_ticks();
 vm_running = 1;
+runstate_set(RSTATE_RUNNING);
 vm_state_notify(1, RSTATE_RUNNING);
 resume_all_vcpus();
 monitor_protocol_event(QEVENT_RESUME, NULL);
@@ -3437,6 +3454,7 @@ int main(int argc, char **argv, char **envp)
 }
 
 if (incoming) {
+runstate_set(RSTATE_IN_MIGRATE);
 int ret = qemu_start_incoming_migration(incoming);
 if (ret < 0) {
 fprintf(stderr, "Migration failed. Exit code %s(%d), exiting.\n",
@@ -3445,6 +3463,8 @@ int main(int argc, char **argv, char **envp)
 }
 } else if (autostart) {
 vm_start();
+} else {
+runstate_set(RSTATE_PRE_LAUNCH);
 }
 
 os_setup_post();
-- 
1.7.7.rc0.72.g4b5ea




Re: [Qemu-devel] [PATCH] scsi: refine constants for READ CAPACITY 16

2011-09-06 Thread Kevin Wolf
Am 06.09.2011 12:31, schrieb Paolo Bonzini:
> Rename SERVICE_ACTION_IN to SERVICE_ACTION_IN_16 to distinguish
> from the 12-byte CDB variant, and add a constant for the subcommand.
> 
> Signed-off-by: Paolo Bonzini 

Thanks, applied to the block branch.

Kevin



Re: [Qemu-devel] [PATCH] scsi: fill in additional sense length correctly

2011-09-06 Thread Kevin Wolf
Am 06.09.2011 12:31, schrieb Paolo Bonzini:
> Even though we do not use them, we should include the last three
> bytes of sense data in the additional sense length.
> 
> Signed-off-by: Paolo Bonzini 

Thanks, applied to the block branch.

Kevin



Re: [Qemu-devel] [PATCH] scsi: improve MODE SENSE emulation

2011-09-06 Thread Kevin Wolf
Am 06.09.2011 12:31, schrieb Paolo Bonzini:
> - do not return extra pages when requesting all pages (PAGE CODE = 0x3f)
> 
> - return correct sense code for PC = 3 (saved parameters not supported)
> 
> - do not return geometry pages for CD devices
> 
> Signed-off-by: Paolo Bonzini 

Thanks, applied to the block branch.

Kevin



[Qemu-devel] [PATCH 9/9] HMP: info status: Print the VM state

2011-09-06 Thread Luiz Capitulino
Today our printf format for the "info status" command is:

  VM status: %s

Where the string can be "running", "running (single step mode)" or
"paused".

This commit extends it to:

 VM status: %s (%s)

The second string corresponds to the "status" field as returned
by the query-status QMP command and it's only printed if "status"
is not "running" or "paused".

Example:

 VM status: paused (shutdown)

PS: libvirt uses "info status" when using HMP, but the new format
should not break it.

Signed-off-by: Luiz Capitulino 
---
 monitor.c |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/monitor.c b/monitor.c
index 7684ba9..860530b 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2618,6 +2618,7 @@ static int do_inject_nmi(Monitor *mon, const QDict 
*qdict, QObject **ret_data)
 static void do_info_status_print(Monitor *mon, const QObject *data)
 {
 QDict *qdict;
+const char *status;
 
 qdict = qobject_to_qdict(data);
 
@@ -2631,6 +2632,11 @@ static void do_info_status_print(Monitor *mon, const 
QObject *data)
 monitor_printf(mon, "paused");
 }
 
+status = qdict_get_str(qdict, "status");
+if (strcmp(status, "paused") && strcmp(status, "running")) {
+monitor_printf(mon, " (%s)", status);
+}
+
 monitor_printf(mon, "\n");
 }
 
-- 
1.7.7.rc0.72.g4b5ea




Re: [Qemu-devel] KVM call agenda for September 6

2011-09-06 Thread Juan Quintela
Juan Quintela  wrote:
> Hi
>
> Please send in any agenda items you are interested in covering.

To agenda items.
Anthony off.

Call got cancelled.

Happy hacking, Juan.



Re: [Qemu-devel] [Qemu-trivial] [PATCH] docs: Fix qdev-device-use.txt typo in -chardev serial, path=COM

2011-09-06 Thread Stefan Hajnoczi
On Tue, Sep 06, 2011 at 10:01:36AM +0200, Markus Armbruster wrote:
> 
> Signed-off-by: Markus Armbruster 
> ---
>  docs/qdev-device-use.txt |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)

Thanks, applied to the trivial patches -next tree:
http://repo.or.cz/w/qemu/stefanha.git/shortlog/refs/heads/trivial-patches-next

Stefan



Re: [Qemu-devel] [Qemu-trivial] [PATCH] configure: Avoid duplicate flags when calling compile_prog

2011-09-06 Thread Stefan Hajnoczi
On Sun, Sep 04, 2011 at 06:30:02PM +0200, Stefan Weil wrote:
> This patch removes redundant shell code and cleans it a little bit.
> 
> Shell macro compile_prog takes two arguments:
> local_cflags and local ldflags.
> 
> $QEMU_CFLAGS is added automatically to the cflags, so there is no need
> to pass it as an argument.
> 
> It is also unnecessary to pass -Werror twice.
> 
> $flag is a compiler warning option, so it should be in local_cflags
> instead of local_ldflags.
> 
> Signed-off-by: Stefan Weil 
> ---
>  configure |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)

Thanks, applied to the trivial patches -next tree:
http://repo.or.cz/w/qemu/stefanha.git/shortlog/refs/heads/trivial-patches-next

Stefan



Re: [Qemu-devel] [Qemu-trivial] [PATCH] docs: Fix qdev-device-use.txt typo in -chardev serial, path=COM

2011-09-06 Thread Stefan Hajnoczi
On Tue, Sep 06, 2011 at 10:01:36AM +0200, Markus Armbruster wrote:
> 
> Signed-off-by: Markus Armbruster 
> ---
>  docs/qdev-device-use.txt |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)

Thanks, applied to the trivial patches -next tree:
http://repo.or.cz/w/qemu/stefanha.git/shortlog/refs/heads/trivial-patches-next

Stefan



Re: [Qemu-devel] [PATCH] qemu-options: Improve help texts for options which depend on configure

2011-09-06 Thread Stefan Hajnoczi
On Mon, Sep 05, 2011 at 06:13:03PM +0200, Stefan Weil wrote:
> * Replace "available only" by the more common "only available".
> 
> * Tracing options depend on the configuration of the QEMU executable,
>   so clarify the help text for both options.
> 
> Cc: Stefan Hajnoczi 
> Signed-off-by: Stefan Weil 
> ---
>  qemu-options.hx |   10 +-
>  1 files changed, 5 insertions(+), 5 deletions(-)

Thanks, applied to the trivial patches -next tree:
http://repo.or.cz/w/qemu/stefanha.git/shortlog/refs/heads/trivial-patches-next

Stefan



Re: [Qemu-devel] [PATCH 1/2] trace: allow trace events with string arguments

2011-09-06 Thread Stefan Hajnoczi
On Mon, Sep 05, 2011 at 07:45:36PM +, Blue Swirl wrote:
> On Mon, Sep 5, 2011 at 3:37 PM, Stefan Hajnoczi
>  wrote:
> > String arguments are useful for producing human-readable traces without
> > post-processing (e.g. stderr backend).  Although the simple backend
> > cannot handles strings all others can.  Strings should be allowed and
> > the simple backend can be extended to support them.
> 
> I don't think this is possible in general. Yes if the string can be
> found in the executable (assuming address space randomizations don't
> make that impossible post run), but not if the string happens to be
> constructed in the stack or in the data segment during run time.
> 
> So I'd still at least strongly discourage string use.

I don't like strings either because they introduce variable-length data
that can be expensive to fetch.  But it's perfectly doable: stderr and
ust are in-process tracers that have easy access to the string no matter
where it is located in memory.  In SystemTap you only have the char*
argument but can use the userspace string copy-in function to fetch the
actual string (again, no matter where it lives in the process' address
space).

If your primary trace backend is stderr or ust it makes sense to use
them.  If you use SystemTap or simpletrace where it is relatively easy
to do post-processing, then it's lighter weight and nicer (IMO) to
record just the address and leave pretty-printing to a post-processing
step.

We can add string support to simpletrace by creating a new file format
version (or moving to a standarized trace file format).  The format
needs to support variable-length trace records, which the current format
cannot support.  Then tracetool can detect char* type arguments and add
strcpy code into the generated trace_*().

Anyway, tracing should be an aid to developers and users.  It should
make USB development easier for Gerd.  We need to compete with fprintf
here :).

I don't think there is a technical reason why strings are not possible,
it turns out only simpletrace can't do them and that's my fault.

Stefan



Re: [Qemu-devel] [PATCH 2/2] main: switch qemu_set_fd_handler to g_io_add_watch

2011-09-06 Thread Paolo Bonzini

On 08/22/2011 03:47 PM, Paolo Bonzini wrote:

On 08/22/2011 03:45 PM, Anthony Liguori wrote:


Almost: in Win32 you need to use g_io_channel_win32_new_socket. But
indeed on Windows you can only use qemu_set_fd_handler for sockets too.


I think that's really only for read/write though. If you're just
polling on I/O, it shouldn't matter IIUC.

If someone has a Windows box, they can confirm/deny by using qemu
-monitor tcp:localhost:1024,socket,nowait with this patch.


Actually you're right, it works automagically:

* On Win32, this can be used either for files opened with the MSVCRT
* (the Microsoft run-time C library) _open() or _pipe, including file
* descriptors 0, 1 and 2 (corresponding to stdin, stdout and stderr),
* or for Winsock SOCKETs. If the parameter is a legal file
* descriptor, it is assumed to be such, otherwise it should be a
* SOCKET. This relies on SOCKETs and file descriptors not
* overlapping. If you want to be certain, call either
* g_io_channel_win32_new_fd() or g_io_channel_win32_new_socket()
* instead as appropriate.

So this patch would even let interested people enable exec migration on
Windows.


Hmmm, after reading documentation better, this unfortunately is 
completely broken under Windows, for two reasons:


1) in patch 1/2 you're using the glib pollfds and passing them to 
select().  Unfortunately under Windows they are special and can only be 
passed to g_poll().  Unfortunately, this can be fixed by changing the 
QEMU main loop to use poll() instead of select()...


2) ... because glib IO channels cannot be used just for watches under 
Windows:


/* Create an IO channel for C runtime (emulated Unix-like) file
 * descriptors. After calling g_io_add_watch() on a IO channel
 * returned by this function, you shouldn't call read() on the file
 * descriptor. This is because adding polling for a file descriptor is
 * implemented on Win32 by starting a thread that sits blocked in a
 * read() from the file descriptor most of the time. All reads from
 * the file descriptor should be done by this internal GLib
 * thread. Your code should call only g_io_channel_read().
 */

So, I believe the right solution would be to drop this patch for now and 
make 1/2 conditional on !_WIN32.


Paolo



Re: [Qemu-devel] glib mainloop breaks virtfs

2011-09-06 Thread Anthony Liguori

On 09/06/2011 06:22 AM, Gerd Hoffmann wrote:

Hi,

virtfs stopped working for me in master, the guest (fedora 15) just
hangs at boot when mounting the virtfs filesystems. Bisecting points to
this commit:

rincewind kraxel ~/projects/qemu ((69e5bb6...)|BISECTING)# git bisect good
4d88a2ac8643265108ef1fb47ceee5d7b28e19f2 is the first bad commit
commit 4d88a2ac8643265108ef1fb47ceee5d7b28e19f2
Author: Anthony Liguori 
Date: Mon Aug 22 08:12:53 2011 -0500

main: switch qemu_set_fd_handler to g_io_add_watch


The v9fs code uses qemu_set_fd_handler to trigger coroutines.  I suspect 
this is not going to be a fun one to debug.


This changeset changes the ordering of when callbacks are fired so it 
may be triggering a latent bug in the coroutine usage in virtio-9p. 
Aneesh, can you take a look at it?


Regards,

Anthony Liguori



cheers,
Gerd






Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Michael S. Tsirkin
On Fri, Aug 26, 2011 at 04:48:10PM +0200, Jan Kiszka wrote:
> More than one year ago I posted some patches to add a monitor command
> callend device_show. The purpose of that command is to dump the state of
> some qdev device based on its vmstate.
> 
> To improve the usability of that interface, the previous series also
> tried to create a canonical qdev tree path name format and even added
> monitor command expansion. That format created quite a few discussions,
> and we never came to some conclusion.
> 
> As device_show is still a useful tool for debugging but qdev structuring
> and addressing may significantly change in the near future, I decided to
> reanimate those patches while avoiding almost any change of the
> addressing scheme. The only one I propose is automatic ID assignment for
> anonymous devices so that any qdev device is in principle addressable
> (e.g. APIC and IO-APIC on x86).
> 
> As back then, device_show remains HMP-only to avoid any stable ABI
> issues around QMP.

I'm afraid that won't be enough to stop people
scripting this command - libvirt accessed
HMP for years.

On the other hand, no QMP command means e.g.
libvirt users don't get any benefit from this.

What I think will solve these problems, for both HMP and QMP,
is an explicit 'debug_unstable' or 'debug_unsupported' command that will
expose all kind of debugging functionality making it
very explicit that it's an unsupported debugging utility.

Proposed syntax:

debug_unstable  

Example:

debug_unstable device_show -all


-- 
MSt



Re: [Qemu-devel] [PATCH V12 00/15] virtio-9p: chroot environment for passthrough security model

2011-09-06 Thread Stefan Hajnoczi
On Mon, Sep 05, 2011 at 09:48:21PM +0530, M. Mohan Kumar wrote:
> Qemu need to be invoked by root user for using virtfs with passthrough
> security model (i.e to use chroot() syscall).
> 
> Question is: Is running qemu by root user expected and allowed? Some of the
> virtfs features can be utilized only if qemu is started by root user (for
> example passthrough security model and handle based file driver need root
> privilege).
> 
> This issue can be resolved by root user starting qemu and spawning a process
> with root privilege to do all privileged operations there and main qemu
> process dropping its privileges to avoid any security issue in running qemu in
> root mode. Privileged operations can be done similar to the chroot patchset.
> 
> But how to determine to which user-id(ie non root user id) qemu needs to drop
> the privileges? Do we have a common user-id across all distributions/systems
> to which qemu process can be dropped down? Also it becomes too complex i.e 
> when
> a new feature needing root privilege is added, a process with root privilege
> needs to be created to handle this.
> 
> So is it allowed to run qemu by root user? If no, is it okay to add the
> complexity of adding a root privilege process for each feature that needs root
> privilege?

I believe libvirt performs the privilege dropping itself and then
invokes QEMU.  So in the context of KVM + libvirt we do not have
privileges in QEMU.  Of course the administrator can edit
/etc/libvirt/qemu.conf and configure the user to run QEMU as (i.e.
root).  But the intention here is to run QEMU unprivileged.

QEMU has its own -runas switch which may be used when QEMU is run
directly by a user or by custom scripts.  This switch looks up the user
and switches to their uid/gid/groups.

We need to think carefully before adding privileged features to QEMU
since they usually require extra configuration to safely limit the group
of users who may use the feature.  These features will be unavailable to
unprivileged users on a system.

The main part of QEMU (vcpu execution and device emulation) should never
run privileged.  This way attacks on QEMU's code are limited to giving
unprivileged access on the host.

A virtfs feature that needs root therefore needs to be in a separate
process.  Either QEMU needs to fork or virtfs could use a separate
daemon binary.

You have already implemented the fork approach in the chroot patches.
Handle-based open could work in the same way.

To summarize this architecture: all path-related operations are
performed by a separate privileged process.  File descriptors are passed
to QEMU over a UNIX domain socket.  This way QEMU can do the actual
read(2)/write(2) calls directly to/from guest memory.

I think it would be nice to build a completely separate binary that QEMU
connects to.  The separate binary would have a much smaller footprint
(doesn't include QEMU code).  More importantly the
privileged/unprivileged boundary would be simple and could be
automatically set up by libvirt:

$ sudo namespace_helper --sock /var/run/virtfs/1234.sock --export my_dir/
$ qemu -fsdev local,id=my_fs,namespace_helper=/var/run/virtfs/1234.sock \
   -device virtio-9p-pci,fsdev=my_fs

Stefan



Re: [Qemu-devel] TCG sar UB (fwd)

2011-09-06 Thread Richard Henderson
On 09/04/2011 08:03 AM, malc wrote:
> On Sun, 4 Sep 2011, Richard Henderson wrote:
> 
>> On 09/03/2011 03:47 PM, malc wrote:
>>> Doesn't make much sense to me, guest clearly asked for 0 and not -1,
>>> besides -1 violates TCG's sar constraints and PPC obliges by emiting
>>> illegal instruction in this case.
>>
>> The shift that the guest asked for was completely folded away.
>>
>> The -1 comes from gen_shift_rm_T1 in the computation of the new
>> flags value.  This could instead be moved inside the test for != 0,
>> which is the only place that value is actually used anyway.
>>
>> Try this.  Lightly tested.
> 
> Now i either get hosts illegal instruction or (with logging enabled) a
> guest kenrnel panic.

Hum.  Well, it's possible I made some silly mistake in the patch.
You now know where to look for the problem.

That said, it seems easier for you in the PPC port to mask the 
immediate operand so that you don't produce an illegal instruction.
It doesn't really matter what value that insn produces, as the
result won't be used.


r~



Re: [Qemu-devel] [PATCH V12 00/15] virtio-9p: chroot environment for passthrough security model

2011-09-06 Thread Stefan Hajnoczi
Sorry, I forgot to include Daniel Berrange who might have thoughts
about a nice way of running the privileged virtfs helper and how to
integrate with libvirt.

On Tue, Sep 6, 2011 at 3:48 PM, Stefan Hajnoczi  wrote:
> On Mon, Sep 05, 2011 at 09:48:21PM +0530, M. Mohan Kumar wrote:
>> Qemu need to be invoked by root user for using virtfs with passthrough
>> security model (i.e to use chroot() syscall).
>>
>> Question is: Is running qemu by root user expected and allowed? Some of the
>> virtfs features can be utilized only if qemu is started by root user (for
>> example passthrough security model and handle based file driver need root
>> privilege).
>>
>> This issue can be resolved by root user starting qemu and spawning a process
>> with root privilege to do all privileged operations there and main qemu
>> process dropping its privileges to avoid any security issue in running qemu 
>> in
>> root mode. Privileged operations can be done similar to the chroot patchset.
>>
>> But how to determine to which user-id(ie non root user id) qemu needs to drop
>> the privileges? Do we have a common user-id across all distributions/systems
>> to which qemu process can be dropped down? Also it becomes too complex i.e 
>> when
>> a new feature needing root privilege is added, a process with root privilege
>> needs to be created to handle this.
>>
>> So is it allowed to run qemu by root user? If no, is it okay to add the
>> complexity of adding a root privilege process for each feature that needs 
>> root
>> privilege?
>
> I believe libvirt performs the privilege dropping itself and then
> invokes QEMU.  So in the context of KVM + libvirt we do not have
> privileges in QEMU.  Of course the administrator can edit
> /etc/libvirt/qemu.conf and configure the user to run QEMU as (i.e.
> root).  But the intention here is to run QEMU unprivileged.
>
> QEMU has its own -runas switch which may be used when QEMU is run
> directly by a user or by custom scripts.  This switch looks up the user
> and switches to their uid/gid/groups.
>
> We need to think carefully before adding privileged features to QEMU
> since they usually require extra configuration to safely limit the group
> of users who may use the feature.  These features will be unavailable to
> unprivileged users on a system.
>
> The main part of QEMU (vcpu execution and device emulation) should never
> run privileged.  This way attacks on QEMU's code are limited to giving
> unprivileged access on the host.
>
> A virtfs feature that needs root therefore needs to be in a separate
> process.  Either QEMU needs to fork or virtfs could use a separate
> daemon binary.
>
> You have already implemented the fork approach in the chroot patches.
> Handle-based open could work in the same way.
>
> To summarize this architecture: all path-related operations are
> performed by a separate privileged process.  File descriptors are passed
> to QEMU over a UNIX domain socket.  This way QEMU can do the actual
> read(2)/write(2) calls directly to/from guest memory.
>
> I think it would be nice to build a completely separate binary that QEMU
> connects to.  The separate binary would have a much smaller footprint
> (doesn't include QEMU code).  More importantly the
> privileged/unprivileged boundary would be simple and could be
> automatically set up by libvirt:
>
> $ sudo namespace_helper --sock /var/run/virtfs/1234.sock --export my_dir/
> $ qemu -fsdev local,id=my_fs,namespace_helper=/var/run/virtfs/1234.sock \
>       -device virtio-9p-pci,fsdev=my_fs
>
> Stefan
>



Re: [Qemu-devel] TCG sar UB (fwd)

2011-09-06 Thread malc
On Tue, 6 Sep 2011, Richard Henderson wrote:

> On 09/04/2011 08:03 AM, malc wrote:
> > On Sun, 4 Sep 2011, Richard Henderson wrote:
> > 
> >> On 09/03/2011 03:47 PM, malc wrote:
> >>> Doesn't make much sense to me, guest clearly asked for 0 and not -1,
> >>> besides -1 violates TCG's sar constraints and PPC obliges by emiting
> >>> illegal instruction in this case.
> >>
> >> The shift that the guest asked for was completely folded away.
> >>
> >> The -1 comes from gen_shift_rm_T1 in the computation of the new
> >> flags value.  This could instead be moved inside the test for != 0,
> >> which is the only place that value is actually used anyway.
> >>
> >> Try this.  Lightly tested.
> > 
> > Now i either get hosts illegal instruction or (with logging enabled) a
> > guest kenrnel panic.
> 
> Hum.  Well, it's possible I made some silly mistake in the patch.
> You now know where to look for the problem.

Correct me if i'm wrong, previously the code worked like this:

mov tmp, 0
sub tmp, 1
sar r, r, tmp

Still UB as far as TCG is concerned but since no immediates are involved
things worked, now, with constant folding, we are asked to sar by -1 
directly.

> 
> That said, it seems easier for you in the PPC port to mask the 
> immediate operand so that you don't produce an illegal instruction.
> It doesn't really matter what value that insn produces, as the
> result won't be used.
> 

I did that when first hit this problem, but decided not to push it.

-- 
mailto:av1...@comtv.ru



[Qemu-devel] [Bug 818673] Re: virtio: trying to map MMIO memory

2011-09-06 Thread dx-vmonroig
We're affected by this bug, too. Trying to find a workaround, last
friday we changed VGA model to cirrus, and the machine is working
properly without new entries in log.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/818673

Title:
  virtio: trying to map MMIO memory

Status in QEMU:
  New

Bug description:
  Qemu host is Core i7, running Linux.  Guest is Windows XP sp3.
  Often, qemu will crash shortly after starting (1-5 minutes) with a statement 
"qemu-system-x86_64: virtio: trying to map MMIO memory"
  This has occured with qemu-kvm 0.14, qemu-kvm 0.14.1, qemu-0.15.0-rc0 and 
qemu 0.15.0-rc1.
  Qemu is started as such:
  qemu-system-x86_64 -cpu host -enable-kvm -pidfile /home/rick/qemu/hds/wxp.pid 
-drive file=/home/rick/qemu/hds/wxp.raw,if=virtio -m 768 -name WinXP -net 
nic,model=virtio -net user -localtime -usb -vga qxl -device virtio-serial 
-chardev spicevmc,name=vdagent,id=vdagent -device 
virtserialport,chardev=vdagent,name=com.redhat.spice.0 -spice 
port=1234,disable-ticketing -daemonize -monitor 
telnet:localhost:12341,server,nowait
  The WXP guest has virtio 1.1.16 drivers for net and scsi, and the most 
current spice binaries from spice-space.org.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/818673/+subscriptions



[Qemu-devel] [PATCH 04/31] async: Allow nested qemu_bh_poll calls

2011-09-06 Thread Kevin Wolf
qemu may segfault when a BH handler first deletes a BH and then (possibly
indirectly) calls a nested qemu_bh_poll(). This is because the inner instance
frees the BH and deletes it from the list that the outer one processes.

This patch deletes BHs only in the outermost qemu_bh_poll instance.

Commit 7887f620 already tried to achieve the same, but it assumed that the BH
handler would only delete its own BH. With a nested qemu_bh_poll(), this isn't
guaranteed, so that commit wasn't enough. Hope this one fixes it for real.

Signed-off-by: Kevin Wolf 
---
 async.c |   24 
 1 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/async.c b/async.c
index 9d4e960..ca13962 100644
--- a/async.c
+++ b/async.c
@@ -55,6 +55,9 @@ int qemu_bh_poll(void)
 {
 QEMUBH *bh, **bhp, *next;
 int ret;
+static int nesting = 0;
+
+nesting++;
 
 ret = 0;
 for (bh = first_bh; bh; bh = next) {
@@ -68,15 +71,20 @@ int qemu_bh_poll(void)
 }
 }
 
+nesting--;
+
 /* remove deleted bhs */
-bhp = &first_bh;
-while (*bhp) {
-bh = *bhp;
-if (bh->deleted) {
-*bhp = bh->next;
-g_free(bh);
-} else
-bhp = &bh->next;
+if (!nesting) {
+bhp = &first_bh;
+while (*bhp) {
+bh = *bhp;
+if (bh->deleted) {
+*bhp = bh->next;
+g_free(bh);
+} else {
+bhp = &bh->next;
+}
+}
 }
 
 return ret;
-- 
1.7.6




[Qemu-devel] [PATCH 02/31] qcow2: Properly initialise QcowL2Meta

2011-09-06 Thread Kevin Wolf
Dependency list pointers filled with random garbage from the stack aren't a
good idea.

Signed-off-by: Kevin Wolf 
---
 block/qcow2.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index b725d68..f26f7b6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -526,13 +526,14 @@ static int qcow2_co_writev(BlockDriverState *bs,
 int n_end;
 int ret;
 int cur_nr_sectors; /* number of sectors in current iteration */
-QCowL2Meta l2meta;
 uint64_t cluster_offset;
 QEMUIOVector hd_qiov;
 uint64_t bytes_done = 0;
 uint8_t *cluster_data = NULL;
+QCowL2Meta l2meta = {
+.nb_clusters = 0,
+};
 
-l2meta.nb_clusters = 0;
 qemu_co_queue_init(&l2meta.dependent_requests);
 
 qemu_iovec_init(&hd_qiov, qiov->niov);
-- 
1.7.6




[Qemu-devel] [PATCH 29/31] scsi: refine constants for READ CAPACITY 16

2011-09-06 Thread Kevin Wolf
From: Paolo Bonzini 

Rename SERVICE_ACTION_IN to SERVICE_ACTION_IN_16 to distinguish
from the 12-byte CDB variant, and add a constant for the subcommand.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Kevin Wolf 
---
 hw/scsi-bus.c  |3 ++-
 hw/scsi-defs.h |8 +++-
 hw/scsi-disk.c |6 +++---
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 6f0d039..d6fd19a 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -977,7 +977,7 @@ static const char *scsi_command_name(uint8_t cmd)
 [ LOCATE_16] = "LOCATE_16",
 [ WRITE_SAME_16] = "WRITE_SAME_16",
 [ ERASE_16 ] = "ERASE_16",
-[ SERVICE_ACTION_IN] = "SERVICE_ACTION_IN",
+[ SERVICE_ACTION_IN_16 ] = "SERVICE_ACTION_IN_16",
 [ WRITE_LONG_16] = "WRITE_LONG_16",
 [ REPORT_LUNS  ] = "REPORT_LUNS",
 [ BLANK] = "BLANK",
@@ -987,6 +987,7 @@ static const char *scsi_command_name(uint8_t cmd)
 [ LOAD_UNLOAD  ] = "LOAD_UNLOAD",
 [ READ_12  ] = "READ_12",
 [ WRITE_12 ] = "WRITE_12",
+[ SERVICE_ACTION_IN_12 ] = "SERVICE_ACTION_IN_12",
 [ WRITE_VERIFY_12  ] = "WRITE_VERIFY_12",
 [ VERIFY_12] = "VERIFY_12",
 [ SEARCH_HIGH_12   ] = "SEARCH_HIGH_12",
diff --git a/hw/scsi-defs.h b/hw/scsi-defs.h
index ea288fa..bfe9392 100644
--- a/hw/scsi-defs.h
+++ b/hw/scsi-defs.h
@@ -102,7 +102,7 @@
 #define LOCATE_16 0x92
 #define WRITE_SAME_16 0x93
 #define ERASE_16  0x93
-#define SERVICE_ACTION_IN 0x9e
+#define SERVICE_ACTION_IN_16  0x9e
 #define WRITE_LONG_16 0x9f
 #define REPORT_LUNS   0xa0
 #define BLANK 0xa1
@@ -112,6 +112,7 @@
 #define LOAD_UNLOAD   0xa6
 #define READ_12   0xa8
 #define WRITE_12  0xaa
+#define SERVICE_ACTION_IN_12  0xab
 #define WRITE_VERIFY_12   0xae
 #define VERIFY_12 0xaf
 #define SEARCH_HIGH_120xb0
@@ -123,6 +124,11 @@
 #define SET_CD_SPEED  0xbb
 
 /*
+ * SERVICE ACTION IN subcodes
+ */
+#define SAI_READ_CAPACITY_16  0x10
+
+/*
  *  SAM Status codes
  */
 
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index c23c2b8..ead4163 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -890,9 +890,9 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, 
uint8_t *outbuf)
 outbuf[7] = 8; // CD-ROM
 buflen = 8;
 break;
-case SERVICE_ACTION_IN:
+case SERVICE_ACTION_IN_16:
 /* Service Action In subcommands. */
-if ((req->cmd.buf[1] & 31) == 0x10) {
+if ((req->cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
 DPRINTF("SAI READ CAPACITY(16)\n");
 memset(outbuf, 0, req->cmd.xfer);
 bdrv_get_geometry(s->bs, &nb_sectors);
@@ -992,7 +992,7 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t 
*buf)
 case READ_CAPACITY_10:
 case READ_TOC:
 case GET_CONFIGURATION:
-case SERVICE_ACTION_IN:
+case SERVICE_ACTION_IN_16:
 case VERIFY_10:
 rc = scsi_disk_emulate_command(r, outbuf);
 if (rc < 0) {
-- 
1.7.6




[Qemu-devel] [PATCH 17/31] spitz tosa: Simplify "drive is suitable for microdrive" test

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

We try the drive defined with -drive if=ide,index=0 (or equivalent
sugar).  We use it only if (dinfo && bdrv_is_inserted(dinfo->bdrv) &&
!bdrv_is_removable(dinfo->bdrv)).  This is a convoluted way to test
for "drive media can't be removed".

The only way to create such a drive with -drive if=ide is media=cdrom.
And that sets dinfo->media_cd, so just test that.

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 hw/spitz.c |   10 +++---
 hw/tosa.c  |   10 +++---
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/hw/spitz.c b/hw/spitz.c
index c05b5f7..0adae59 100644
--- a/hw/spitz.c
+++ b/hw/spitz.c
@@ -708,17 +708,13 @@ static void spitz_ssp_attach(PXA2xxState *cpu)
 static void spitz_microdrive_attach(PXA2xxState *cpu, int slot)
 {
 PCMCIACardState *md;
-BlockDriverState *bs;
 DriveInfo *dinfo;
 
 dinfo = drive_get(IF_IDE, 0, 0);
-if (!dinfo)
+if (!dinfo || dinfo->media_cd)
 return;
-bs = dinfo->bdrv;
-if (bdrv_is_inserted(bs) && !bdrv_is_removable(bs)) {
-md = dscm1_init(dinfo);
-pxa2xx_pcmcia_attach(cpu->pcmcia[slot], md);
-}
+md = dscm1_init(dinfo);
+pxa2xx_pcmcia_attach(cpu->pcmcia[slot], md);
 }
 
 /* Wm8750 and Max7310 on I2C */
diff --git a/hw/tosa.c b/hw/tosa.c
index a7967a2..7b407f4 100644
--- a/hw/tosa.c
+++ b/hw/tosa.c
@@ -51,17 +51,13 @@
 static void tosa_microdrive_attach(PXA2xxState *cpu)
 {
 PCMCIACardState *md;
-BlockDriverState *bs;
 DriveInfo *dinfo;
 
 dinfo = drive_get(IF_IDE, 0, 0);
-if (!dinfo)
+if (!dinfo || dinfo->media_cd)
 return;
-bs = dinfo->bdrv;
-if (bdrv_is_inserted(bs) && !bdrv_is_removable(bs)) {
-md = dscm1_init(dinfo);
-pxa2xx_pcmcia_attach(cpu->pcmcia[0], md);
-}
+md = dscm1_init(dinfo);
+pxa2xx_pcmcia_attach(cpu->pcmcia[0], md);
 }
 
 static void tosa_out_switch(void *opaque, int line, int level)
-- 
1.7.6




[Qemu-devel] [PATCH 27/31] scsi: execute SYNCHRONIZE_CACHE asynchronously

2011-09-06 Thread Kevin Wolf
From: Paolo Bonzini 

Signed-off-by: Paolo Bonzini 
Signed-off-by: Kevin Wolf 
---
 hw/scsi-disk.c |   41 +
 1 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 3cc830f..5c652a2 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -129,6 +129,24 @@ static void scsi_read_complete(void * opaque, int ret)
 scsi_req_data(&r->req, r->iov.iov_len);
 }
 
+static void scsi_flush_complete(void * opaque, int ret)
+{
+SCSIDiskReq *r = (SCSIDiskReq *)opaque;
+SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
+
+if (r->req.aiocb != NULL) {
+r->req.aiocb = NULL;
+bdrv_acct_done(s->bs, &r->acct);
+}
+
+if (ret < 0) {
+if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) {
+return;
+}
+}
+
+scsi_req_complete(&r->req, GOOD);
+}
 
 /* Read more data from scsi device into buffer.  */
 static void scsi_read_data(SCSIRequest *req)
@@ -792,7 +810,6 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, 
uint8_t *outbuf)
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
 uint64_t nb_sectors;
 int buflen = 0;
-int ret;
 
 switch (req->cmd.buf[0]) {
 case TEST_UNIT_READY:
@@ -864,20 +881,6 @@ static int scsi_disk_emulate_command(SCSIDiskReq *r, 
uint8_t *outbuf)
 outbuf[7] = 0;
 buflen = 8;
 break;
-case SYNCHRONIZE_CACHE:
-{
-BlockAcctCookie acct;
-
-bdrv_acct_start(s->bs, &acct, 0, BDRV_ACCT_FLUSH);
-ret = bdrv_flush(s->bs);
-bdrv_acct_done(s->bs, &acct);
-if (ret < 0) {
-if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_FLUSH)) {
-return -1;
-}
-}
-break;
-}
 case GET_CONFIGURATION:
 memset(outbuf, 0, 8);
 /* ??? This should probably return much more information.  For now
@@ -985,7 +988,6 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t 
*buf)
 case START_STOP:
 case ALLOW_MEDIUM_REMOVAL:
 case READ_CAPACITY_10:
-case SYNCHRONIZE_CACHE:
 case READ_TOC:
 case GET_CONFIGURATION:
 case SERVICE_ACTION_IN:
@@ -997,6 +999,13 @@ static int32_t scsi_send_command(SCSIRequest *req, uint8_t 
*buf)
 
 r->iov.iov_len = rc;
 break;
+case SYNCHRONIZE_CACHE:
+bdrv_acct_start(s->bs, &r->acct, 0, BDRV_ACCT_FLUSH);
+r->req.aiocb = bdrv_aio_flush(s->bs, scsi_flush_complete, r);
+if (r->req.aiocb == NULL) {
+scsi_flush_complete(r, -EIO);
+}
+return 0;
 case READ_6:
 case READ_10:
 case READ_12:
-- 
1.7.6




Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Jan Kiszka
On 2011-09-06 16:48, Michael S. Tsirkin wrote:
> On Fri, Aug 26, 2011 at 04:48:10PM +0200, Jan Kiszka wrote:
>> More than one year ago I posted some patches to add a monitor command
>> callend device_show. The purpose of that command is to dump the state of
>> some qdev device based on its vmstate.
>>
>> To improve the usability of that interface, the previous series also
>> tried to create a canonical qdev tree path name format and even added
>> monitor command expansion. That format created quite a few discussions,
>> and we never came to some conclusion.
>>
>> As device_show is still a useful tool for debugging but qdev structuring
>> and addressing may significantly change in the near future, I decided to
>> reanimate those patches while avoiding almost any change of the
>> addressing scheme. The only one I propose is automatic ID assignment for
>> anonymous devices so that any qdev device is in principle addressable
>> (e.g. APIC and IO-APIC on x86).
>>
>> As back then, device_show remains HMP-only to avoid any stable ABI
>> issues around QMP.
> 
> I'm afraid that won't be enough to stop people
> scripting this command - libvirt accessed
> HMP for years.
> 
> On the other hand, no QMP command means e.g.
> libvirt users don't get any benefit from this.
> 
> What I think will solve these problems, for both HMP and QMP,
> is an explicit 'debug_unstable' or 'debug_unsupported' command that will
> expose all kind of debugging functionality making it
> very explicit that it's an unsupported debugging utility.
> 
> Proposed syntax:
> 
> debug_unstable  
> 
> Example:
> 
> debug_unstable device_show -all

For HMP, this would needlessly complicate the user interface, nothing I
would support. People scripting things on top of HMP are generally doing
this on their own risk and cannot expect output stability.

device_show is like info qtree: the output will naturally change as the
emulated hardware evolves, information is added/removed, or we simply
improve the layout. Recent changes on info network are an example for
the latter.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux



[Qemu-devel] [PATCH 22/31] VMDK: Opening compressed extent.

2011-09-06 Thread Kevin Wolf
From: Fam Zheng 

Added flags field for compressed/streamOptimized extents, open and save
image configuration.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/vmdk.c |   16 
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 106fdbd..8a96cfd 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -26,9 +26,13 @@
 #include "qemu-common.h"
 #include "block_int.h"
 #include "module.h"
+#include "zlib.h"
 
 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
+#define VMDK4_COMPRESSION_DEFLATE 1
+#define VMDK4_FLAG_COMPRESS (1 << 16)
+#define VMDK4_FLAG_MARKER (1 << 17)
 
 typedef struct {
 uint32_t version;
@@ -56,6 +60,7 @@ typedef struct {
 int64_t grain_offset;
 char filler[1];
 char check_bytes[4];
+uint16_t compressAlgorithm;
 } QEMU_PACKED VMDK4Header;
 
 #define L2_CACHE_SIZE 16
@@ -63,6 +68,8 @@ typedef struct {
 typedef struct VmdkExtent {
 BlockDriverState *file;
 bool flat;
+bool compressed;
+bool has_marker;
 int64_t sectors;
 int64_t end_sector;
 int64_t flat_start_offset;
@@ -98,6 +105,12 @@ typedef struct VmdkMetaData {
 int valid;
 } VmdkMetaData;
 
+typedef struct VmdkGrainMarker {
+uint64_t lba;
+uint32_t size;
+uint8_t  data[0];
+} VmdkGrainMarker;
+
 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
 uint32_t magic;
@@ -423,6 +436,9 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
   l1_size,
   le32_to_cpu(header.num_gtes_per_gte),
   le64_to_cpu(header.granularity));
+extent->compressed =
+le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
+extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
 ret = vmdk_init_tables(bs, extent);
 if (ret) {
 /* free extent allocated by vmdk_add_extent */
-- 
1.7.6




Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Anthony Liguori

On 09/06/2011 10:45 AM, Jan Kiszka wrote:

On 2011-09-06 16:48, Michael S. Tsirkin wrote:

I'm afraid that won't be enough to stop people
scripting this command - libvirt accessed
HMP for years.

On the other hand, no QMP command means e.g.
libvirt users don't get any benefit from this.

What I think will solve these problems, for both HMP and QMP,
is an explicit 'debug_unstable' or 'debug_unsupported' command that will
expose all kind of debugging functionality making it
very explicit that it's an unsupported debugging utility.

Proposed syntax:

debug_unstable  

Example:

debug_unstable device_show -all


For HMP, this would needlessly complicate the user interface, nothing I
would support. People scripting things on top of HMP are generally doing
this on their own risk and cannot expect output stability.

device_show is like info qtree: the output will naturally change as the
emulated hardware evolves, information is added/removed, or we simply
improve the layout. Recent changes on info network are an example for
the latter.


Yeah, I'm not worried about stability.  HMP commands that aren't exposed 
as QMP commands are inherently unstable and should not be scripted to.


I'm still contemplating how we go about doing this.  This series 
introduces a couple new concepts like QMP class hinting anonymous IDs. 
I'm concerned that we'll further complicate the need to support 
backwards compatibility.


Would the command be useful if you couldn't address devices?  If it just 
dumped the full machine state all at once?  That would at least obviate 
the need to add anonymous IDs.


Regards,

Anthony Liguori



Jan






Re: [Qemu-devel] [PATCH 4/9] runstate_set(): Check for valid transitions

2011-09-06 Thread Jan Kiszka
On 2011-09-06 15:14, Luiz Capitulino wrote:
> This commit could have been folded with the previous one, however
> doing it separately will allow for easy bisect and revert if needed.
> 
> Checking and testing all valid transitions wasn't trivial, chances
> are this will need broader testing to become more stable.
> 
> Signed-off-by: Luiz Capitulino 
> ---
>  vl.c |  149 
> +-
>  1 files changed, 148 insertions(+), 1 deletions(-)
> 
> diff --git a/vl.c b/vl.c
> index 9926d2a..fe3628a 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -332,9 +332,156 @@ bool runstate_check(RunState state)
>  return current_run_state == state;
>  }
>  
> +/* This function will abort() on invalid state transitions */
>  void runstate_set(RunState new_state)
>  {
> -assert(new_state < RSTATE_MAX);
> +switch (current_run_state) {
> +case RSTATE_NO_STATE:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +case RSTATE_IN_MIGRATE:
> +case RSTATE_PRE_LAUNCH:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_DEBUG:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_IN_MIGRATE:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +case RSTATE_PRE_LAUNCH:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_PANICKED:
> +switch (new_state) {
> +case RSTATE_PAUSED:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_IO_ERROR:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_PAUSED:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_POST_MIGRATE:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_PRE_LAUNCH:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +case RSTATE_POST_MIGRATE:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_PRE_MIGRATE:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +case RSTATE_POST_MIGRATE:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_RESTORE:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_RUNNING:
> +switch (new_state) {
> +case RSTATE_DEBUG:
> +case RSTATE_PANICKED:
> +case RSTATE_IO_ERROR:
> +case RSTATE_PAUSED:
> +case RSTATE_PRE_MIGRATE:
> +case RSTATE_RESTORE:
> +case RSTATE_SAVEVM:
> +case RSTATE_SHUTDOWN:
> +case RSTATE_WATCHDOG:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_SAVEVM:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_SHUTDOWN:
> +switch (new_state) {
> +case RSTATE_PAUSED:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +case RSTATE_WATCHDOG:
> +switch (new_state) {
> +case RSTATE_RUNNING:
> +goto transition_ok;
> +default:
> +/* invalid transition */
> +abort();
> +}
> +abort();
> +default:
> +fprintf(stderr, "current run state is invalid: %s\n",
> +runstate_as_string());
> +abort();
> +}
> +
> +transition_ok:
>  current_run_state = new_state;
>  }
>  

I haven't looked at the tr

Re: [Qemu-devel] [PATCH 2/2] main: switch qemu_set_fd_handler to g_io_add_watch

2011-09-06 Thread Anthony Liguori

On 09/06/2011 09:31 AM, Paolo Bonzini wrote:

On 08/22/2011 03:47 PM, Paolo Bonzini wrote:

On 08/22/2011 03:45 PM, Anthony Liguori wrote:


Almost: in Win32 you need to use g_io_channel_win32_new_socket. But
indeed on Windows you can only use qemu_set_fd_handler for sockets too.


I think that's really only for read/write though. If you're just
polling on I/O, it shouldn't matter IIUC.

If someone has a Windows box, they can confirm/deny by using qemu
-monitor tcp:localhost:1024,socket,nowait with this patch.


Actually you're right, it works automagically:

* On Win32, this can be used either for files opened with the MSVCRT
* (the Microsoft run-time C library) _open() or _pipe, including file
* descriptors 0, 1 and 2 (corresponding to stdin, stdout and stderr),
* or for Winsock SOCKETs. If the parameter is a legal file
* descriptor, it is assumed to be such, otherwise it should be a
* SOCKET. This relies on SOCKETs and file descriptors not
* overlapping. If you want to be certain, call either
* g_io_channel_win32_new_fd() or g_io_channel_win32_new_socket()
* instead as appropriate.

So this patch would even let interested people enable exec migration on
Windows.


Hmmm, after reading documentation better, this unfortunately is
completely broken under Windows, for two reasons:

1) in patch 1/2 you're using the glib pollfds and passing them to
select(). Unfortunately under Windows they are special and can only be
passed to g_poll(). Unfortunately, this can be fixed by changing the
QEMU main loop to use poll() instead of select()...


Hrm, okay.


2) ... because glib IO channels cannot be used just for watches under
Windows:

/* Create an IO channel for C runtime (emulated Unix-like) file
* descriptors. After calling g_io_add_watch() on a IO channel
* returned by this function, you shouldn't call read() on the file
* descriptor. This is because adding polling for a file descriptor is
* implemented on Win32 by starting a thread that sits blocked in a
* read() from the file descriptor most of the time. All reads from
* the file descriptor should be done by this internal GLib
* thread. Your code should call only g_io_channel_read().
*/

So, I believe the right solution would be to drop this patch for now and
make 1/2 conditional on !_WIN32.


So it should be possible to add a new Source type that just selects on a 
file descriptor and avoid GIOChannels?


Regards,

Anthony Liguori



Paolo





[Qemu-devel] [PATCH 08/31] ide: Update command code definitions as per ACS-2 Table B.2

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

Drop WIN_SRST, it has the same value as WIN_DEVICE_RESET.

Drop unused WIN_RESTORE, it has the same value as WIN_RECAL.

Drop codes that are not implemented and long obsolete: WIN_READ_LONG,
WIN_READ_LONG_ONCE, WIN_WRITE_LONG, WIN_WRITE_LONG_ONCE, WIN_FORMAT
(all obsolete since ATA4), WIN_ACKMEDIACHANGE, WIN_POSTBOOT,
WIN_PREBOOT (obsolete since ATA3), WIN_WRITE_SAME (obsolete since
ATA3, code reused for something else in ACS2), WIN_IDENTIFY_DMA
(obsolete since ATA4).

Drop codes that are not implemented and vendor-specific:
EXABYTE_ENABLE_NEST, DISABLE_SEAGATE.

Drop WIN_INIT, it isn't implemented, its value used to be reserved,
and is used for something else since ATA8.

CFA_IDLEIMMEDIATE isn't specific to CFATA.  ACS-2 shows it as a
defined command in ATA-1, -2 and -3.  Rename to WIN_IDLEIMMEDIATE2.

Mark vendor specific, retired, and obsolete codes.

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 hw/ide/core.c |4 +-
 hw/ide/internal.h |  171 -
 2 files changed, 92 insertions(+), 83 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 47a706e..d1cbfe7 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1129,7 +1129,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 case WIN_STANDBYNOW1:
 case WIN_STANDBYNOW2:
 case WIN_IDLEIMMEDIATE:
-case CFA_IDLEIMMEDIATE:
+case WIN_IDLEIMMEDIATE2:
 case WIN_SETIDLE1:
 case WIN_SETIDLE2:
 case WIN_SLEEPNOW1:
@@ -1168,7 +1168,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
   */
 ide_set_irq(s->bus);
 break;
-case WIN_SRST:
+case WIN_DEVICE_RESET:
 if (s->drive_kind != IDE_CD)
 goto abort_cmd;
 ide_set_signature(s);
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 7f5ef8d..1117852 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -55,111 +55,120 @@ typedef struct IDEDMAOps IDEDMAOps;
 #define IDE_CMD_RESET   0x04
 #define IDE_CMD_DISABLE_IRQ 0x02
 
-/* ATA/ATAPI Commands pre T13 Spec */
+/* ACS-2 T13/2015-D Table B.2 Command codes */
 #define WIN_NOP0x00
-/*
- * 0x01->0x02 Reserved
- */
+/* reserved 0x01..0x02 */
 #define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code 
*/
-/*
- *  0x04->0x05 Reserved
- */
+/* reserved 0x04..0x05 */
 #define WIN_DSM 0x06
-/*
- *  0x07 Reserved
- */
-#define WIN_SRST   0x08 /* ATAPI soft reset command */
+/* reserved 0x07 */
 #define WIN_DEVICE_RESET   0x08
-/*
- * 0x09->0x0F Reserved
- */
-#define WIN_RECAL  0x10
-#define WIN_RESTOREWIN_RECAL
-/*
- * 0x10->0x1F Reserved
- */
+/* reserved 0x09..0x0a */
+/* REQUEST SENSE DATA EXT   0x0B */
+/* reserved 0x0C..0x0F */
+#define WIN_RECAL   0x10 /* obsolete since ATA4 */
+/* obsolete since ATA3, retired in ATA4 0x11..0x1F */
 #define WIN_READ   0x20 /* 28-Bit */
-#define WIN_READ_ONCE  0x21 /* 28-Bit without retries */
-#define WIN_READ_LONG  0x22 /* 28-Bit */
-#define WIN_READ_LONG_ONCE 0x23 /* 28-Bit without retries */
+#define WIN_READ_ONCE   0x21 /* 28-Bit w/o retries, obsolete 
since ATA5 */
+/* obsolete since ATA4  0x22..0x23 */
 #define WIN_READ_EXT   0x24 /* 48-Bit */
 #define WIN_READDMA_EXT0x25 /* 48-Bit */
-#define WIN_READDMA_QUEUED_EXT 0x26 /* 48-Bit */
+#define WIN_READDMA_QUEUED_EXT  0x26 /* 48-Bit, obsolete since ACS2 */
 #define WIN_READ_NATIVE_MAX_EXT0x27 /* 48-Bit */
-/*
- * 0x28
- */
+/* reserved 0x28 */
 #define WIN_MULTREAD_EXT   0x29 /* 48-Bit */
-/*
- * 0x2A->0x2F Reserved
- */
+/* READ STREAM DMA EXT  0x2A */
+/* READ STREAM EXT  0x2B */
+/* reserved 0x2C..0x2E */
+/* READ LOG EXT 0x2F */
 #define WIN_WRITE  0x30 /* 28-Bit */
-#define WIN_WRITE_ONCE 0x31 /* 28-Bit without retries */
-#define WIN_WRITE_LONG 0x32 /* 28-Bit */
-#define WIN_WRITE_LONG_ONCE0x33 /* 28-Bit without retries */
+#define WIN_WRITE_ONCE  0x31 /* 28-Bit w/o retries, obsolete 
since ATA5 */
+/* obsolete since ATA4  0x32..0x33 */
 #define WIN_WRITE_EXT  0x34 /* 48-Bit */
 #define WIN_WRITEDMA_EXT   0x35 /* 48-Bit */
 #define WIN_WRITEDMA_QUEUED_EXT0x36 /* 48-Bit */
+#define WIN_SET_MAX_EXT 0x37 /* 48-Bit, obsolete since ACS2 */
 #define WIN_SET_MAX_EXT

Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Jan Kiszka
On 2011-09-06 17:51, Anthony Liguori wrote:
> I'm still contemplating how we go about doing this.  This series 
> introduces a couple new concepts like QMP class hinting anonymous IDs. 
> I'm concerned that we'll further complicate the need to support 
> backwards compatibility.

Anonymous IDs must not be considered stable. If you prefer, we could try
to filter them out for QMP use. Need to check though if there is a good
location to catch them, but the rule is trivial.

I will also happily drop anonymous IDs again once we have converted all
devices to stable QOM full path addressing. But that will simply take
too much time to wait for it I'm afraid.

> 
> Would the command be useful if you couldn't address devices?  If it just 
> dumped the full machine state all at once?  That would at least obviate 
> the need to add anonymous IDs.

Theoretically usable, but extremely unhandy. I would not like so see
such an interface exposed to users.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Michael S. Tsirkin
On Tue, Sep 06, 2011 at 10:51:26AM -0500, Anthony Liguori wrote:
> On 09/06/2011 10:45 AM, Jan Kiszka wrote:
> >On 2011-09-06 16:48, Michael S. Tsirkin wrote:
> >>I'm afraid that won't be enough to stop people
> >>scripting this command - libvirt accessed
> >>HMP for years.
> >>
> >>On the other hand, no QMP command means e.g.
> >>libvirt users don't get any benefit from this.
> >>
> >>What I think will solve these problems, for both HMP and QMP,
> >>is an explicit 'debug_unstable' or 'debug_unsupported' command that will
> >>expose all kind of debugging functionality making it
> >>very explicit that it's an unsupported debugging utility.
> >>
> >>Proposed syntax:
> >>
> >>debug_unstable  
> >>
> >>Example:
> >>
> >>debug_unstable device_show -all
> >
> >For HMP, this would needlessly complicate the user interface, nothing I
> >would support. People scripting things on top of HMP are generally doing
> >this on their own risk and cannot expect output stability.
> >
> >device_show is like info qtree: the output will naturally change as the
> >emulated hardware evolves, information is added/removed, or we simply
> >improve the layout. Recent changes on info network are an example for
> >the latter.
> 
> Yeah, I'm not worried about stability.  HMP commands that aren't
> exposed as QMP commands are inherently unstable and should not be
> scripted to.

They are also not accessible when using libvirt, right?
Which means almost all cases I care about: debugging on my laptop
I can easily attach with gdb and inspect state.

> I'm still contemplating how we go about doing this.  This series
> introduces a couple new concepts like QMP class hinting anonymous
> IDs. I'm concerned that we'll further complicate the need to support
> backwards compatibility.

Dazed and confused. Above you stated these commands are
inherently unstable and no need to support?

> Would the command be useful if you couldn't address devices?  If it
> just dumped the full machine state all at once?  That would at least
> obviate the need to add anonymous IDs.
> 
> Regards,
> 
> Anthony Liguori
> 
> >
> >Jan
> >



Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Anthony Liguori

On 09/06/2011 11:05 AM, Jan Kiszka wrote:

On 2011-09-06 17:51, Anthony Liguori wrote:

I'm still contemplating how we go about doing this.  This series
introduces a couple new concepts like QMP class hinting anonymous IDs.
I'm concerned that we'll further complicate the need to support
backwards compatibility.


Anonymous IDs must not be considered stable. If you prefer, we could try
to filter them out for QMP use. Need to check though if there is a good
location to catch them, but the rule is trivial.

I will also happily drop anonymous IDs again once we have converted all
devices to stable QOM full path addressing. But that will simply take
too much time to wait for it I'm afraid.



Would the command be useful if you couldn't address devices?  If it just
dumped the full machine state all at once?  That would at least obviate
the need to add anonymous IDs.


Theoretically usable, but extremely unhandy. I would not like so see
such an interface exposed to users.


What about just taking the device type (iow the savevm section name)?

In most scenarios, it's probably easier for a user and will still only 
have a single match.


Regards,

Anthony Liguori



Jan






[Qemu-devel] [PATCH 26/31] VMDK: bugfix, opening vSphere 4 exported image

2011-09-06 Thread Kevin Wolf
From: Fam Zheng 

The vSphere 4 exported image is streamOptimized extent, which is not
quite correctly handled. Ignore rdgOffset when RGD flag bit not set.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/vmdk.c |9 +++--
 1 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 892b18e..6c8edfc 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -31,6 +31,7 @@
 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
 #define VMDK4_COMPRESSION_DEFLATE 1
+#define VMDK4_FLAG_RGD (1 << 1)
 #define VMDK4_FLAG_COMPRESS (1 << 16)
 #define VMDK4_FLAG_MARKER (1 << 17)
 
@@ -55,8 +56,8 @@ typedef struct {
 int64_t desc_offset;
 int64_t desc_size;
 int32_t num_gtes_per_gte;
-int64_t rgd_offset;
 int64_t gd_offset;
+int64_t rgd_offset;
 int64_t grain_offset;
 char filler[1];
 char check_bytes[4];
@@ -420,6 +421,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
 uint32_t l1_size, l1_entry_sectors;
 VMDK4Header header;
 VmdkExtent *extent;
+int64_t l1_backup_offset = 0;
 
 ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
 if (ret < 0) {
@@ -435,10 +437,13 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
 }
 l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
 / l1_entry_sectors;
+if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
+l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
+}
 extent = vmdk_add_extent(bs, file, false,
   le64_to_cpu(header.capacity),
   le64_to_cpu(header.gd_offset) << 9,
-  le64_to_cpu(header.rgd_offset) << 9,
+  l1_backup_offset,
   l1_size,
   le32_to_cpu(header.num_gtes_per_gte),
   le64_to_cpu(header.granularity));
-- 
1.7.6




[Qemu-devel] [PATCH 05/31] block: Attach non-qdev devices as well

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

For now, this just protects against programming errors like having the
same drive back multiple non-qdev devices, or untimely bdrv_delete().
Later commits will add other interesting uses.

While there, rename BlockDriverState member peer to dev, bdrv_attach()
to bdrv_attach_dev(), bdrv_detach() to bdrv_detach_dev(), and
bdrv_get_attached() to bdrv_get_attached_dev().

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 block.c  |   29 -
 block.h  |7 ---
 block_int.h  |3 ++-
 blockdev.c   |5 ++---
 hw/ide/core.c|1 +
 hw/ide/piix.c|4 ++--
 hw/pflash_cfi01.c|1 +
 hw/pflash_cfi02.c|1 +
 hw/qdev-properties.c |6 +++---
 hw/sd.c  |1 +
 hw/usb-msd.c |2 +-
 hw/xen_disk.c|1 +
 12 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/block.c b/block.c
index 43742b7..fb9341c 100644
--- a/block.c
+++ b/block.c
@@ -755,7 +755,7 @@ void bdrv_make_anon(BlockDriverState *bs)
 
 void bdrv_delete(BlockDriverState *bs)
 {
-assert(!bs->peer);
+assert(!bs->dev);
 
 /* remove from list, if necessary */
 bdrv_make_anon(bs);
@@ -769,26 +769,37 @@ void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }
 
-int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
+int bdrv_attach_dev(BlockDriverState *bs, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
 {
-if (bs->peer) {
+if (bs->dev) {
 return -EBUSY;
 }
-bs->peer = qdev;
+bs->dev = dev;
 return 0;
 }
 
-void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
+/* TODO qdevified devices don't use this, remove when devices are qdevified */
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
 {
-assert(bs->peer == qdev);
-bs->peer = NULL;
+if (bdrv_attach_dev(bs, dev) < 0) {
+abort();
+}
+}
+
+void bdrv_detach_dev(BlockDriverState *bs, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
+{
+assert(bs->dev == dev);
+bs->dev = NULL;
 bs->change_cb = NULL;
 bs->change_opaque = NULL;
 }
 
-DeviceState *bdrv_get_attached(BlockDriverState *bs)
+/* TODO change to return DeviceState * when all users are qdevified */
+void *bdrv_get_attached_dev(BlockDriverState *bs)
 {
-return bs->peer;
+return bs->dev;
 }
 
 /*
diff --git a/block.h b/block.h
index 3ac0b94..c8eea67 100644
--- a/block.h
+++ b/block.h
@@ -74,9 +74,10 @@ int bdrv_file_open(BlockDriverState **pbs, const char 
*filename, int flags);
 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
   BlockDriver *drv);
 void bdrv_close(BlockDriverState *bs);
-int bdrv_attach(BlockDriverState *bs, DeviceState *qdev);
-void bdrv_detach(BlockDriverState *bs, DeviceState *qdev);
-DeviceState *bdrv_get_attached(BlockDriverState *bs);
+int bdrv_attach_dev(BlockDriverState *bs, void *dev);
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
+void bdrv_detach_dev(BlockDriverState *bs, void *dev);
+void *bdrv_get_attached_dev(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
   uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
diff --git a/block_int.h b/block_int.h
index 8a72b80..adc26f8 100644
--- a/block_int.h
+++ b/block_int.h
@@ -168,7 +168,8 @@ struct BlockDriverState {
 BlockDriver *drv; /* NULL means no media */
 void *opaque;
 
-DeviceState *peer;
+void *dev;  /* attached device model, if any */
+/* TODO change to DeviceState when all users are qdevified */
 
 char filename[1024];
 char backing_file[1024]; /* if non zero, the image is a diff of
diff --git a/blockdev.c b/blockdev.c
index 2602591..049dda5 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -14,7 +14,6 @@
 #include "qemu-option.h"
 #include "qemu-config.h"
 #include "sysemu.h"
-#include "hw/qdev.h"
 #include "block_int.h"
 
 static QTAILQ_HEAD(drivelist, DriveInfo) drives = 
QTAILQ_HEAD_INITIALIZER(drives);
@@ -738,12 +737,12 @@ int do_drive_del(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 bdrv_flush(bs);
 bdrv_close(bs);
 
-/* if we have a device associated with this BlockDriverState (bs->peer)
+/* if we have a device attached to this BlockDriverState
  * then we need to make the drive anonymous until the device
  * can be removed.  If this is a drive with no device backing
  * then we can just get rid of the block driver state right here.
  */
-if (bs->peer) {
+if (bdrv_get_attached_dev(bs)) {
 bdrv_make_anon(bs);
 } else {
 drive_uninit(drive_get_by_blockdev(bs));
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 40abc1e..428f033 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1890,6 +1890,7 @@ void ide_init2_with_non_qdev_drives(IDEBus *bus, 
DriveInfo *hd0,
  

[Qemu-devel] [PULL 00/31] Block patches

2011-09-06 Thread Kevin Wolf
The following changes since commit f69539b14bdba7a5cd22e1f4bed439b476b17286:

  apb_pci: convert PCI space to memory API (2011-09-04 09:28:04 +)

are available in the git repository at:
  git://repo.or.cz/qemu/kevin.git for-anthony

Fam Zheng (8):
  VMDK: enable twoGbMaxExtentFlat
  VMDK: add twoGbMaxExtentSparse support
  VMDK: separate vmdk_read_extent/vmdk_write_extent
  VMDK: Opening compressed extent.
  VMDK: read/write compressed extent
  VMDK: creating streamOptimized subformat
  VMDK: bugfix, open Haiku vmdk image
  VMDK: bugfix, opening vSphere 4 exported image

Frediano Ziglio (1):
  linux aio: some comments

Kevin Wolf (3):
  qcow2: Properly initialise QcowL2Meta
  qcow2: Fix error cases to run depedent requests
  async: Allow nested qemu_bh_poll calls

Markus Armbruster (14):
  block: Attach non-qdev devices as well
  block: Generalize change_cb() to BlockDevOps
  block: Split change_cb() into change_media_cb(), resize_cb()
  ide: Update command code definitions as per ACS-2 Table B.2
  ide: Clean up case label indentation in ide_exec_cmd()
  ide: Give vmstate structs internal linkage where possible
  block/raw: Fix to forward method bdrv_media_changed()
  block: Leave tracking media change to device models
  fdc: Make media change detection more robust
  block: Clean up bdrv_flush_all()
  savevm: Include writable devices with removable media
  xen: Clean up pci_piix3_xen_ide_unplug()'s test for "not a CD"
  spitz tosa: Simplify "drive is suitable for microdrive" test
  block: Declare qemu_blockalign() in block.h, not block_int.h

Paolo Bonzini (5):
  scsi: execute SYNCHRONIZE_CACHE asynchronously
  scsi: fix accounting of writes
  scsi: refine constants for READ CAPACITY 16
  scsi: fill in additional sense length correctly
  scsi: improve MODE SENSE emulation

 async.c  |   24 +++-
 block.c  |  104 ---
 block.h  |   28 +++-
 block/qcow2.c|   12 +-
 block/raw-posix.c|4 +
 block/raw.c  |7 +
 block/vmdk.c |  346 +++---
 block_int.h  |   14 +--
 blockdev.c   |5 +-
 hw/fdc.c |   46 
 hw/ide/core.c|   35 +++---
 hw/ide/internal.h|  171 +
 hw/ide/piix.c|7 +-
 hw/pflash_cfi01.c|1 +
 hw/pflash_cfi02.c|1 +
 hw/qdev-properties.c |6 +-
 hw/scsi-bus.c|6 +-
 hw/scsi-defs.h   |8 +-
 hw/scsi-disk.c   |  157 +--
 hw/sd.c  |   14 +-
 hw/spitz.c   |   10 +-
 hw/tosa.c|   10 +-
 hw/usb-msd.c |2 +-
 hw/virtio-blk.c  |   12 +-
 hw/xen_disk.c|1 +
 linux-aio.c  |1 +
 savevm.c |4 +-
 27 files changed, 652 insertions(+), 384 deletions(-)



[Qemu-devel] [PATCH 18/31] block: Declare qemu_blockalign() in block.h, not block_int.h

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

Device models should be able to use it without an unclean include of
block_int.h.

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 block.h |2 ++
 block_int.h |2 --
 hw/sd.c |1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/block.h b/block.h
index 7e3f587..8ec409f 100644
--- a/block.h
+++ b/block.h
@@ -258,6 +258,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
 const char *base_filename, const char *base_fmt,
 char *options, uint64_t img_size, int flags);
 
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+
 #define BDRV_SECTORS_PER_DIRTY_CHUNK 2048
 
 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
diff --git a/block_int.h b/block_int.h
index 539b4ed..5dc0074 100644
--- a/block_int.h
+++ b/block_int.h
@@ -224,8 +224,6 @@ void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque);
 void qemu_aio_release(void *p);
 
-void *qemu_blockalign(BlockDriverState *bs, size_t size);
-
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
diff --git a/hw/sd.c b/hw/sd.c
index 45e95f9..1af62b2 100644
--- a/hw/sd.c
+++ b/hw/sd.c
@@ -31,7 +31,6 @@
 
 #include "hw.h"
 #include "block.h"
-#include "block_int.h"
 #include "sd.h"
 
 //#define DEBUG_SD 1
-- 
1.7.6




[Qemu-devel] [PATCH 19/31] VMDK: enable twoGbMaxExtentFlat

2011-09-06 Thread Kevin Wolf
From: Fam Zheng 

Enable the createType 'twoGbMaxExtentFlat'. The supporting code is
already in.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/vmdk.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 5f673e9..1e9e8d0 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -551,7 +551,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int 
flags)
 if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
 return -EINVAL;
 }
-if (strcmp(ct, "monolithicFlat")) {
+if (strcmp(ct, "monolithicFlat") &&
+strcmp(ct, "twoGbMaxExtentFlat")) {
 fprintf(stderr,
 "VMDK: Not supported image type \"%s\""".\n", ct);
 return -ENOTSUP;
@@ -672,6 +673,7 @@ static int get_cluster_offset(BlockDriverState *bs,
 return 0;
 }
 
+offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
 l1_index = (offset >> 9) / extent->l1_entry_sectors;
 if (l1_index >= extent->l1_size) {
 return -1;
-- 
1.7.6




[Qemu-devel] [PATCH 13/31] fdc: Make media change detection more robust

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

fdctrl_change_cb() gets called on a virtual media change via monitor.
It would be nice if host device block drivers called it on physical
media change, but they don't.

bdrv_media_changed() lets you poll for media change, but it returns
"don't know" except with block driver "host_floppy".

FDrive member media_changed gets set on device initialization and by
fdctrl_change_cb(), and cleared by fdctrl_media_changed().  Thus, it's
set on first entry to fdctrl_media_changed() since device
initialization or virtual media change.

fdctrl_media_changed() ignores media_changed unless
bdrv_media_changed() returns "don't know".  If we change media via
monitor (setting media_changed), and the new media's block driver
returns 0, we lose.  Fortunately, "host_floppy" always returns 1 on
first call.  Brittle.  Clean it up not to rely on it.

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 hw/fdc.c |   12 
 1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/fdc.c b/hw/fdc.c
index 4f4c621..1d44bbd 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -898,11 +898,15 @@ static int fdctrl_media_changed(FDrive *drv)
 
 if (!drv->bs)
 return 0;
-ret = bdrv_media_changed(drv->bs);
-if (ret < 0) {
-ret = drv->media_changed;
+if (drv->media_changed) {
+drv->media_changed = 0;
+ret = 1;
+} else {
+ret = bdrv_media_changed(drv->bs);
+if (ret < 0) {
+ret = 0;/* we don't know, assume no */
+}
 }
-drv->media_changed = 0;
 if (ret) {
 fd_revalidate(drv);
 }
-- 
1.7.6




Re: [Qemu-devel] [PATCH] Only build with -g CFLAGS/LDFLAGS if using --enable-debug and add --optflags.

2011-09-06 Thread Brad
- Original message -
> On 09/06/11 10:02, Brad wrote:
> > Only build with -g CFLAGS/LDFLAGS if using --enable-debug.
> > Add --optflags to allow overriding the default optimization
> > level added to CFLAGS.
> > 
> > This is a first draft of coming up with a patch I could potentially
> > push upstream based on much cruder local patches to do something
> > similar. I'm trying to eliminate having to patch the configure
> > script.
> 
> You don't have to.   You can just run 'make CFLAGS="$optflags"' to 
> override the defaults.   Nevertheless having optflags would be nice as 
> you don't have to type this for each make run then.

I do when its unconditionally on the commandline either way. If the configure 
scipt didnt put it their if CFLAGS wasnt empty it wouldnt be an issue.

> I don't think we should mess with the -g flag.   It should stay enabled 
> by default, so you can easily get a useful stacktrace out of a core 
> without having to rebuild with debug info first.

I dont care what the default is as long as I can disable it without patching.



Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Anthony Liguori

On 09/06/2011 11:09 AM, Michael S. Tsirkin wrote:

On Tue, Sep 06, 2011 at 10:51:26AM -0500, Anthony Liguori wrote:

On 09/06/2011 10:45 AM, Jan Kiszka wrote:

On 2011-09-06 16:48, Michael S. Tsirkin wrote:

I'm afraid that won't be enough to stop people
scripting this command - libvirt accessed
HMP for years.

On the other hand, no QMP command means e.g.
libvirt users don't get any benefit from this.

What I think will solve these problems, for both HMP and QMP,
is an explicit 'debug_unstable' or 'debug_unsupported' command that will
expose all kind of debugging functionality making it
very explicit that it's an unsupported debugging utility.

Proposed syntax:

debug_unstable   

Example:

debug_unstable device_show -all


For HMP, this would needlessly complicate the user interface, nothing I
would support. People scripting things on top of HMP are generally doing
this on their own risk and cannot expect output stability.

device_show is like info qtree: the output will naturally change as the
emulated hardware evolves, information is added/removed, or we simply
improve the layout. Recent changes on info network are an example for
the latter.


Yeah, I'm not worried about stability.  HMP commands that aren't
exposed as QMP commands are inherently unstable and should not be
scripted to.


They are also not accessible when using libvirt, right?


$ virsh human-monitor-passthrough GuestName device_show foo

Should work.


Which means almost all cases I care about: debugging on my laptop
I can easily attach with gdb and inspect state.


I'm still contemplating how we go about doing this.  This series
introduces a couple new concepts like QMP class hinting anonymous
IDs. I'm concerned that we'll further complicate the need to support
backwards compatibility.


Dazed and confused. Above you stated these commands are
inherently unstable and no need to support?


Right, my concern is that the unstable command results in changes that 
are visible in stable interfaces.  As Jan suggested, we could try to 
hide this from those commands but that seems a bit ugly to me.


Regards,

Anthony Liguori


Would the command be useful if you couldn't address devices?  If it
just dumped the full machine state all at once?  That would at least
obviate the need to add anonymous IDs.

Regards,

Anthony Liguori



Jan








Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Jan Kiszka
On 2011-09-06 18:09, Michael S. Tsirkin wrote:
> On Tue, Sep 06, 2011 at 10:51:26AM -0500, Anthony Liguori wrote:
>> On 09/06/2011 10:45 AM, Jan Kiszka wrote:
>>> On 2011-09-06 16:48, Michael S. Tsirkin wrote:
 I'm afraid that won't be enough to stop people
 scripting this command - libvirt accessed
 HMP for years.

 On the other hand, no QMP command means e.g.
 libvirt users don't get any benefit from this.

 What I think will solve these problems, for both HMP and QMP,
 is an explicit 'debug_unstable' or 'debug_unsupported' command that will
 expose all kind of debugging functionality making it
 very explicit that it's an unsupported debugging utility.

 Proposed syntax:

 debug_unstable  

 Example:

 debug_unstable device_show -all
>>>
>>> For HMP, this would needlessly complicate the user interface, nothing I
>>> would support. People scripting things on top of HMP are generally doing
>>> this on their own risk and cannot expect output stability.
>>>
>>> device_show is like info qtree: the output will naturally change as the
>>> emulated hardware evolves, information is added/removed, or we simply
>>> improve the layout. Recent changes on info network are an example for
>>> the latter.
>>
>> Yeah, I'm not worried about stability.  HMP commands that aren't
>> exposed as QMP commands are inherently unstable and should not be
>> scripted to.
> 
> They are also not accessible when using libvirt, right?
> Which means almost all cases I care about: debugging on my laptop
> I can easily attach with gdb and inspect state.

HMP passthrough or - I bet that's what you rather want - monitor
passthrough from gdb.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux



[Qemu-devel] [PATCH 31/31] scsi: improve MODE SENSE emulation

2011-09-06 Thread Kevin Wolf
From: Paolo Bonzini 

- do not return extra pages when requesting all pages (PAGE CODE = 0x3f)

- return correct sense code for PC = 3 (saved parameters not supported)

- do not return geometry pages for CD devices

Signed-off-by: Paolo Bonzini 
Signed-off-by: Kevin Wolf 
---
 hw/scsi-disk.c |   96 +++-
 1 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index ead4163..9724d0f 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -546,12 +546,12 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, 
uint8_t *outbuf)
 return buflen;
 }
 
-static int mode_sense_page(SCSIRequest *req, int page, uint8_t *p,
+static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf,
int page_control)
 {
-SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
 BlockDriverState *bdrv = s->bs;
 int cylinders, heads, secs;
+uint8_t *p = *p_outbuf;
 
 /*
  * If Changeable Values are requested, a mask denoting those mode 
parameters
@@ -561,10 +561,13 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
  */
 switch (page) {
 case 4: /* Rigid disk device geometry page. */
+if (s->qdev.type == TYPE_ROM) {
+return -1;
+}
 p[0] = 4;
 p[1] = 0x16;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 /* if a geometry hint is available, use it */
 bdrv_get_geometry_hint(bdrv, &cylinders, &heads, &secs);
@@ -590,13 +593,16 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
 /* Medium rotation rate [rpm], 5400 rpm */
 p[20] = (5400 >> 8) & 0xff;
 p[21] = 5400 & 0xff;
-return p[1] + 2;
+break;
 
 case 5: /* Flexible disk device geometry page. */
+if (s->qdev.type == TYPE_ROM) {
+return -1;
+}
 p[0] = 5;
 p[1] = 0x1e;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 /* Transfer rate [kbit/s], 5Mbit/s */
 p[2] = 5000 >> 8;
@@ -629,26 +635,27 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
 /* Medium rotation rate [rpm], 5400 rpm */
 p[28] = (5400 >> 8) & 0xff;
 p[29] = 5400 & 0xff;
-return p[1] + 2;
+break;
 
 case 8: /* Caching page.  */
 p[0] = 8;
 p[1] = 0x12;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 if (bdrv_enable_write_cache(s->bs)) {
 p[2] = 4; /* WCE */
 }
-return p[1] + 2;
+break;
 
 case 0x2a: /* CD Capabilities and Mechanical Status page. */
-if (s->qdev.type != TYPE_ROM)
-return 0;
+if (s->qdev.type != TYPE_ROM) {
+return -1;
+}
 p[0] = 0x2a;
 p[1] = 0x14;
 if (page_control == 1) { /* Changeable Values */
-return p[1] + 2;
+break;
 }
 p[2] = 3; // CD-R & CD-RW read
 p[3] = 0; // Writing not supported
@@ -673,27 +680,30 @@ static int mode_sense_page(SCSIRequest *req, int page, 
uint8_t *p,
 p[19] = (16 * 176) & 0xff;
 p[20] = (16 * 176) >> 8; // 16x write speed current
 p[21] = (16 * 176) & 0xff;
-return p[1] + 2;
+break;
 
 default:
-return 0;
+return -1;
 }
+
+*p_outbuf += p[1] + 2;
+return p[1] + 2;
 }
 
-static int scsi_disk_emulate_mode_sense(SCSIRequest *req, uint8_t *outbuf)
+static int scsi_disk_emulate_mode_sense(SCSIDiskReq *r, uint8_t *outbuf)
 {
-SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
 uint64_t nb_sectors;
-int page, dbd, buflen, page_control;
+int page, dbd, buflen, ret, page_control;
 uint8_t *p;
 uint8_t dev_specific_param;
 
-dbd = req->cmd.buf[1]  & 0x8;
-page = req->cmd.buf[2] & 0x3f;
-page_control = (req->cmd.buf[2] & 0xc0) >> 6;
+dbd = r->req.cmd.buf[1]  & 0x8;
+page = r->req.cmd.buf[2] & 0x3f;
+page_control = (r->req.cmd.buf[2] & 0xc0) >> 6;
 DPRINTF("Mode Sense(%d) (page %d, xfer %zd, page_control %d)\n",
-(req->cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, req->cmd.xfer, 
page_control);
-memset(outbuf, 0, req->cmd.xfer);
+(r->req.cmd.buf[0] == MODE_SENSE) ? 6 : 10, page, r->req.cmd.xfer, 
page_control);
+memset(outbuf, 0, r->req.cmd.xfer);
 p = outbuf;
 
 if (bdrv_is_read_only(s->bs)) {
@@ -702,7 +712,7 @@ static int scsi_disk_emulate_mode_sense(SCSIRequest *req, 
uint8_t *outbuf)
 dev_specific_param = 0x00;
 }
 
-if (req->cmd.buf[0] == MODE_SENSE) {
+if (r->req.cmd.buf[0] == MODE_SENSE) {
 p[1] = 0; /

[Qemu-devel] [PATCH 14/31] block: Clean up bdrv_flush_all()

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

Change (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)) to just
bdrv_is_inserted().  Rationale:

The value of bdrv_is_removable(bs) matters only when
bdrv_is_inserted(bs) is false.

bdrv_is_inserted(bs) is true when bs is open (bs->drv != NULL) and
not an empty host drive (CD-ROM or floppy).

Therefore, bdrv_is_removable(bs) matters only when:

1. bs is not open
   old: may call bdrv_flush(bs), which does nothing
   new: won't call

2. bs is an empty host drive
   old: may call bdrv_flush(bs), which calls driver method
raw_flush(), which calls fdatasync() or equivalent, which
can't do anything useful while the drive is empty
   new: won't call

Result is bs->drv && !bdrv_is_read_only(bs) && bdrv_is_inserted(bs).
bdrv_is_inserted(bs) implies bs->drv.  Drop the redundant test.

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 block.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index 2158f83..487a6a9 100644
--- a/block.c
+++ b/block.c
@@ -1753,8 +1753,7 @@ void bdrv_flush_all(void)
 BlockDriverState *bs;
 
 QTAILQ_FOREACH(bs, &bdrv_states, list) {
-if (bs->drv && !bdrv_is_read_only(bs) &&
-(!bdrv_is_removable(bs) || bdrv_is_inserted(bs))) {
+if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
 bdrv_flush(bs);
 }
 }
-- 
1.7.6




Re: [Qemu-devel] [PATCH 0/6] Device state visualization reloaded

2011-09-06 Thread Jan Kiszka
On 2011-09-06 18:08, Anthony Liguori wrote:
> On 09/06/2011 11:05 AM, Jan Kiszka wrote:
>> On 2011-09-06 17:51, Anthony Liguori wrote:
>>> I'm still contemplating how we go about doing this.  This series
>>> introduces a couple new concepts like QMP class hinting anonymous IDs.
>>> I'm concerned that we'll further complicate the need to support
>>> backwards compatibility.
>>
>> Anonymous IDs must not be considered stable. If you prefer, we could try
>> to filter them out for QMP use. Need to check though if there is a good
>> location to catch them, but the rule is trivial.
>>
>> I will also happily drop anonymous IDs again once we have converted all
>> devices to stable QOM full path addressing. But that will simply take
>> too much time to wait for it I'm afraid.
>>
>>>
>>> Would the command be useful if you couldn't address devices?  If it just
>>> dumped the full machine state all at once?  That would at least obviate
>>> the need to add anonymous IDs.
>>
>> Theoretically usable, but extremely unhandy. I would not like so see
>> such an interface exposed to users.
> 
> What about just taking the device type (iow the savevm section name)?
> 
> In most scenarios, it's probably easier for a user and will still only 
> have a single match.

This naming is not yet exposed via any user interface. We would have to
extend info qtree, not sure if we want this...

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux



[Qemu-devel] [PATCH 24/31] VMDK: creating streamOptimized subformat

2011-09-06 Thread Kevin Wolf
From: Fam Zheng 

Creating streamOptimized subformat. Added subformat option
'streamOptimized', to create a image with compression enabled and each
cluster with a GrainMarker.

Signed-off-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/vmdk.c |   18 --
 1 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 36a761f..54f7441 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1090,7 +1090,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t 
sector_num,
 }
 
 
-static int vmdk_create_extent(const char *filename, int64_t filesize, bool 
flat)
+static int vmdk_create_extent(const char *filename, int64_t filesize,
+  bool flat, bool compress)
 {
 int ret, i;
 int fd = 0;
@@ -1114,7 +1115,9 @@ static int vmdk_create_extent(const char *filename, 
int64_t filesize, bool flat)
 magic = cpu_to_be32(VMDK4_MAGIC);
 memset(&header, 0, sizeof(header));
 header.version = 1;
-header.flags = 3; /* ?? */
+header.flags =
+3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
+header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
 header.capacity = filesize / 512;
 header.granularity = 128;
 header.num_gtes_per_gte = 512;
@@ -1144,6 +1147,7 @@ static int vmdk_create_extent(const char *filename, 
int64_t filesize, bool flat)
 header.rgd_offset = cpu_to_le64(header.rgd_offset);
 header.gd_offset = cpu_to_le64(header.gd_offset);
 header.grain_offset = cpu_to_le64(header.grain_offset);
+header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
 
 header.check_bytes[0] = 0xa;
 header.check_bytes[1] = 0x20;
@@ -1285,7 +1289,7 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options)
 const char *fmt = NULL;
 int flags = 0;
 int ret = 0;
-bool flat, split;
+bool flat, split, compress;
 char ext_desc_lines[BUF_SIZE] = "";
 char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
 const int64_t split_size = 0x8000;  /* VMDK has constant split size */
@@ -1334,7 +1338,8 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options)
 } else if (strcmp(fmt, "monolithicFlat") &&
strcmp(fmt, "monolithicSparse") &&
strcmp(fmt, "twoGbMaxExtentSparse") &&
-   strcmp(fmt, "twoGbMaxExtentFlat")) {
+   strcmp(fmt, "twoGbMaxExtentFlat") &&
+   strcmp(fmt, "streamOptimized")) {
 fprintf(stderr, "VMDK: Unknown subformat: %s\n", fmt);
 return -EINVAL;
 }
@@ -1342,6 +1347,7 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options)
   strcmp(fmt, "twoGbMaxExtentSparse"));
 flat = !(strcmp(fmt, "monolithicFlat") &&
  strcmp(fmt, "twoGbMaxExtentFlat"));
+compress = !strcmp(fmt, "streamOptimized");
 if (flat) {
 desc_extent_line = "RW %lld FLAT \"%s\" 0\n";
 } else {
@@ -1396,7 +1402,7 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options)
 snprintf(ext_filename, sizeof(ext_filename), "%s%s",
 path, desc_filename);
 
-if (vmdk_create_extent(ext_filename, size, flat)) {
+if (vmdk_create_extent(ext_filename, size, flat, compress)) {
 return -EINVAL;
 }
 filesize -= size;
@@ -1510,7 +1516,7 @@ static QEMUOptionParameter vmdk_create_options[] = {
 .type = OPT_STRING,
 .help =
 "VMDK flat extent format, can be one of "
-"{monolithicSparse (default) | monolithicFlat | 
twoGbMaxExtentSparse | twoGbMaxExtentFlat} "
+"{monolithicSparse (default) | monolithicFlat | 
twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
 },
 { NULL }
 };
-- 
1.7.6




[Qemu-devel] [PATCH 09/31] ide: Clean up case label indentation in ide_exec_cmd()

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 hw/ide/core.c |   12 ++--
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index d1cbfe7..ff59a45 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -979,7 +979,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 s->status = READY_STAT | SEEK_STAT;
 ide_set_irq(s->bus);
 break;
-   case WIN_READ_EXT:
+case WIN_READ_EXT:
lba48 = 1;
 case WIN_READ:
 case WIN_READ_ONCE:
@@ -989,7 +989,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 s->req_nb_sectors = 1;
 ide_sector_read(s);
 break;
-   case WIN_WRITE_EXT:
+case WIN_WRITE_EXT:
lba48 = 1;
 case WIN_WRITE:
 case WIN_WRITE_ONCE:
@@ -1002,7 +1002,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 ide_transfer_start(s, s->io_buffer, 512, ide_sector_write);
 s->media_changed = 1;
 break;
-   case WIN_MULTREAD_EXT:
+case WIN_MULTREAD_EXT:
lba48 = 1;
 case WIN_MULTREAD:
 if (!s->mult_sectors)
@@ -1027,7 +1027,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write);
 s->media_changed = 1;
 break;
-   case WIN_READDMA_EXT:
+case WIN_READDMA_EXT:
lba48 = 1;
 case WIN_READDMA:
 case WIN_READDMA_ONCE:
@@ -1036,7 +1036,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
ide_cmd_lba48_transform(s, lba48);
 ide_sector_start_dma(s, IDE_DMA_READ);
 break;
-   case WIN_WRITEDMA_EXT:
+case WIN_WRITEDMA_EXT:
lba48 = 1;
 case WIN_WRITEDMA:
 case WIN_WRITEDMA_ONCE:
@@ -1261,7 +1261,7 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 ide_set_irq(s->bus);
 break;
 
-   case WIN_SMART:
+case WIN_SMART:
if (s->drive_kind == IDE_CD)
goto abort_cmd;
if (s->hcyl != 0xc2 || s->lcyl != 0x4f)
-- 
1.7.6




[Qemu-devel] [PATCH 10/31] ide: Give vmstate structs internal linkage where possible

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 hw/ide/core.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index ff59a45..1806e00 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2010,7 +2010,7 @@ static bool ide_error_needed(void *opaque)
 }
 
 /* Fields for GET_EVENT_STATUS_NOTIFICATION ATAPI command */
-const VMStateDescription vmstate_ide_atapi_gesn_state = {
+static const VMStateDescription vmstate_ide_atapi_gesn_state = {
 .name ="ide_drive/atapi/gesn_state",
 .version_id = 1,
 .minimum_version_id = 1,
@@ -2022,7 +2022,7 @@ const VMStateDescription vmstate_ide_atapi_gesn_state = {
 }
 };
 
-const VMStateDescription vmstate_ide_drive_pio_state = {
+static const VMStateDescription vmstate_ide_drive_pio_state = {
 .name = "ide_drive/pio_state",
 .version_id = 1,
 .minimum_version_id = 1,
@@ -2084,7 +2084,7 @@ const VMStateDescription vmstate_ide_drive = {
 }
 };
 
-const VMStateDescription vmstate_ide_error_status = {
+static const VMStateDescription vmstate_ide_error_status = {
 .name ="ide_bus/error",
 .version_id = 1,
 .minimum_version_id = 1,
-- 
1.7.6




[Qemu-devel] [PATCH 11/31] block/raw: Fix to forward method bdrv_media_changed()

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

Block driver "raw" forwards most methods to the underlying block
driver.  However, it doesn't implement method bdrv_media_changed().
Makes bdrv_media_changed() always return -ENOTSUP.

I believe -fda /dev/fd0 gives you raw over host_floppy, and disk
change detection (fdc register 7 bit 7) is broken.  Testing my theory
requires a computer museum, though.

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 block/raw.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/block/raw.c b/block/raw.c
index 555db4f..f197479 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -75,6 +75,11 @@ static int raw_is_inserted(BlockDriverState *bs)
 return bdrv_is_inserted(bs->file);
 }
 
+static int raw_media_changed(BlockDriverState *bs)
+{
+return bdrv_media_changed(bs->file);
+}
+
 static void raw_eject(BlockDriverState *bs, int eject_flag)
 {
 bdrv_eject(bs->file, eject_flag);
@@ -137,8 +142,10 @@ static BlockDriver bdrv_raw = {
 .bdrv_discard   = raw_discard,
 
 .bdrv_is_inserted   = raw_is_inserted,
+.bdrv_media_changed = raw_media_changed,
 .bdrv_eject = raw_eject,
 .bdrv_set_locked= raw_set_locked,
+
 .bdrv_ioctl = raw_ioctl,
 .bdrv_aio_ioctl = raw_aio_ioctl,
 
-- 
1.7.6




[Qemu-devel] [PATCH 30/31] scsi: fill in additional sense length correctly

2011-09-06 Thread Kevin Wolf
From: Paolo Bonzini 

Even though we do not use them, we should include the last three
bytes of sense data in the additional sense length.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Kevin Wolf 
---
 hw/scsi-bus.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index d6fd19a..160eaee 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -498,6 +498,7 @@ void scsi_req_build_sense(SCSIRequest *req, SCSISense sense)
 memset(req->sense, 0, 18);
 req->sense[0] = 0xf0;
 req->sense[2] = sense.key;
+req->sense[7] = 10;
 req->sense[12] = sense.asc;
 req->sense[13] = sense.ascq;
 req->sense_len = 18;
@@ -883,7 +884,7 @@ int scsi_build_sense(uint8_t *in_buf, int in_len,
 /* Return fixed format sense buffer */
 buf[0] = 0xf0;
 buf[2] = sense.key;
-buf[7] = 7;
+buf[7] = 10;
 buf[12] = sense.asc;
 buf[13] = sense.ascq;
 return MIN(len, 18);
-- 
1.7.6




Re: [Qemu-devel] [PATCH 4/9] runstate_set(): Check for valid transitions

2011-09-06 Thread Luiz Capitulino
On Tue, 06 Sep 2011 17:55:12 +0200
Jan Kiszka  wrote:

> On 2011-09-06 15:14, Luiz Capitulino wrote:
> > This commit could have been folded with the previous one, however
> > doing it separately will allow for easy bisect and revert if needed.
> > 
> > Checking and testing all valid transitions wasn't trivial, chances
> > are this will need broader testing to become more stable.
> > 
> > Signed-off-by: Luiz Capitulino 
> > ---
> >  vl.c |  149 
> > +-
> >  1 files changed, 148 insertions(+), 1 deletions(-)
> > 
> > diff --git a/vl.c b/vl.c
> > index 9926d2a..fe3628a 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -332,9 +332,156 @@ bool runstate_check(RunState state)
> >  return current_run_state == state;
> >  }
> >  
> > +/* This function will abort() on invalid state transitions */
> >  void runstate_set(RunState new_state)
> >  {
> > -assert(new_state < RSTATE_MAX);
> > +switch (current_run_state) {
> > +case RSTATE_NO_STATE:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +case RSTATE_IN_MIGRATE:
> > +case RSTATE_PRE_LAUNCH:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_DEBUG:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_IN_MIGRATE:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +case RSTATE_PRE_LAUNCH:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_PANICKED:
> > +switch (new_state) {
> > +case RSTATE_PAUSED:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_IO_ERROR:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_PAUSED:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_POST_MIGRATE:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_PRE_LAUNCH:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +case RSTATE_POST_MIGRATE:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_PRE_MIGRATE:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +case RSTATE_POST_MIGRATE:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_RESTORE:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_RUNNING:
> > +switch (new_state) {
> > +case RSTATE_DEBUG:
> > +case RSTATE_PANICKED:
> > +case RSTATE_IO_ERROR:
> > +case RSTATE_PAUSED:
> > +case RSTATE_PRE_MIGRATE:
> > +case RSTATE_RESTORE:
> > +case RSTATE_SAVEVM:
> > +case RSTATE_SHUTDOWN:
> > +case RSTATE_WATCHDOG:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_SAVEVM:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_SHUTDOWN:
> > +switch (new_state) {
> > +case RSTATE_PAUSED:
> > +goto transition_ok;
> > +default:
> > +/* invalid transition */
> > +abort();
> > +}
> > +abort();
> > +case RSTATE_WATCHDOG:
> > +switch (new_state) {
> > +case RSTATE_RUNNING:
> > +goto trans

[Qemu-devel] [PATCH 07/31] block: Split change_cb() into change_media_cb(), resize_cb()

2011-09-06 Thread Kevin Wolf
From: Markus Armbruster 

Multiplexing callbacks complicates matters needlessly.

Signed-off-by: Markus Armbruster 
Signed-off-by: Kevin Wolf 
---
 block.c |   23 +++
 block.h |   12 +++-
 block_int.h |3 ---
 hw/ide/core.c   |8 ++--
 hw/sd.c |8 ++--
 hw/virtio-blk.c |8 +++-
 6 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/block.c b/block.c
index 00fe345..0aba1bf 100644
--- a/block.c
+++ b/block.c
@@ -44,7 +44,7 @@
 #include 
 #endif
 
-static void bdrv_dev_change_cb(BlockDriverState *bs, int reason);
+static void bdrv_dev_change_media_cb(BlockDriverState *bs);
 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 BlockDriverCompletionFunc *cb, void *opaque);
@@ -690,7 +690,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, 
int flags,
 
 if (!bdrv_key_required(bs)) {
 bs->media_changed = 1;
-bdrv_dev_change_cb(bs, CHANGE_MEDIA);
+bdrv_dev_change_media_cb(bs);
 }
 
 return 0;
@@ -727,7 +727,7 @@ void bdrv_close(BlockDriverState *bs)
 }
 
 bs->media_changed = 1;
-bdrv_dev_change_cb(bs, CHANGE_MEDIA);
+bdrv_dev_change_media_cb(bs);
 }
 }
 
@@ -806,10 +806,17 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const 
BlockDevOps *ops,
 bs->dev_opaque = opaque;
 }
 
-static void bdrv_dev_change_cb(BlockDriverState *bs, int reason)
+static void bdrv_dev_change_media_cb(BlockDriverState *bs)
 {
-if (bs->dev_ops && bs->dev_ops->change_cb) {
-bs->dev_ops->change_cb(bs->dev_opaque, reason);
+if (bs->dev_ops && bs->dev_ops->change_media_cb) {
+bs->dev_ops->change_media_cb(bs->dev_opaque);
+}
+}
+
+static void bdrv_dev_resize_cb(BlockDriverState *bs)
+{
+if (bs->dev_ops && bs->dev_ops->resize_cb) {
+bs->dev_ops->resize_cb(bs->dev_opaque);
 }
 }
 
@@ -1283,7 +1290,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
 ret = drv->bdrv_truncate(bs, offset);
 if (ret == 0) {
 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
-bdrv_dev_change_cb(bs, CHANGE_SIZE);
+bdrv_dev_resize_cb(bs);
 }
 return ret;
 }
@@ -1659,7 +1666,7 @@ int bdrv_set_key(BlockDriverState *bs, const char *key)
 bs->valid_key = 1;
 /* call the change callback now, we skipped it on open */
 bs->media_changed = 1;
-bdrv_dev_change_cb(bs, CHANGE_MEDIA);
+bdrv_dev_change_media_cb(bs);
 }
 return ret;
 }
diff --git a/block.h b/block.h
index a72cfc9..7e3f587 100644
--- a/block.h
+++ b/block.h
@@ -28,8 +28,18 @@ typedef struct QEMUSnapshotInfo {
 uint64_t vm_clock_nsec; /* VM clock relative to boot */
 } QEMUSnapshotInfo;
 
+/* Callbacks for block device models */
 typedef struct BlockDevOps {
-void (*change_cb)(void *opaque, int reason);
+/*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Beware: doesn't run when a host device's physical media
+ * changes.  Sure would be useful if it did.
+ */
+void (*change_media_cb)(void *opaque);
+/*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+void (*resize_cb)(void *opaque);
 } BlockDevOps;
 
 #define BDRV_O_RDWR0x0002
diff --git a/block_int.h b/block_int.h
index ab31dc9..19ae75e 100644
--- a/block_int.h
+++ b/block_int.h
@@ -211,9 +211,6 @@ struct BlockDriverState {
 void *private;
 };
 
-#define CHANGE_MEDIA0x01
-#define CHANGE_SIZE 0x02
-
 struct BlockDriverAIOCB {
 AIOPool *pool;
 BlockDriverState *bs;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 26a9c5b..47a706e 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -783,15 +783,11 @@ static void ide_cfata_metadata_write(IDEState *s)
 }
 
 /* called when the inserted state of the media has changed */
-static void cdrom_change_cb(void *opaque, int reason)
+static void ide_cd_change_cb(void *opaque)
 {
 IDEState *s = opaque;
 uint64_t nb_sectors;
 
-if (!(reason & CHANGE_MEDIA)) {
-return;
-}
-
 bdrv_get_geometry(s->bs, &nb_sectors);
 s->nb_sectors = nb_sectors;
 
@@ -1743,7 +1739,7 @@ void ide_bus_reset(IDEBus *bus)
 }
 
 static const BlockDevOps ide_cd_block_ops = {
-.change_cb = cdrom_change_cb,
+.change_media_cb = ide_cd_change_cb,
 };
 
 int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
diff --git a/hw/sd.c b/hw/sd.c
index 1f00910..45e95f9 100644
--- a/hw/sd.c
+++ b/hw/sd.c
@@ -420,14 +420,10 @@ static void sd_reset(SDState *sd, BlockDriverState *bdrv)
 sd->pwd_len = 0;
 }
 
-static void sd_cardchange(void *opaque, int reason)
+static void sd_cardchange(void *opaque)
 {
 SDState *sd = opaque;
 
-if (!(reason & CHANGE_MEDIA)) {
-return;
-}
-
 qemu_set_irq(sd->inserted_cb, bdrv_is_inserted(sd->bdrv));
 if (bdrv_is_inserted(sd->bdrv)

  1   2   >